/*
 * Provenance (Mercurial changeset export):
 *
 *   HG changeset patch
 *   User Nathan Phillip Brink
 *   Date 2012-08-30 03:50:20
 *   Node ID b5c4b3fbe0601cef40185b9bf24320be2d395f8c
 *   Parent d7b53f34caaf5966416c4bb14db1f5e5bb26309e
 *
 *   Add support for Dordt College.
 *
 *   diff --git a/inc/school.crawl.inc b/inc/school.crawl.inc
 *   --- a/inc/school.crawl.inc
 *   +++ b/inc/school.crawl.inc
 *   @@ -673,6 +673,34 @@ function school_crawl_table_rownodes(DOM
 */

/**
 * \brief
 *   Resolve an index into a table row, supporting colspan fuzzyish
 *   indexing.
 *
 * A cell carrying colspan="N" occupies N consecutive logical columns,
 * so every index that falls inside that span resolves to that cell.
 *
 * \param $rownodes
 *   The cells of the row to index into. Nodes which are not elements
 *   are skipped and do not occupy a column.
 * \param $i
 *   The zero-based logical column index to resolve.
 * \return
 *   The DOM node covering that index or NULL if the row does not
 *   extend that far.
 */
function school_crawl_table_rownode_index(DOMNodeList $rownodes, $i)
{
  foreach ($rownodes as $data_node)
  {
    /* Only elements have attributes; hasAttribute() is fatal on other node types. */
    if (!($data_node instanceof DOMElement))
      continue;

    /*
     * Follow HTML's own treatment of colspan: take the leading
     * non-negative integer and fall back to 1 when the attribute is
     * missing, unparsable or zero. Without this, colspan="0" made a
     * cell unreachable and a non-numeric value broke the arithmetic.
     */
    $colspan = 1;
    if ($data_node->hasAttribute('colspan')
        && preg_match('/^\\s*\\+?(\\d+)/', $data_node->getAttribute('colspan'), $colspan_matches)
        && (int)$colspan_matches[1] > 0)
      $colspan = min((int)$colspan_matches[1], 1000);

    $i -= $colspan;
    if ($i < 0)
      return $data_node;
  }

  return NULL;
}

/*
 * The hunk above ends inside the docblock of the function which
 * follows it in inc/school.crawl.inc ("Detect if a point in a buffer
 * is at a newline.", marked \internal); that function is not part of
 * this export.
 *
 *   diff --git a/school.d/dordt.crawl.inc b/school.d/dordt.crawl.inc
 *   new file mode 100644
 *   --- /dev/null
 *   +++ b/school.d/dordt.crawl.inc
 *   @@ -0,0 +1,215 @@
 *
 * (The lines which preceded the following notice in dordt.crawl.inc
 * appear to be missing from this export.)
 *
 * This file is a part of slate_permutate.
 *
 * slate_permutate is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * slate_permutate is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with slate_permutate.  If not, see <http://www.gnu.org/licenses/>.
 */
/**
 * \brief
 *   Parse an HTML document and prepare it for XPath queries.
 *
 * \param $html
 *   The raw HTML, as handed back by school_crawl_geturi().
 * \return
 *   A DOMXPath over the parsed document or NULL if $html is not a
 *   non-blank string.
 */
function _dordt_crawl_xpath($html)
{
  /* DOMDocument::loadHTML() refuses empty input (a ValueError as of PHP 8). */
  if (!is_string($html) || trim($html) === '')
    return NULL;

  $dom = new DOMDocument();
  $dom->loadHTML($html);
  return new DOMXPath($dom);
}

/**
 * \brief
 *   Read the text of the cell at a logical column of a table row.
 *
 * \param $rownodes
 *   The cells of the row, as returned by school_crawl_table_rownodes().
 * \param $column
 *   The zero-based logical column index.
 * \return
 *   The cell's textContent or an empty string if the row has no cell
 *   at that column.
 */
function _dordt_crawl_cell_text(DOMNodeList $rownodes, $column)
{
  $node = school_crawl_table_rownode_index($rownodes, $column);
  return $node === NULL ? '' : $node->textContent;
}

/**
 * \brief
 *   Discover the semesters for which Dordt publishes a course
 *   schedule.
 *
 * Every link to course_list.pl on the course schedules page is
 * expected to carry `year' and `sem' GET arguments; each link which
 * does appends one Semester to $semesters.
 *
 * \param $school
 *   The school's information array; $school['url'] is the base the
 *   schedules page is resolved against.
 * \param $semesters
 *   The array to which discovered Semester objects are appended.
 * \param $school_crawl_log
 *   The crawler's logging handle.
 * \return
 *   0 on success, 1 if the schedules page could not be loaded.
 */
function dordt_crawl_semester_list(array $school, array &$semesters, &$school_crawl_log)
{
  $cookies = array();
  $uri = $school['url'] . 'academics/course_schedules/';
  $semesters_xpath = _dordt_crawl_xpath(school_crawl_geturi($uri, $cookies, $school_crawl_log));
  if ($semesters_xpath === NULL)
  {
    school_crawl_logf($school_crawl_log, 2, "Unable to load the list of course schedules from %s.", $uri);
    return 1;
  }

  $seasons_map = array(
    'S' => 'spring',
    'F' => 'fall',
  );
  foreach ($semesters_xpath->query('//a[contains(@href, "course_list.pl?")]') as $a_node)
  {
    $href = $a_node->getAttribute('href');

    /*
     * Let PHP split and decode the query string; a hand-rolled
     * explode('=') reads an undefined offset for a pair without `='
     * and leaves percent-escapes undecoded.
     */
    $q = array();
    parse_str((string)parse_url($href, PHP_URL_QUERY), $q);

    if (empty($q['year']) || !is_string($q['year'])
        || empty($q['sem']) || !is_string($q['sem']))
    {
      school_crawl_logf($school_crawl_log, 2, "Unable to parse URI's GET arguments into year and sem parts: %s", $href);
      continue;
    }
    if (empty($seasons_map[$q['sem']]))
    {
      school_crawl_logf($school_crawl_log, 2, "Unable to parse season `%s' into season for `%s': %s",
                        $q['sem'], $a_node->textContent, $href);
      continue;
    }

    $semesters[] = new Semester($q['year'], $seasons_map[$q['sem']]);
  }

  return 0;
}

/**
 * \brief
 *   Crawl the course schedule of one semester into $semester.
 *
 * Finds the semester's course_list.pl link on the course schedules
 * page, records the department names listed on the linked page and
 * adds one section meeting per meeting row of every `schedule'
 * table. Afterwards each course whose number ends in `L' is added as
 * a dependency of the course with the same number minus that `L',
 * when such a course exists.
 *
 * \param $school
 *   The school's information array; $school['url'] is the base the
 *   schedules page is resolved against.
 * \param $semester
 *   The semester to populate.
 * \param $school_crawl_log
 *   The crawler's logging handle.
 * \return
 *   0 on success, 1 if the semester's schedule could not be located
 *   or loaded.
 */
function dordt_crawl_semester(array $school, Semester $semester, &$school_crawl_log)
{
  $seasons_map = array(
    'spring' => 'S',
    'fall' => 'F',
  );
  $season = $semester->season_get();
  if (empty($seasons_map[$season]))
  {
    school_crawl_logf($school_crawl_log, 2, "Unable to understand season %s.", $season);
    return 1;
  }

  $cookies = array();
  $uri = $school['url'] . 'academics/course_schedules/';
  $semesters_xpath = _dordt_crawl_xpath(school_crawl_geturi($uri, $cookies, $school_crawl_log));
  if ($semesters_xpath === NULL)
  {
    school_crawl_logf($school_crawl_log, 2, "Unable to load the list of course schedules from %s.", $uri);
    return 1;
  }

  /* If several links match, the last one in document order is used. */
  $semester_href = NULL;
  foreach ($semesters_xpath->query('//a[contains(@href, "course_list.pl?") and contains(@href, "sem=' . $seasons_map[$season] . '") and contains(@href, "year=' . $semester->year_get() . '")]') as $a_node)
    $semester_href = $a_node->getAttribute('href');
  if ($semester_href === NULL)
  {
    school_crawl_logf($school_crawl_log, 4, "Unable to find link associated with schedule's semester");
    return 1;
  }

  $uri = school_crawl_url($uri, $semester_href);
  $semester_xpath = _dordt_crawl_xpath(school_crawl_geturi($uri, $cookies, $school_crawl_log));
  if ($semester_xpath === NULL)
  {
    school_crawl_logf($school_crawl_log, 2, "Unable to load the course schedule from %s.", $uri);
    return 1;
  }

  /*
   * The department index is a list (class `columns') of links to
   * in-page anchors. The anchor, the part of the href after `#', is
   * recorded together with the link's text, for example
   * "Academic Skills".
   */
  foreach ($semester_xpath->query('//ul[contains(concat(" ", normalize-space(@class), " "), " columns ")]//a[contains(@href, "#")]')
           as $a_node)
  {
    list(, $hash) = explode('#', $a_node->getAttribute('href'));
    $semester->department_name_set($hash, $a_node->textContent);
  }

  $labs = array();
  /* The most recently seen course row; it persists from one table to the next. */
  $dept = '';
  $course = '';
  $section = '';
  $title = '';
  $credits = '';
  $instructor = '';
  foreach ($semester_xpath->query('//table[contains(concat(" ", normalize-space(@class), " "), " schedule ")]') as $table_schedule)
  {
    /* Only the first heading row is consulted. */
    $head_tr = $semester_xpath->query('.//thead//tr', $table_schedule)->item(0);
    if ($head_tr === NULL)
    {
      school_crawl_logf($school_crawl_log, 4, "Unable to find headings for some table.");
      continue;
    }

    $column_mapping = array(
      'DEPT' => FALSE,
      'NUM' => FALSE,
      'SEC' => FALSE, /* section */
      'TITLE' => FALSE,
      'CR' => FALSE, /* credits */
      'RM' => FALSE, /* room */
      'INSTRUCTOR' => FALSE,
    );
    $columns_resolved = TRUE;
    foreach (array_keys($column_mapping) as $name)
    {
      $column_mapping[$name] = school_crawl_table_resolve_column($head_tr, $name);
      if ($column_mapping[$name] === FALSE)
      {
        school_crawl_logf($school_crawl_log, 4, "Unable to resolve column %s.", $name);
        $columns_resolved = FALSE;
        break;
      }
    }
    if (!$columns_resolved)
      continue;

    /*
     * Rows come in two flavours (cell text shown, markup omitted).
     * A course row fills in the DEPT column:
     *
     *   AGRI | 111L | 01 | Agri 111 Lab | 0.00 | De Vries, G
     *
     * The meeting rows which follow it leave DEPT empty and reuse
     * the TITLE column for the time and days, with the room in RM:
     *
     *   2:00 PM - 5:00 PM, W | SB 138
     */
    foreach ($semester_xpath->query('.//tbody//tr', $table_schedule) as $tr_row)
    {
      $rownodes = school_crawl_table_rownodes($tr_row);
      $new_dept = _dordt_crawl_cell_text($rownodes, $column_mapping['DEPT']);

      if (!empty($new_dept))
      {
        /* A course row: remember it for the meeting rows which follow. */
        $course = _dordt_crawl_cell_text($rownodes, $column_mapping['NUM']);
        $section = _dordt_crawl_cell_text($rownodes, $column_mapping['SEC']);
        $title = _dordt_crawl_cell_text($rownodes, $column_mapping['TITLE']);
        $credits = _dordt_crawl_cell_text($rownodes, $column_mapping['CR']);
        $instructor = _dordt_crawl_cell_text($rownodes, $column_mapping['INSTRUCTOR']);
        $dept = $new_dept;
        continue;
      }

      if (empty($dept))
      {
        school_crawl_logf($school_crawl_log, 6, "Unexpected empty DEPT column");
        continue;
      }

      $room = _dordt_crawl_cell_text($rownodes, $column_mapping['RM']);
      $timestuff = _dordt_crawl_cell_text($rownodes, $column_mapping['TITLE']);
      /*
       * 2:00 PM - 5:00 PM, W
       * 1:00 PM - 1:50 PM, M/W/F
       * 2:00 PM - 5:00 PM, T
       * 1:50 PM - 4:50 PM, Th
       * 8:00 AM - 9:15 AM, T/Th
       */
      if (!preg_match('/(\\d*:\\d* .M) - (\\d*:\\d* .M), (.*)/', $timestuff, $matches))
      {
        school_crawl_logf($school_crawl_log, 7, "Unable to parse timestuff: %s",
                          $timestuff);
        continue;
      }
      /*
       * NOTE(review): strptime() is deprecated as of PHP 8.1. It is
       * kept because school_crawl_time_format() presumably expects
       * strptime()'s result array -- confirm before replacing it.
       */
      $time_start = school_crawl_time_format(strptime($matches[1], '%l:%M %p'));
      $time_end = school_crawl_time_format(strptime($matches[2], '%l:%M %p'));
      $days = school_crawl_days_format($school_crawl_log, explode('/', $matches[3]));

      school_crawl_logf($school_crawl_log, 1, "Adding %s", $dept . '-' . $course . '-' . $section);
      $semester->section_add($dept, $course, new Section($section, array(new SectionMeeting($days, $time_start, $time_end, $room, 'lecture', $instructor)), NULL, $credits), $title);

      /* Save labs for dependency generation later */
      if (preg_match('/L$/', $course))
      {
        if (!isset($labs[$dept]))
          $labs[$dept] = array();
        $labs[$dept][$course] = TRUE;
      }
    }
  }

  /* Bind lab dependencies */
  foreach ($labs as $lab_dept => $lab_courses)
  {
    foreach (array_keys($lab_courses) as $lab_course)
    {
      /* The base course has the lab's number without the trailing `L'. */
      $course_base = $semester->class_get($lab_dept, substr($lab_course, 0, -1));
      if (!empty($course_base))
        $course_base->dependency_add($semester->class_get($lab_dept, $lab_course));
    }
  }

  /* Report success explicitly, as dordt_crawl_semester_list() does. */
  return 0;
}

/*
 * Provenance (Mercurial changeset export):
 *
 *   diff --git a/school.d/dordt.inc b/school.d/dordt.inc
 *   new file mode 100644
 *   --- /dev/null
 *   +++ b/school.d/dordt.inc
 *   @@ -0,0 +1,29 @@
 *
 * (The lines which preceded the following notice in dordt.inc appear
 * to be missing from this export.)
 *
 * This file is a part of slate_permutate.
 */
/*
 * slate_permutate is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * slate_permutate is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with slate_permutate.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * \brief
 *   Describe Dordt College.
 *
 * \return
 *   An array with the keys `name', `example_course_id',
 *   `registration_url' and `student_address'.
 */
function dordt_info()
{
  $info = array();
  $info['name'] = 'Dordt College';
  $info['example_course_id'] = 'ENGL-101';
  $info['registration_url'] = 'http://dordt.edu/services_support/registrar/';
  /*
   * NOTE(review): presumably the word used to address a student of
   * this school -- confirm that `Dork' is the intended value.
   */
  $info['student_address'] = 'Dork';

  return $info;
}