diff --git a/school.d/cedarville.inc b/school.d/cedarville.inc
--- a/school.d/cedarville.inc
+++ b/school.d/cedarville.inc
@@ -63,241 +63,3 @@ function cedarville_default_classes()
return array($chapel);
}
-
-
-/**
- * \brief
- * Parse given html into an array, first row is row headers
- *
- * \param $html
- * HTML that PHP's DOM would willingly would eat.
- */
-function table_parse($html)
-{
- libxml_use_internal_errors(true); // Suppress warnings
- $arr = array();
- $dom = new DOMDocument;
- if(!$html)
- return NULL;
-
- $dom->loadHTML($html);
- $dom->preserveWhiteSpace = FALSE;
- $tables = $dom->getElementsByTagName('table');
- $rows = $tables->item(0)->getElementsByTagName('tr'); // Get first table on page
- foreach ($rows as $rownum => $row) {
- $cols = $row->getElementsByTagName('td');
- foreach($cols as $colnum => $col){
- $arr[$rownum][$colnum] = $col->nodeValue;
- }
- }
- return $arr;
-}
-
-/** Crawls Cedarville course listings. $season is "fa" or "sp", year is 4-digit year */
-function cedarville_crawl($semester, $verbosity = 1)
-{
-
- $season = strtolower(substr($semester->season_get(), 0, 2));
- $year = $semester->year_get();
- $season_string = $year . $season;
-
- $basepath = 'http://cedarville.edu/courses/schedule/';
-
- if ($verbosity)
- echo "cedarville_crawl(): Beginning crawl of Cedarville:\n";
-
- if ($verbosity > 1)
- echo "cedarville_crawl(): Determining list of departments.\n";
- /*
- * We need two passes because the first department's code name is
- * not accessible available in the first pageload.
- */
- $departments = array();
- if (cedarville_crawl_departments_get($basepath . $year . $season . '_index.htm', $departments, $season_string))
- return 1;
- if (!count($departments))
- {
- echo "cedarville_crawl(): Unable to get a listing of departments.\n";
- return 1;
- }
- /* find the first department whose name we don't yet know */
- if (cedarville_crawl_departments_get($basepath . $year . $season . '_' . current(array_keys($departments)) . '_all.htm', $departments, $season_string))
- return 1;
-
- $tables = array();
- foreach ($departments as $department => $dept_name)
- {
- echo 'cedarville_crawl(): Crawling department ' . $department . ' (' . $dept_name . ")...\n";
- $html = file_get_contents($basepath . $year . $season . '_' . $department . '_' . 'all.htm');
- if (!$html)
- continue;
- $tables[$department] = table_parse(cedarville_html_fix($html));
- }
-
- $meeting_type_maps = array('LAB' => 'lab', 'LECT' => 'lecture');
-
- foreach ($tables as $dept_table)
- {
- /*
- * Discard the first row, which has the contents of the
|
- * elements.
- */
- unset($dept_table[0]);
-
- foreach($dept_table as $course_table)
- {
- /*
- * format:
- * 0: course synonym, an unsigned integer.
- * 1: section spec, parsable by Section::parse().
- * 2: friendly course title.
- * 3: Instructor name.
- * 4: Number of credit hours in decimal notation.
- * 5: Fee.
- * 6: Meeting time, explained below.
- * 7: Cap.
- * 8-10: Textbook link. Most rows only have column 8, not
- * all the way through 10. This information seems
- * quite useless.
- *
- * Section meeting time/place format:
- *
- * Confusing example: ' ILB WI219 TR 08:30A-09:45A'
- * Complete example plus lab: ' LEC TYL203 MWF 08:00A-08:50A LAB ENS118 TR 03:00P-04:30P'
- *
- * Appears to have format:
- * : -
- *
- * It appears tht may be:
- * LEC: normal lecture meeting.
- * ONL: online course.
- * ILB: ethan says a partially online course...?
- * HYB: hybrid of...?
- * FLD: field...?
- * FE2: ?
- * CLN: ?
- * LAB: Lab
- * LES: something for some PFMU/PLMU class?
- */
-
- $synonym = $course_table[0];
- $section_parts = Section::parse($course_table[1]);
- if (count($section_parts) < 3)
- {
- error_log('Error parsing section_id. Given `' . $course_table[1] . '\', interpreted as `'
- . implode('-', $section_parts) . '\'. Skipping.');
- continue;
- }
-
- $instructor = $course_table[3];
-
- /*
- * Each course may have multiple meeting times associated
- * with it at Cedarville. We are not sure how to handle this
- * quite, because different class sections may be tied with
- * different lab meetings and stuff...
- */
- $meetings_str = $course_table[6];
- if (strpos($meetings_str, 'TBA') !== FALSE)
- {
- if ($verbosity > 1)
- error_log('Skipping ' . implode('-', $section_parts) . ' because its meeting time info has `TBA\' in it.');
- continue;
- }
- $meetings = array();
- $meeting_multiple_types = array();
- while (strlen($meetings_str) > 5)
- {
- if (!preg_match(';^ ([A-Z]+) +([A-Z]+[A-Z0-9]*) +([MTWRF]{1,5}) +([0-9:AP]+)-([0-9:AP]+);',
- $meetings_str, $meeting_matches))
- {
- if (preg_match(';^Dates:[^0-9]+([/0-9]{8})-([/0-9]{8});',
- $meetings_str, $meeting_matches))
- {
- if ($verbosity > 4)
- error_log('Skipping some meeting data for '
- . implode('-', $section_parts) . ' because it is a date range: `'
- . $meeting_matches[0] . '\'');
- $meetings_str = substr($meetings_str, strlen($meeting_matches[0]));
- continue;
- }
-
- if ($verbosity > 0)
- error_log('Error parsing meeting time. Given `' . $meetings_str . '\'. Skipping '
- . implode('-', $section_parts));
- break;
- }
- /* prepare for parsing the next meeting time */
- $meetings_str = substr($meetings_str, strlen($meeting_matches[0]));
-
- $days = school_crawl_days_str_format($meeting_matches[3]);
- $time_start = school_crawl_time_format(strptime($meeting_matches[4] . 'M', '%I:%M%p'));
- $time_end = school_crawl_time_format(strptime($meeting_matches[5] . 'M', '%I:%M%p'));
- $room = $meeting_matches[2];
-
- $type = $meeting_matches[1];
- while (isset($meeting_type_maps[$type]))
- $type = $meeting_type_maps[$type];
- $type = strtolower($type);
-
- $meetings[] = new SectionMeeting($days, $time_start, $time_end,
- $room, $type);
- }
-
- $semester->section_add($section_parts['department'], $section_parts['course'],
- new Section($section_parts['section'], $meetings,
- $synonym, $instructor));
- }
- }
-
- return 0;
-}
-
-/**
- * \brief
- * Scan cedarville's course listing pages for departments.
- *
- * \return
- * An associative array mapping department codes onto department
- * friendly names.
- */
-function cedarville_crawl_departments_get($dept_url, array &$departments, $season_string)
-{
- $html = file_get_contents($dept_url);
- $dept_dom = new DOMDocument();
- if (!$dept_dom->loadHTML(cedarville_html_fix($html)))
- {
- echo "cedarville_crawl(): Error determining list of available departments: Unable to parse HTML.\n";
- return 1;
- }
- $xpath = new DOMXPath($dept_dom);
-
- $dept_node_list = $xpath->query('/descendant::div[@id="contenttext"]/child::span[position()=1 or position()=2]/child::a');
- foreach ($dept_node_list as $dept_node)
- {
- $href = $dept_node->getAttribute('href');
- if (!preg_match('/^' . preg_quote($season_string, '/') . '_([a-z]+)_[a-z]+\.htm$/', $href, $matches))
- {
- echo 'cedarvillege_crawl(): Error determining list of available departments: Unable to parse the department string out of href="' . $href . "\".\n";
- return 1;
- }
-
- $dept = $matches[1];
- $departments[$dept] = $dept_node->textContent;
- }
-
- return 0;
-}
-
-/**
- * \brief
- * Fix some incorrect usage of the HTML entity delimiter, the ampersand.
- */
-function cedarville_html_fix($html)
-{
- $html = preg_replace('/&&/', '&&', $html);
- $html = preg_replace('/&([^;]{5})/', '&$1', $html);
- $html = preg_replace('/ID="(LINKS|HERE)"/', '', $html);
-
- return $html;
-}