diff --git a/school.d/calvin.crawl.inc b/school.d/calvin.crawl.inc --- a/school.d/calvin.crawl.inc +++ b/school.d/calvin.crawl.inc @@ -18,6 +18,8 @@ * along with slate_permutate. If not, see . */ +require_once dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'inc' . DIRECTORY_SEPARATOR . 'school.crawl.webadvisor.inc'; + /** * \brief * Retrieve a list of crawlable semesters from Calvin College. @@ -32,73 +34,7 @@ */ function calvin_crawl_semester_list(array $school, array &$semesters, &$school_crawl_log) { - $season_map = array( - 'FA' => Semester::SEASON_FALL, - 'IN' => 'interim', - 'SP' => Semester::SEASON_SPRING, - 'MA' => 'may', - /* I don't know if SU is a valid Calvin Semester ID or not */ - 'SU' => Semester::SEASON_SUMMER); - - /** - * The first link we start at is the one from KV into WebAdvisor. - * - * 1. https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL - * - * - * Calls javascript:getWindowHTML(). This merely adds - * TOKENIDX=NULL to the query string, so we can skip this step - * and just have TOKENIDX=NULL. - * - * 2. https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL&TOKENIDX=NULL - * - * - * In the above, the second argument to setWindowHTML() is - * random. Thus, we have to capture this value. - */ - - $cookies = array(); - $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL'; - $semesters_html = calvin_crawl_geturi($uri, $cookies, $school_crawl_log); - - $semesters_dom = new DOMDocument(); - $semesters_dom->loadHTML($semesters_html); - - /* - * Discover the available semesters - */ - $semesters_var1 = $semesters_dom->getElementById('VAR1'); - if (empty($semesters_var1)) - { - school_crawl_logf($school_crawl_log, 0, "Error: Unable to load list of semesters."); - return 1; - } - $semesters_select_nodes = $semesters_var1->childNodes; - foreach ($semesters_select_nodes as $semester_node) - { - if ($semester_node->tagName != 'option' - || !$semester_node->hasAttribute('value') - || !strlen($semester_node->getAttribute('value'))) - continue; - - $semester_str = $semester_node->getAttribute('value'); - - if (empty($season_map[substr($semester_str, 3)])) - { - school_crawl_logf($school_crawl_log, 6, "Warning: Unknown semester identification chars: %s. Skipping this semester.", - $semester_str); - continue; - } - $season = $season_map[substr($semester_str, 3)]; - $year_timespec = strptime(substr($semester_str, 0, 2), '%y'); - $year = $year_timespec['tm_year'] + 1900; - - $semester = new Semester($year, $season); - $semesters[$semester_str] = $semester; - } - $semester = array_reverse($semesters, TRUE); - - return 0; + return school_crawl_webadvisor_semester_list($school, $semesters, $school_crawl_log); } /** @@ -114,470 +50,7 @@ function calvin_crawl_semester_list(arra */ function calvin_crawl_semester(array $school, Semester $semester, &$school_crawl_log) { - $cookies = array(); - $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL'; - $html = calvin_crawl_geturi($uri, $cookies, $school_crawl_log); - $form_uri = $uri; - $seed_dom = new DOMDocument(); - $seed_dom->loadHTML($html); - $return_url = dom_input_value($seed_dom, 'RETURN.URL'); - - /* - * First, read all of the friendly subject/department names. They're - * not in the output, but they're in the ``Subjects'' dropdown of - * the input form. The element and return its value attribute. - * - * \param $domdocument - * The DOMDocument to search. - * \param $name - * The name attribute of the element. - * \return - * The value attribute of the input element or NULL if not found. - */ -function dom_input_value($domdocument, $name) -{ - $xpath = new DOMXPath($domdocument); - $input_node_list = $xpath->query('/descendant::input[attribute::name="' . $name . '"]'); - - if (!$input_node_list->length) - return NULL; - $input_node = $input_node_list->item(0); - if (!$input_node->hasAttribute('value')) - return NULL; - return $input_node->getAttribute('value'); -} - -/** - * \brief - * Returns the content of an element with the given ID. - * - * A convenience function. - * - * \param $domdocument - * A DOMDocument to search. - * \param $id - * The id attribute of the element whose content are requested. - * \return - * A UTF-8 string of the contents of the given element or NULL if - * the element isn't found. - */ -function dom_id_content($domdocument, $id) -{ - $node = $domdocument->getElementById($id); - if ($node) - { - return $node->nodeValue; - } - return NULL; -} - -/** - * \brief - * Searches for and removes a