*
 * This file is a part of slate_permutate.
 *
 * slate_permutate is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * slate_permutate is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with slate_permutate. If not, see <https://www.gnu.org/licenses/>.
 */

/**
 * \brief
 *   Retrieve a list of crawlable semesters from Calvin College.
 *
 * Fetches the WebAdvisor entry page, looks up the element whose id is
 * VAR1 and reads the value attribute of each of its option children.
 * The first two characters of a value are parsed as a two-digit year
 * and everything from the fourth character onward is looked up as the
 * season code; the third character is ignored. Values whose season
 * code is unknown, or whose year cannot be parsed, are skipped with a
 * logged warning.
 *
 * \param $school
 *   The calvin school handle. Not used by this function.
 * \param $semesters
 *   The array to populate with empty Semester objects. Entries are
 *   keyed by the raw option value and the array is reversed (keys
 *   preserved) before returning.
 * \param $school_crawl_log
 *   A school_crawl_log handle for informing the user/developer of
 *   progress.
 * \return
 *   0 on success, 1 if the list of semesters could not be found in
 *   the fetched page.
 */
function calvin_crawl_semester_list(array $school, array &$semesters, &$school_crawl_log)
{
  $season_map = array(
    'FA' => Semester::SEASON_FALL,
    'IN' => 'interim',
    'SP' => Semester::SEASON_SPRING,
    'MA' => 'may',
    /* I don't know if SU is a valid Calvin Semester ID or not */
    'SU' => Semester::SEASON_SUMMER);

  /**
   * The first link we start at is the one from KV into WebAdvisor.
   *
   * 1. https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL
   *
   *    The response calls javascript:getWindowHTML(). This merely adds
   *    TOKENIDX=NULL to the query string, so we can skip this step
   *    and just have TOKENIDX=NULL.
   *
   * 2. https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL&TOKENIDX=NULL
   *
   *    The response calls setWindowHTML(), whose second argument is
   *    random. Thus, we have to capture this value.
   *
   * NOTE(review): the capturing presumably happens inside
   * calvin_crawl_geturi() -- confirm.
   */
  $cookies = array();
  $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
  $semesters_html = calvin_crawl_geturi($uri, $cookies, $school_crawl_log);
  $semesters_dom = new DOMDocument();
  $semesters_dom->loadHTML($semesters_html);

  /*
   * Discover the available semesters
   */
  $semesters_var1 = $semesters_dom->getElementById('VAR1');
  if (empty($semesters_var1)) {
    school_crawl_logf($school_crawl_log, 0, "Error: Unable to load list of semesters.");
    return 1;
  }

  foreach ($semesters_var1->childNodes as $semester_node) {
    /*
     * childNodes also yields text and comment nodes, which have
     * neither a tagName nor attributes; only elements are of interest.
     */
    if (!($semester_node instanceof DOMElement)
        || $semester_node->tagName != 'option'
        || !$semester_node->hasAttribute('value')
        || !strlen($semester_node->getAttribute('value')))
      continue;

    $semester_str = $semester_node->getAttribute('value');
    $season_code = substr($semester_str, 3);
    if (empty($season_map[$season_code])) {
      school_crawl_logf($school_crawl_log, 6, "Warning: Unknown semester identification chars: %s. Skipping this semester.", $semester_str);
      continue;
    }
    $season = $season_map[$season_code];

    /*
     * strptime() returns FALSE when the two leading characters are
     * not a valid %y year; skip such entries instead of indexing
     * into FALSE and silently ending up with the year 1900.
     */
    $year_timespec = strptime(substr($semester_str, 0, 2), '%y');
    if ($year_timespec === FALSE) {
      school_crawl_logf($school_crawl_log, 6, "Warning: Unable to parse year from semester identifier: %s. Skipping this semester.", $semester_str);
      continue;
    }
    /* tm_year counts years since 1900. */
    $year = $year_timespec['tm_year'] + 1900;

    $semesters[$semester_str] = new Semester($year, $season);
  }

  /*
   * Hand the list back in reverse order. This has to be assigned to
   * the by-reference $semesters parameter to have any effect.
   */
  $semesters = array_reverse($semesters, TRUE);

  return 0;
}

/**
 * \brief
 *   Crawl the courses for a semester from Calvin College.
 *
 * \param $school
 *   The calvin school handle.
 * \param $semester
 *   The Semester object to populate with courses.
 * \param $school_crawl_log
 *   The logger handle.
 */
function calvin_crawl_semester(array $school, Semester $semester, &$school_crawl_log)
{
  $cookies = array();
  $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
  $html = calvin_crawl_geturi($uri, $cookies, $school_crawl_log);
  $seed_dom = new DOMDocument();
  $seed_dom->loadHTML($html);
  $return_url = dom_input_value($seed_dom, 'RETURN.URL');

  /*
   * First, read all of the friendly subject/department names.
They're
   * not in the output, but they're in the ``Subjects'' dropdown of
   * the input form.
   *
   * NOTE(review): the rest of the comment above and the opening of
   * the doc comment below look truncated at this point -- confirm
   * against the upstream file.
   *
   * \brief
   *   Find an <input /> element and return its value attribute.
   *
   * Only the first input element (in document order) whose name
   * attribute equals $name is considered; if that element has no
   * value attribute, NULL is returned.
   *
   * \param $domdocument
   *   The DOMDocument to search.
   * \param $name
   *   The name attribute of the element.
   * \return
   *   The value attribute of the input element or NULL if not found.
   */
function dom_input_value($domdocument, $name)
{
  $name = (string) $name;

  /*
   * XPath 1.0 string literals have no escape sequences, so pick a
   * quoting style that does not collide with the characters in $name
   * and fall back to concat() when it contains both kinds of quote.
   * For names without a double quote the expression is the same one
   * this function has always produced.
   */
  if (strpos($name, '"') === FALSE)
    $name_literal = '"' . $name . '"';
  elseif (strpos($name, "'") === FALSE)
    $name_literal = "'" . $name . "'";
  else
    $name_literal = 'concat("' . str_replace('"', '", \'"\', "', $name) . '")';

  $xpath = new DOMXPath($domdocument);
  $input_node_list = $xpath->query('/descendant::input[attribute::name=' . $name_literal . ']');

  /* query() yields FALSE rather than a node list if the expression is rejected. */
  if ($input_node_list === FALSE || !$input_node_list->length)
    return NULL;

  $input_node = $input_node_list->item(0);
  if (!$input_node->hasAttribute('value'))
    return NULL;

  return $input_node->getAttribute('value');
}

/**
 * \brief
 *   Returns the content of an element with the given ID.
 *
 * A convenience function: looks the element up with getElementById()
 * and hands back its nodeValue.
 *
 * \param $domdocument
 *   A DOMDocument to search.
 * \param $id
 *   The id attribute of the element whose content are requested.
 * \return
 *   A UTF-8 string of the contents of the given element or NULL if
 *   the element isn't found.
 */
function dom_id_content($domdocument, $id)
{
  $node = $domdocument->getElementById($id);
  if (!$node)
    return NULL;

  return $node->nodeValue;
}

/**
 * \brief
 *   Searches for and removes a