Changeset - d6fbd191da57
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 13 years ago 2012-04-26 01:33:07
ohnobinki@ohnopublishing.net
Remove unnecessary CONSTITUENCY GET parameter from the Calvin College crawler.
1 file changed with 6 insertions and 6 deletions:
0 comments (0 inline, 0 general)
school.d/calvin.crawl.inc
Show inline comments
 
@@ -34,40 +34,40 @@ function calvin_crawl_semester_list(arra
 
{
 
  $season_map = array(
 
		      'FA' => Semester::SEASON_FALL,
 
		      'IN' => 'interim',
 
		      'SP' => Semester::SEASON_SPRING,
 
		      'MA' => 'may',
 
		      /* I don't know if SU is a valid Calvin Semester ID or not */
 
		      'SU' => Semester::SEASON_SUMMER);
 

	
 
  /**
 
   * The first link we start at is the one from KV into WebAdvisor.
 
   *
 
   * 1. https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL
 
   * 1. https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL
 
   *    <body onload="javascript:getWindowHTML();">
 
   *
 
   *    Calls javascript:getWindowHTML(). This merely adds
 
   *    TOKENIDX=NULL to the query string, so we can skip this step
 
   *    and just have TOKENIDX=NULL.
 
   *
 
   * 2. https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL&TOKENIDX=NULL
 
   * 2. https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL&TOKENIDX=NULL
 
   *    <body onload="javascript:setWindowHTML('', '7699844013');">
 
   *
 
   *    In the above, the second argument to setWindowHTML() is
 
   *    random. Thus, we have to capture this value.
 
   */
 

	
 
  $cookies = array();
 
  $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
 
  $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
 
  $semesters_html = calvin_crawl_geturi($uri, $cookies, $school_crawl_log);
 

	
 
  $semesters_dom = new DOMDocument();
 
  $semesters_dom->loadHTML($semesters_html);
 

	
 
  /*
 
   * Discover the available semesters
 
   */
 
  $semesters_var1 = $semesters_dom->getElementById('VAR1');
 
  if (empty($semesters_var1))
 
    {
 
      school_crawl_logf($school_crawl_log, 0, "Error: Unable to load list of semesters.");
 
@@ -106,25 +106,25 @@ function calvin_crawl_semester_list(arra
 
 *   Crawl the courses for a semester from Calvin College.
 
 *
 
 * \param $school
 
 *   The Calvin school handle.
 
 * \param $semester
 
 *   The Semester object to populate with courses.
 
 * \param $school_crawl_log
 
 *   The logger handle.
 
 */
 
function calvin_crawl_semester(array $school, Semester $semester, &$school_crawl_log)
 
{
 
  $cookies = array();
 
  $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
 
  $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
 
  $html = calvin_crawl_geturi($uri, $cookies, $school_crawl_log);
 
  $seed_dom = new DOMDocument();
 
  $seed_dom->loadHTML($html);
 
  $return_url = dom_input_value($seed_dom, 'RETURN.URL');
 

	
 
  /*
 
   * First, read all of the friendly subject/department names. They're
 
   * not in the output, but they're in the ``Subjects'' dropdown of
 
   * the input form. The <select name="LIST.VAR1_1" id="LIST_VAR1_1"/>
 
   * is associated with subjects/departments.
 
   */
 
  foreach (school_crawl_form_select_array($seed_dom->getElementById('LIST_VAR1_1')) as $department_id => $department_name)
 
@@ -550,27 +550,27 @@ function calvin_crawl_geturi(&$uri, arra
 

	
 
  school_crawl_logf($school_crawl_log, 7, "Using WebAdvisor token: %s.", $token);
 
  school_crawl_logf($school_crawl_log, 7, "");
 

	
 
  /*
 
   * setWindowHTML() will first remove the query string parameters
 
   * 'CLONE' and 'FORCEIDX'. Then it appends TOKENIDX=<token> to the
 
   * query parameters.
 
   *
 
   * Example in which TOKENIDX does not start out as NULL and a
 
   * CLONE=Y command is being sent:
 
   *
 
   * Input: HTTPS://kvdata.calvin.edu/walive/WebAdvisor?TYPE=P&PID=ST-WESTS13C&CLONE=Y&CLONE_PROCESS=Y&SPAUKQ=708501792841963&CONSTITUENCY=WBST&TOKENIDX=1507971558
 
   * Input: HTTPS://kvdata.calvin.edu/walive/WebAdvisor?TYPE=P&PID=ST-WESTS13C&CLONE=Y&CLONE_PROCESS=Y&SPAUKQ=708501792841963&TOKENIDX=1507971558
 
   *
 
   * Result: HTTPS://kvdata.calvin.edu/walive/WebAdvisor?TYPE=P&PID=ST-WESTS13C&CLONE_PROCESS=Y&SPAUKQ=708501792841963&CONSTITUENCY=WBST&TOKENIDX=2281086932
 
   * Result: HTTPS://kvdata.calvin.edu/walive/WebAdvisor?TYPE=P&PID=ST-WESTS13C&CLONE_PROCESS=Y&SPAUKQ=708501792841963&TOKENIDX=2281086932
 
   */
 
  $uri = preg_replace('/([?&])TOKENIDX=[^&]+/', '$1TOKENIDX=' . $token,
 
		      preg_replace('/([?&])(CLONE|FORCEIDX)=[^&]+&?/', '$1', $uri));
 

	
 
  return calvin_crawl_noscript_filter(school_crawl_geturi($uri, $cookies, $school_crawl_log));
 
}
 

	
 
/**
 
 * \brief
 
 *   Add a course to a semester if that semester doesn't yet have this
 
 *   course.
 
 *
0 comments (0 inline, 0 general)