Changeset - 5e29bc7ac9ff
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 13 years ago 2012-09-30 19:14:34
ohnobinki@ohnopublishing.net
Fill in more of the schedule search form for Calvin to fix an error that prevented crawling.
1 file changed with 20 insertions and 0 deletions:
0 comments (0 inline, 0 general)
school.d/calvin.crawl.inc
Show inline comments
 
@@ -108,36 +108,46 @@ function calvin_crawl_semester_list(arra
 
 * \param $school
 
 *   The calvin school handle.
 
 * \param $semester
 
 *   The Semester object to populate with courses.
 
 * \param $school_crawl_log
 
 *   The logger handle.
 
 */
 
function calvin_crawl_semester(array $school, Semester $semester, &$school_crawl_log)
 
{
 
  $cookies = array();
 
  $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
 
  $html = calvin_crawl_geturi($uri, $cookies, $school_crawl_log);
 
  $form_uri = $uri;
 
  $seed_dom = new DOMDocument();
 
  $seed_dom->loadHTML($html);
 
  $return_url = dom_input_value($seed_dom, 'RETURN.URL');
 

	
 
  /*
 
   * First, read all of the friendly subject/department names. They're
 
   * not in the output, but they're in the ``Subjects'' dropdown of
 
   * the input form. The <select name="LIST.VAR1_1" id="LIST_VAR1_1"/>
 
   * is associated with subjects/departments.
 
   */
 
  $department_var1_list = array();
 
  foreach (school_crawl_form_select_array($seed_dom->getElementById('LIST_VAR1_1')) as $department_id => $department_name)
 
    {
 
    $semester->department_name_set($department_id, trim(reset($department_name)));
 
      $department_var1_list[] = $department_id;
 
    }
 

	
 
  while (count($department_var1_list))
 
    {
 
      /* Start back on the form page... */
 
      $uri = $form_uri;
 

	
 
  /*
 
   * LIST.VAR<X>_<N>: <X> is the column, <N> is the row. There
 
   * are apparently a max of 5 rows (see the LIST.VAR<X>_MAX
 
   * below).
 
   *
 
   * Columns:
 
   * LIST.VAR1: department
 
   * LIST.VAR2: course_level
 
   * LIST.VAR3: IIRC, a course identifier, such as 156 from MATH-156
 
   * LIST.VAR4: I forget
 
   *
 
@@ -174,24 +184,33 @@ function calvin_crawl_semester(array $sc
 
      if (!isset($form[$colname . '_MAX']))
 
	$form[$colname . '_MAX'] = '5';
 

	
 
      foreach (array('1', '2', '3', '4', '5') as $list_row)
 
	{
 
	  $rowname = $colname . '_' . $list_row;
 
	  if (!isset($form[$rowname]))
 
	    $form[$rowname] = '';
 
	}
 
    }
 

	
 
  /*
 
   * Fill in up to five departments (the LIST.VAR1 rows) for this request.
 
   */
 
  foreach (array('1', '2', '3', '4', '5') as $var1_row)
 
    if (count($department_var1_list))
 
      {
 
	$form['LIST.VAR1_' . $var1_row] = array_shift($department_var1_list);
 
      }
 

	
 
  /*
 
   * VAR7 and VAR8 constrain the times during which
 
   * courses meet
 
   */
 
  $form['VAR7'] = '';
 
  $form['VAR8'] = '';
 

	
 
  /* ``course title keywords'' */
 
  $form['VAR3'] = '';
 

	
 
  /* ? */
 
  $form['VAR6'] = '';
 
  $form['VAR21'] = '';
 
@@ -410,24 +429,25 @@ function calvin_crawl_semester(array $sc
 
      if (!preg_match(';Page ([0-9]+) of ([0-9]+)\</td\>$;m', $html, $pages))
 
	{
 
	  school_crawl_logf($school_crawl_log, 0, 'Unable to determine the number of pages in this Calvin resultset');
 
	  break;
 
	}
 

	
 
      school_crawl_logf($school_crawl_log, 8, "calvin_crawl(): finished page %d of %d with %d courses.", $pages[1], $pages[2], $list_row - 1);
 

	
 
      $form = array(
 
		    'ACTION*Grp:WSS.COURSE.SECTIONS' => 'NEXT',
 
		    );
 
    }
 
    }
 

	
 
  $has_stat = FALSE;
 
  foreach ($skipped_sections as $reason => $num)
 
    {
 
      if (!$num)
 
	continue;
 
      if (!$has_stat)
 
	school_crawl_logf($school_crawl_log, 7, 'Skipped some sections for <reason>: <number skipped>:');
 
      school_crawl_logf($school_crawl_log, 7, "%s: %d", $reason, $num);
 
    }
 

	
 
  /*
0 comments (0 inline, 0 general)