Changeset - 5e29bc7ac9ff
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 13 years ago 2012-09-30 19:14:34
ohnobinki@ohnopublishing.net
Fill in more of the schedule search form for Calvin to fix an error that prevented crawling.
1 file changed with 20 insertions and 0 deletions:
0 comments (0 inline, 0 general)
school.d/calvin.crawl.inc
Show inline comments
 
@@ -96,60 +96,70 @@ function calvin_crawl_semester_list(arra
 
      $semester = new Semester($year, $season);
 
      $semesters[$semester_str] = $semester;
 
    }
 
  $semester = array_reverse($semesters, TRUE);
 

	
 
  return 0;
 
}
 

	
 
/**
 
 * \brief
 
 *   Crawl the courses for a semester from Calvin College.
 
 *
 
 * \param $school
 
 *   The calvin school handle.
 
 * \param $semester
 
 *   The Semester object to populate with courses.
 
 * \param $school_crawl_log
 
 *   The logger handle.
 
 */
 
function calvin_crawl_semester(array $school, Semester $semester, &$school_crawl_log)
 
{
 
  $cookies = array();
 
  $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
 
  $html = calvin_crawl_geturi($uri, $cookies, $school_crawl_log);
 
  $form_uri = $uri;
 
  $seed_dom = new DOMDocument();
 
  $seed_dom->loadHTML($html);
 
  $return_url = dom_input_value($seed_dom, 'RETURN.URL');
 

	
 
  /*
 
   * First, read all of the friendly subject/department names. They're
 
   * not in the output, but they're in the ``Subjects'' dropdown of
 
   * the input form. The <select name="LIST.VAR1_1" id="LIST_VAR1_1"/>
 
   * is associated with subjects/departments.
 
   */
 
  $department_var1_list = array();
 
  foreach (school_crawl_form_select_array($seed_dom->getElementById('LIST_VAR1_1')) as $department_id => $department_name)
 
    {
 
    $semester->department_name_set($department_id, trim(reset($department_name)));
 
      $department_var1_list[] = $department_id;
 
    }
 

	
 
  while (count($department_var1_list))
 
    {
 
      /* Start back on the form page... */
 
      $uri = $form_uri;
 

	
 
  /*
 
   * LIST.VAR<X>_<N>: <X> is the column, <N> is the row. There
 
   * are apparently a max of 5 rows (see the LIST.VAR<X>_MAX
 
   * below).
 
   *
 
   * Columns:
 
   * LIST.VAR1: department
 
   * LIST.VAR2: course_level
 
   * LIST.VAR3: IIRC, a course identifier, such as 156 from MATH-156
 
   * LIST.VAR4: I forget
 
   *
 
   */
 
  $semester_str = sprintf("%02d/%s", $semester->year_get() % 100, strtoupper(substr($semester->season, 0, 2)));
 
  school_crawl_logf($school_crawl_log, 6, 'Using %s for a semester string.',
 
		    $semester_str);
 
  $form = array('VAR1' => $semester_str,
 
		'LIST.VAR1_1' => '',
 
		'LIST.VAR2_1' => '',
 

	
 
		/*
 
		 * Other form items we're not querying but which need
 
		 * to be sent blankly.
 
		 */
 
@@ -162,48 +172,57 @@ function calvin_crawl_semester(array $sc
 
		  */
 
		 /*'SUBMIT2' => 'SUBMIT',*/
 

	
 
		 'DATE.VAR1' => '',
 
		 'DATE.VAR2' => '',
 

	
 
		 'LIST.VAR1_CONTROLLER' => 'LIST.VAR1',
 
		 'LIST.VAR1_MEMBERS' => 'LIST.VAR1*LIST.VAR2*LIST.VAR3*LIST.VAR4',
 
		);
 
  foreach (array('1', '2', '3', '4') as $list_col)
 
    {
 
      $colname = 'LIST.VAR' . $list_col;
 
      if (!isset($form[$colname . '_MAX']))
 
	$form[$colname . '_MAX'] = '5';
 

	
 
      foreach (array('1', '2', '3', '4', '5') as $list_row)
 
	{
 
	  $rowname = $colname . '_' . $list_row;
 
	  if (!isset($form[$rowname]))
 
	    $form[$rowname] = '';
 
	}
 
    }
 

	
 
  /*
 
   * Fill in up to five departments (the LIST.VAR1 column), one per row.
 
   */
 
  foreach (array('1', '2', '3', '4', '5') as $var1_row)
 
    if (count($department_var1_list))
 
      {
 
	$form['LIST.VAR1_' . $var1_row] = array_shift($department_var1_list);
 
      }
 

	
 
  /*
 
   * VAR7 and VAR8 are a constraint on the times during which
 
   * courses meet
 
   */
 
  $form['VAR7'] = '';
 
  $form['VAR8'] = '';
 

	
 
  /* ``course title keywords'' */
 
  $form['VAR3'] = '';
 

	
 
  /* ? */
 
  $form['VAR6'] = '';
 
  $form['VAR21'] = '';
 

	
 
  /* instructor's last name */
 
  $form['VAR9'] = '';
 

	
 
  /*
 
   * VAR10 through VAR16 are Monday through Sunday checkboxes
 
   * for days of the week that classes meet.
 
   *
 
   * But we specify no days of the week to avoid this being a
 
   * constraint ;-).
 
   */
 
  /*
 
@@ -398,48 +417,49 @@ function calvin_crawl_semester(array $sc
 
	    {
 
	      $date_start_time = school_crawl_gmmktime($date_start_time, -5 * 60*60);
 
	      $semester->time_start_pool_add($date_start_time);
 
	    }
 
	  if ($date_end_time !== FALSE)
 
	    {
 
	      $date_end_time = school_crawl_gmmktime($date_end_time, -5 * 60*60);
 
	      $semester->time_end_pool_add($date_end_time);
 
	    }
 
	}
 
	}
 

	
 
      if (!preg_match(';Page ([0-9]+) of ([0-9]+)\</td\>$;m', $html, $pages))
 
	{
 
	  school_crawl_logf($school_crawl_log, 0, 'Unable to determine the number of pages in this Calvin resultset');
 
	  break;
 
	}
 

	
 
      school_crawl_logf($school_crawl_log, 8, "calvin_crawl(): finished page %d of %d with %d courses.", $pages[1], $pages[2], $list_row - 1);
 

	
 
      $form = array(
 
		    'ACTION*Grp:WSS.COURSE.SECTIONS' => 'NEXT',
 
		    );
 
    }
 
    }
 

	
 
  $has_stat = FALSE;
 
  foreach ($skipped_sections as $reason => $num)
 
    {
 
      if (!$num)
 
	continue;
 
      if (!$has_stat)
 
	school_crawl_logf($school_crawl_log, 7, 'Skipped some sections for <reason>: <number skipped>:');
 
      school_crawl_logf($school_crawl_log, 7, "%s: %d", $reason, $num);
 
    }
 

	
 
  /*
 
   * Calculate lab-based course dependencies.
 
   */
 
  school_crawl_logf($school_crawl_log, 7, 'Adding implicit lab dependencies.');
 
  foreach ($semester->departments_get() as $department)
 
    foreach ($semester->department_classes_get($department) as $course)
 
    {
 
      $the_course = $semester->class_get($department, $course);
 
      $lab_course = $semester->class_get($department, $course . 'L');
 
      if (!empty($lab_course))
 
	{
 
	  $the_course->dependency_add($lab_course);
 
	  school_crawl_logf($school_crawl_log, 8, "Adding dependency of %s-%s for %s-%s.",
0 comments (0 inline, 0 general)