Changeset - b63a5ae7d0a0
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 14 years ago 2012-02-16 00:22:02
ohnobinki@ohnopublishing.net
Add support for friendly department/subject names to calvin and fix parsing the course title.
1 file changed with 10 insertions and 1 deletion:
0 comments (0 inline, 0 general)
school.d/calvin.crawl.inc
Show inline comments
 
@@ -101,48 +101,57 @@ function calvin_crawl_semester_list(arra
 
  return 0;
 
}
 

	
 
/**
 
 * \brief
 
 *   Crawl the courses for a semester from Calvin College.
 
 *
 
 * \param $school
 
 *   The calvin school handle.
 
 * \param $semester
 
 *   The Semester object to populate with courses.
 
 * \param $school_crawl_log
 
 *   The logger handle.
 
 */
 
function calvin_crawl_semester(array $school, Semester $semester, &$school_crawl_log)
 
{
 
  $cookies = array();
 
  $uri = 'https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
 
  $html = calvin_crawl_geturi($uri, $cookies, $school_crawl_log);
 
  $seed_dom = new DOMDocument();
 
  $seed_dom->loadHTML($html);
 
  $return_url = dom_input_value($seed_dom, 'RETURN.URL');
 

	
 
  /*
 
   * First, read all of the friendly subject/department names. They're
 
   * not in the output, but they're in the ``Subjects'' dropdown of
 
   * the input form. The <select name="LIST.VAR1_1" id="LIST_VAR1_1"/>
 
   * is associated with subjects/departments.
 
   */
 
  foreach (school_crawl_form_select_array($seed_dom->getElementById('LIST_VAR1_1')) as $department_id => $department_name)
 
    $semester->department_name_set($department_id, trim(reset($department_name)));
 

	
 
  /*
 
   * LIST.VAR<X>_<N>: <X> is the column, <N> is the row. There
 
   * are apparently a max of 5 rows (see the LIST.VAR<X>_MAX
 
   * below).
 
   *
 
   * Columns:
 
   * LIST.VAR1: department
 
   * LIST.VAR2: course_level
 
   * LIST.VAR3: IIRC, a course identifier, such as 156 from MATH-156
 
   * LIST.VAR4: I forget
 
   *
 
   */
 
  $semester_str = sprintf("%02d/%s", $semester->year_get() % 100, strtoupper(substr($semester->season, 0, 2)));
 
  school_crawl_logf($school_crawl_log, 6, 'Using %s for a semester string.',
 
		    $semester_str);
 
  $form = array('VAR1' => $semester_str,
 
		'LIST.VAR1_1' => '',
 
		'LIST.VAR2_1' => '',
 

	
 
		/*
 
		 * Other form items we're not querying but which need
 
		 * to be sent blankly.
 
		 */
 
		 'RETURN.URL' => $return_url,
 
		 'SUBMIT_OPTIONS' => '',
 
@@ -227,49 +236,49 @@ function calvin_crawl_semester(array $sc
 

	
 
	  /* check if we're done with this particular page */
 
	  if (!strlen($openness) && !strlen($sec_short_title) && !strlen($sec_meetings_info))
 
	    {
 
	      $list_done = TRUE;
 
	      break;
 
	    }
 

	
 
	  /*
 
	   * The same info below should be retrievable with 
 
	   * dom_id_content($results_dom, 'SEC_FACULTY_INFO_' . $list_row);
 
	   */
 
	  $faculty_name = dom_input_value($results_dom, 'SEC.FACULTY.INFO_' . $list_row);
 
	  $credits = dom_input_value($results_dom, 'SEC.MIN.CRED_' . $list_row); /* or id="SEC_FACULTY_INFO_$list_row" */
 
	  $comment = dom_id_content($results_dom, 'SEC_COMMENTS_' . $list_row); /* or name="SEC.COMMENTS_$list_row" */
 
	  $short_title_onclick = $results_dom->getElementById('SEC_SHORT_TITLE_' . $list_row)->getAttribute('onclick');
 

	
 
	  /* parse */
 
	  $section_id = Section::parse($sec_short_title);
 
	  $synonym = NULL;
 
	  $title = NULL;
 
	  if (preg_match(';\(([0-9]+)\)(.*);', $sec_short_title, $matches))
 
	    {
 
	      $synonym = $matches[1];
 
	      $title = $matches[2];
 
	      $title = trim($matches[2]);
 
	    }
 

	
 
	  school_crawl_logf($school_crawl_log, 10, "");
 
	  school_crawl_logf($school_crawl_log, 10, implode('-', $section_id) . ': ' . $sec_short_title);
 
	  school_crawl_logf($school_crawl_log, 10, $openness);
 
	  school_crawl_logf($school_crawl_log, 10, $sec_meetings_info);
 
	  school_crawl_logf($school_crawl_log, 10, $faculty_name);
 
	  school_crawl_logf($school_crawl_log, 10, $credits);
 
	  school_crawl_logf($school_crawl_log, 10, $comment);
 
	  school_crawl_logf($school_crawl_log, 10, "synonym: %s", $synonym);
 
	  school_crawl_logf($school_crawl_log, 10, "title: %s", $title);
 

	
 
	  /*
 
	   * The input format for this is, thankfully, pretty rigid
 
	   * :-D. Example input format:
 
	   *
 
	   * '01/31/2011-05/11/2011 Lecture Monday, Wednesday 01:00PM - 03:50PM, Spoelhof Center, Room 101'
 
	   *
 
	   * OR
 
	   *
 
	   * '01/31/2011-05/18/2011 Practicum Days to be Announced, Times to be AnnouncedTo Be Arranged, Room TBA'
 
	   *
 
	   * OR
 
	   *
0 comments (0 inline, 0 general)