Changeset - 3c2d5e03082f
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 14 years ago 2012-02-08 01:25:34
ohnobinki@ohnopublishing.net
Fix ccbcmd's support for multiple section meetings and for crawling the professor associated with sections.
1 file changed with 45 insertions and 29 deletions:
0 comments (0 inline, 0 general)
school.d/ccbcmd.crawl.inc
Show inline comments
 
<?php /* -*- mode: php; -*- */
 
/*
 
 * Copyright 2011 Nathan Phillip Brink <ohnobinki@ohnopublishing.net>
 
 * Copyright 2012 Nathan Phillip Brink <ohnobinki@ohnopublishing.net>
 
 *
 
 * This file is a part of slate_permutate.
 
 *
 
 * slate_permutate is free software: you can redistribute it and/or modify
 
 * it under the terms of the GNU Affero General Public License as published by
 
 * the Free Software Foundation, either version 3 of the License, or
 
@@ -213,37 +213,52 @@ function ccbcmd_crawl_semester($school, 
 
      /*
 
       * There are some rows with the time set to TBA and with empty
 
       * section_id columns. Respond to this by skipping empty
 
       * section_id columns since there's no useful data in these
 
       * rows. We use strlen() < 3 because trim() doesn't take care of
 
       * &nbsp; :-/
 
       *
 
       * There are other times that the section_id row is empty and
 
       * the time column is set to something. In this case, the
 
       * subsequent rows are describing additional SectionMeetings
 
       * which should be added to the existing Section.
 
       */
 
      $section_id = trim($children->item($section_offsets['section_id'])->textContent);
 
      if (strlen($section_id) < 3)
 
	continue;
 
      if (strlen($section_id) > 2)
 
	{
 
	  /**
 
	   * \todo
 
	   *   If a section's section ID ends in `W', like `EFW', that
 
	   *   means it's a semi-online course. We should probably
 
	   *   distinguish these from normal sections, probably
 
	   *   disabling them from showing up by default.
 
	   */
 
	  $section_id_parts = Section::parse($section_id);
 
	  $registration_number = $children->item($section_offsets['registration_number'])->textContent;
 
	  $credit_hours = (float)$children->item($section_offsets['credits'])->textContent;
 
	  $section = new Section($section_id_parts['section'], array(), $registration_number, $credit_hours);
 
	  $semester->section_add($section_id_parts['department'], $section_id_parts['course'], $section,
 
				 trim($children->item($section_offsets['title'])->textContent));
 
	}
 
      if (empty($section))
 
	{
 
	  school_crawl_logf($school_crawl_log, 4, "Expected a section row beofre having a row with only partial data. Ignoring row.");
 
	  continue;
 
	}
 

	
 
      $section_id_parts = Section::parse($section_id);
 

	
 
      $registration_number = $children->item($section_offsets['registration_number'])->textContent;
 
      $instructor = $children->item($section_offsets['instructor'])->textContent;
 

	
 
      $section_meetings = array();
 
      {
 
	$time_range_text = $children->item($section_offsets['times'])->textContent;
 
	if (strpos($time_range_text, 'TBA') !== FALSE)
 
	  {
 
	    /*
 
	     * Add the section to the autocomplete list, just without
 
	     * any meeting info (i.e., $section_meetings is still
 
	     * empty now).
 
	     */
 
	    $semester->section_add($section_id_parts['department'], $section_id_parts['course'],
 
				   new Section($section_id_parts['section'], $section_meetings, $registration_number));
 
	    continue;
 
	  /*
 
	   * There is no way to get meeting info and create
 
	   * SectionMeetings.
 
	   */
 
	  continue;
 

	
 
	  }
 
	if (($dash_pos = strpos($time_range_text, '-')) === FALSE)
 
	  {
 
	    school_crawl_logf($school_crawl_log, 0, "Unable to understand course's time range format, cannot find dash: ``%s''.",
 
		    $time_range_text);
 
	    return 1;
 
	  }
 
@@ -251,47 +266,48 @@ function ccbcmd_crawl_semester($school, 
 
	$time_start_text = substr($time_range_text, 0, $dash_pos);
 
	$time_start = strptime($time_start_text, '%I:%M %p');
 
	$time_end_text = substr($time_range_text, $dash_pos + 1);
 
	/*
 
	 * Make sure that _only_ one date range is specified to ensure
 
	 * data integrity. I.e., make sure that the college doesn't
 
	 * suddenly support multiple meeting times without our
 
	 * anticipating that and then cause us to have invalid
 
	 * data. ;-). --binki
 
	 * suddenly support multiple meeting times in one field
 
	 * without our anticipating that and then cause us to have
 
	 * invalid data. ;-). The college does support multiple
 
	 * section meetings, it does this by having multiple rows per
 
	 * section. The extra rows _only_ have the days, time, prof,
 
	 * and dates columns. --binki
 
	 */
 
	if (strpos($time_end_text, '-') !== FALSE)
 
	  {
 
	    school_crawl_logf($school_crawl_log, 0, "College seems to support multiple meeting times per semester which we don't know how to parse (even though slate_permutate itself can handle this situation): ``%s'' time_end_text: ``%s''.",
 
	    school_crawl_logf($school_crawl_log, 4, "Entry seems to have invalid date column data: ``%s'' time_end_text: ``%s''.",
 
		    $time_range_text, $time_end_text);
 
	    return 1;
 
	    continue;
 
	  }
 
	$time_end = strptime($time_end_text, '%I:%M %p');
 
	if ($time_end === FALSE || $time_start === FALSE)
 
	  {
 
	    school_crawl_logf($school_crawl_log, 0, "Error parsing start or end time: start: ``%s'' end: ``%s''.",
 
	    school_crawl_logf($school_crawl_log, 4, "Error parsing start or end time: start: ``%s'' end: ``%s''.",
 
		    $time_start_text, $time_end_text);
 
	    return 1;
 
	    continue;
 
	  }
 

	
 
	$days = school_crawl_days_str_format($school_crawl_log, $children->item($section_offsets['days'])->textContent);
 

	
 
	$section_meetings[] = new SectionMeeting($days, school_crawl_time_format($time_start), school_crawl_time_format($time_end),
 
	$section->meeting_add(new SectionMeeting($days, school_crawl_time_format($time_start), school_crawl_time_format($time_end),
 
						 $children->item($section_offsets['location'])->textContent,
 
						 $instructor);
 
						 'lecture',
 
						 $instructor));
 

	
 
	/* check if a semester's date range should be increased */
 
	$section_dates = $children->item($section_offsets['dates'])->textContent;
 
	if (preg_match(';^([0-9]+)/([0-9]+)-([0-9]+)/([0-9]+)$;', $section_dates, $section_dates_matches))
 
	  {
 
	    $semester->time_start_set_test(gmmktime(0, 0, 0, $section_dates_matches[1], $section_dates_matches[2], $semester->year_get()));
 
	    $semester->time_end_set_test(gmmktime(0, 0, 0, $section_dates_matches[3], $section_dates_matches[4], $semester->year_get()));
 
	  }
 
      }
 

	
 
      $semester->section_add($section_id_parts['department'], $section_id_parts['course'],
 
			     new Section($section_id_parts['section'], $section_meetings, $registration_number));
 
    }
 

	
 
  return 0;
 
}
 

	
 
function ccbcmd_crawl_curlhook(&$curl)
0 comments (0 inline, 0 general)