Changeset - 7d36a6a1f3b6
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 15 years ago 2011-02-05 11:40:48
ohnobinki@ohnopublishing.net
calvin: When crawling, add courses to the autocomplete cache even if they have no valid sections. This should reduce user confusion.
1 file changed with 26 insertions and 0 deletions:
0 comments (0 inline, 0 general)
school.d/calvin.crawl.inc
Show inline comments
 
@@ -285,56 +285,63 @@ function calvin_crawl(Semester $semester
 
	   * '01/31/2011-05/18/2011 Practicum Days to be Announced, Times to be AnnouncedTo Be Arranged, Room TBA'
 
	   *
 
	   * OR
 
	   *
 
	   * '01/31/2011-05/12/2011 Music Ensemble Monday, Wednesday, Thursday, Friday 03:30PM - 04:20PM, Covenant Fine Arts Center, Room 135'
 
	   *
 
	   * In the second case.... we'll just ignore the section. In
 
	   * the last case, we have to be careful about parsing out
 
	   * Monday.
 
	   *
 
	   * At this point, we don't parse most tokens. We group them
 
	   * off. We get the first date, the second date, the type
 
	   * ('Lecture', 'Practicum', or some other unknown value),
 
	   * the list of days of week the section meets, the start
 
	   * time, the end time, and then the meeting location.
 
	   */
 
	  if (strpos($sec_meeting_info, 'Times to be Announced') !== FALSE
 
	      || strpos($sec_meeting_info, 'Days to be Announced') !== FALSE)
 
	    {
 
	      if ($verbosity > 2)
 
		error_log('Skipping class because of incomplete meeting time information: '
 
			  . implode('-', $section_id) . ' has meeting info of `'
 
			  . $sec_meeting_info . '\'');
 
	      $skipped_sections['incomplete meeting info'] ++;
 
	      /* Still add to have less confusing autocomplete */
 
	      calvin_crawl_course_add($semester, $section_id['department'], $section_id['course']);
 
	      continue;
 
	    }
 

	
 
	  if (!preg_match(';^([0-9]{2}/[0-9]{2}/[0-9]{4})-([0-9]{2}/[0-9]{2}/[0-9]{4}) (([^ ,]+ )+)([^0-9]+) ([^ ]+) - ([^ ]+), (.*)$;', $sec_meeting_info, $meeting_info_matches))
 
	    {
 
	      error_log('Unable to parse calvin section meeting info string into start/end/days information for '
 
			. implode('-', $section_id) . ': ``' . $sec_meeting_info . '\'\'');
 
	      $skipped_sections['invalid meeting info format'] ++;
 
	      /*
 
	       * Still add at least the course to the semester so that
 
	       * it shows up in autocomplete.
 
	       */
 
	      calvin_crawl_course_add($semester, $section_id['department'], $section_id['course']);
 
	      continue;
 
	    }
 
	  $date_start = $meeting_info_matches[1];
 
	  $date_end = $meeting_info_matches[2];
 
	  /* e.g., 'Lecture', 'Practicum' */
 
	  $meeting_type = strtolower(trim($meeting_info_matches[3]));
 

	
 
	  $days = school_crawl_days_format(explode(', ', $meeting_info_matches[5]));
 
	  $time_start = school_crawl_time_format(strptime($meeting_info_matches[6], '%I:%M%p'));
 
	  $time_end = school_crawl_time_format(strptime($meeting_info_matches[7], '%I:%M%p'));
 
	  $meeting_place = $meeting_info_matches[8];
 

	
 
	  if ($verbosity > 5)
 
	    foreach (array('date_start', 'date_end', 'meeting_type', 'days', 'time_start', 'time_end', 'meeting_place', 'meeting_type') as $var)
 
	      echo $var . ':' . ${$var} . "\n";
 

	
 
	  $section = new Section($section_id['section'], array(new SectionMeeting($days, $time_start, $time_end, $meeting_place, $meeting_type)), $synonym, $faculty_name);
 
	  $semester->section_add($section_id['department'], $section_id['course'], $section);
 
	}
 

	
 
      if (!preg_match(';Page ([0-9]+) of ([0-9]+)\</td\>$;m', $html, $pages))
 
	{
 
	  error_log('Unable to determine the number of pages in this Calvin resultset');
 
	  break;
 
@@ -407,24 +414,43 @@ function dom_id_content($domdocument, $i
 
  $node = $domdocument->getElementById($id);
 
  if ($node)
 
    {
 
      return $node->nodeValue;
 
    }
 
  return NULL;
 
}
 

	
 
/**
 * \brief
 *   Removes every <noscript>...</noscript> element from an HTML
 *   string.
 *
 * WebAdvisor likes to put <noscript> inside a document's <head>,
 * which is invalid HTML that DOM can't handle, so the elements are
 * stripped textually before the markup is parsed.
 *
 * Tag names are matched case-insensitively, the opening tag may
 * carry attributes, and the element's content may span multiple
 * lines.
 *
 * \param $html
 *   The input HTML to filter.
 * \return
 *   The HTML with all <noscript> elements removed. If the regular
 *   expression engine reports an error, the input is returned
 *   unmodified.
 */
function calvin_crawl_noscript_filter($html)
{
  /*
   * `i': HTML tag names are case-insensitive.
   * `s': let `.' cross newlines, as the element is usually multi-line.
   * The lazy `.*?' stops at the first closing tag so that separate
   * <noscript> elements are removed individually.
   */
  $filtered = preg_replace(';<noscript\b[^>]*>.*?</noscript\s*>;is', '', $html);

  /*
   * preg_replace() returns NULL when PCRE fails (for example, when
   * the backtrack limit is exceeded). Passing that on would throw
   * away the whole page, so fall back to the unfiltered input.
   */
  if ($filtered === NULL)
    return $html;

  return $filtered;
}
 

	
 
/**
 * \brief
 *   Ensure that a semester contains a given course, adding an empty
 *   course entry when it is not there yet.
 *
 * The semester is queried with class_get(); only when that lookup
 * compares equal to NULL is a new Course created and handed to
 * class_add(). An already-present course is left untouched.
 *
 * \param $semester
 *   The semester to which the course should be appended.
 * \param $department
 *   The department of the course to add.
 * \param $course_id
 *   The course_id which, joined to the department string with a
 *   `-', forms the fully-qualified course_id.
 */
function calvin_crawl_course_add(Semester $semester, $department, $course_id)
{
  /* The course is already known to this semester: nothing to do. */
  if ($semester->class_get($department, $course_id) != NULL)
    return;

  $qualified_course_id = $department . '-' . $course_id;
  $semester->class_add(new Course($qualified_course_id));
}
0 comments (0 inline, 0 general)