Changeset - 2a7a5fca904a
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 15 years ago 2011-03-17 18:18:27
ohnobinki@ohnopublishing.net
cedarville: When crawling, detect a semester's start and end dates based on certain course meetings which have date information.
1 file changed with 24 insertions and 8 deletions:
0 comments (0 inline, 0 general)
school.d/cedarville.crawl.inc
Show inline comments
 
@@ -83,24 +83,26 @@ function cedarville_crawl(array &$semest
 
      $semester_href = $department_a_dom->getAttribute('href');
 
      $semester_href_parts = split('_', $semester_href);
 

	
 
      $semester_name = $department_a_dom->textContent;
 
      if (stripos($semester_name, 'graduate') !== FALSE
 
	  || strpos($semester_href, 'index') === FALSE)
 
	/* cedarville has about 1 graduate course, lol */
 
	continue;
 
      $semester_name_parts = split(' ', $semester_name);
 

	
 
      $semester_year = $semester_name_parts[0];
 
      $semester_season = strtolower($semester_name_parts[1]);
 
      $semester_min_date_start = 0;
 
      $semester_max_date_end = 0;
 

	
 
      $semester = new Semester($semester_year, $semester_season);
 

	
 
      school_crawl_logf($school_crawl_log, 6, "Crawling semester: %s.",
 
			$semester_name);
 

	
 
  /*
 
   * We need two passes because the first department's code name is
 
   * not available in the first pageload.
 
   */
 
  $departments = array();
 
  if (cedarville_crawl_departments_get($basepath . $semester_href, $departments, $semester_href_parts[0], $school_crawl_log))
 
@@ -194,57 +196,71 @@ function cedarville_crawl(array &$semest
 
	   * different lab meetings and stuff...
 
	   */
 
	  $meetings_str = $course_table[6];
 
	  if (strpos($meetings_str, 'TBA') !== FALSE)
 
	    {
 
	      school_crawl_logf($school_crawl_log, 8, "Skipping %s because its meeting time info has `TBA' in it.", implode('-', $section_parts));
 
	      continue;
 
	    }
 
	  $meetings = array();
 
	  $meeting_multiple_types = array();
 
	  while (strlen($meetings_str) > 5)
 
	    {
 
	      if (!preg_match(';^ ([A-Z]+) +([A-Z]+[A-Z0-9]*) +([MTWRF]{1,5}) +([0-9:AP]+)-([0-9:AP]+);',
 
			      $meetings_str, $meeting_matches))
 
	      $meeting_start_regex = ';^';
 
	      $meeting_base_regex = ' ([A-Z]+) +([A-Z]+[A-Z0-9]*) +([MTWRF]{1,5}) +([0-9:AP]+)-([0-9:AP]+)';
 
	      $meeting_date_regex = 'Dates:[^0-9]+([/0-9]{8})-([/0-9]{8})';
 
	      $meeting_end_regex = ';';
 
	      if (!preg_match($meeting_start_regex . $meeting_base_regex . $meeting_date_regex . $meeting_end_regex,
 
			      $meetings_str, $meeting_matches)
 
		  && !preg_match($meeting_start_regex . $meeting_base_regex . $meeting_end_regex,
 
				 $meetings_str, $meeting_matches))
 
		{
 
		  if (preg_match(';^Dates:[^0-9]+([/0-9]{8})-([/0-9]{8});',
 
		  if (preg_match($meeting_start_regex . $meeting_date_regex . $meeting_end_regex,
 
				 $meetings_str, $meeting_matches))
 
		    {
 
		      /**
 
		       * \todo
 
		       *   This is a perfect place to get Semester's
 
		       *   time_start and time_end values.
 
		       */
 

	
 
		      school_crawl_logf($school_crawl_log, 8, "Skipping some meeting data for %s because it is a date range: `%s'.",
 
					implode('-', $section_parts), $meeting_matches[0]);
 
		      $meetings_str = substr($meetings_str, strlen($meeting_matches[0]));
 
		      continue;
 
		    }
 

	
 
		  school_crawl_logf($school_crawl_log, 6, "Error parsing meeting time. Given `%s'. Skipping %s.", $meetings_str, implode('-', $section_parts));
 
		  break;
 
		}
 
	      /* prepare for parsing the next meeting time */
 
	      $meetings_str = substr($meetings_str, strlen($meeting_matches[0]));
 

	
 
	      $days = school_crawl_days_str_format($meeting_matches[3]);
 
	      $time_start = school_crawl_time_format(strptime($meeting_matches[4] . 'M', '%I:%M%p'));
 
	      $time_end = school_crawl_time_format(strptime($meeting_matches[5] . 'M', '%I:%M%p'));
 
	      $room = $meeting_matches[2];
 

	
 
	      $type = $meeting_matches[1];
 
	      while (isset($meeting_type_maps[$type]))
 
		$type = $meeting_type_maps[$type];
 
	      $type = strtolower($type);
 

	
 
	      /* check for daterange information -- i.e., if the first regex successfully matched: */
 
	      if (count($meeting_matches) > 7)
 
		{
 
		  $date_start = school_crawl_mktime(strptime($meeting_matches[6], '%m/%d/%y'));
 
		  $date_end = school_crawl_mktime(strptime($meeting_matches[7], '%m/%d/%y'));
 
		  if (!empty($date_start) && !empty($date_end))
 
		    {
 
		      $semester->time_start_set_test($date_start);
 
		      $semester->time_end_set_test($date_end);
 
		    }
 
		}
 

	
 
	      $meetings[] = new SectionMeeting($days, $time_start, $time_end,
 
					       $room, $type);
 
	    }
 

	
 
	  $semester->section_add($section_parts['department'], $section_parts['course'],
 
				 new Section($section_parts['section'], $meetings,
 
					     $synonym, $instructor));
 
	}
 
    }
 

	
 
  $semesters[] = $semester;
 
    }
0 comments (0 inline, 0 general)