Changeset - b0ade6be0d80
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 15 years ago 2011-03-25 11:14:20
ohnobinki@ohnopublishing.net
Fix confusion between course_id and course synonyms in umich crawler.
1 file changed with 10 insertions and 9 deletions:
0 comments (0 inline, 0 general)
school.d/umich.crawl.inc
Show inline comments
 
@@ -138,27 +138,27 @@ function umich_crawl_csv($school_crawl_l
 
		    $semester);
 

	
 
  $cookies = array();
 
  $uri = $csv_href;
 

	
 
  /* parse into lines and then each row needs to be individually parsed */
 
  $csv = str_getcsv(school_crawl_geturi($uri, $cookies, $school_crawl_log), PHP_EOL);
 

	
 
  $fields = array(
 
		  'Term' => FALSE /* $semester->season_get() . ' ' . $semester->year_get() */,
 
		  'Session' => FALSE /* "Regular Academic Session", "First 7 Week Session", "Second 7 Week Session" <-- half-semester support? */,
 
		  'Acad Group' => FALSE /* long version of the department sorta, more general than the subject field */,
 
		  'Class Nbr' => FALSE /* unqualified course_id */,
 
		  'Class Nbr' => FALSE /* section synonym */,
 
		  'Subject' => FALSE /* "Mathematics (MATH)" */,
 
		  'Catalog Nbr' => FALSE /* "10001", i.e. section synonym */,
 
		  'Catalog Nbr' => FALSE /* "201", unqualified course_id */,
 
		  'Section' => FALSE /* You still reading these comments? */,
 
		  'Course Title' => FALSE /* for your sake, I hope you aren't */,
 
		  'Component' => FALSE /* "LAB", "LEC", "REC" -- i.e., meeting_type(?) */,
 
		  'Codes' => FALSE /* "P  W", "P   ", "P R ", "PI  ", "A   ", "P RW" ??????? (reminds me of ``svn status''). If flag[3] = 'W', then the class has meeting times */,
 
		  'M' => FALSE /* if a day is enabled, it is set to itself. I.e., $row['M'] = 'M' or $row['M'] = '' */,
 
		  'T' => FALSE,
 
		  'W' => FALSE,
 
		  'TH' => FALSE,
 
		  'F' => FALSE,
 
		  'S' => FALSE,
 
		  'SU' => FALSE /* OK, we'll have to add Sunday support someday ;-) */,
 
		  'Start Date' => FALSE /* yea! */,
 
@@ -191,55 +191,56 @@ function umich_crawl_csv($school_crawl_l
 
	school_crawl_logf($school_crawl_log, 2, "Unable to find column %s in CSV for %s. Skipping this semester.",
 
			  $field, $semester);
 
	return 1;
 
      }
 

	
 
  /* remove the row with heading from the CSV dataset */
 
  unset($csv[0]);
 

	
 
  /* Now actually parse some data :-). */
 
  foreach ($csv as $row)
 
    {
 
      $row = str_getcsv($row);
 
      $synonym = trim($row[$fields['Catalog Nbr']]);
 
      $synonym = trim($row[$fields['Class Nbr']]);
 
      $course_id = trim($row[$fields['Catalog Nbr']]);
 

	
 
      if (!preg_match(';\(([A-Z]+)\)$;', $row[$fields['Subject']], $matches))
 
	{
 
	  school_crawl_logf($school_crawl_log, 5, "Unable to parse department string `%s'. Skipping section/course (synonym=%s).",
 
			    $row[$fields['Subject']], $synonym);
 
	  continue;
 
	}
 
      $dept = $matches[1];
 

	
 
      $days = '';
 
      foreach (array('M' => 'm', 'T' => 't', 'W' => 'w', 'TH' => 'h', 'F' => 'f', 'S' => 's')
 
	       as $field => $day)
 
	if (!strlen(trim($row[$fields[$field]])))
 
	if (strlen(trim($row[$fields[$field]])))
 
	  $days .= $day;
 

	
 
      if (!preg_match(';^([0-9]+)-([0-9]+)([AP])M$;', $row[$fields['Time']], $matches))
 
	{
 
	  school_crawl_logf($school_crawl_log, 4, "Unable to parse meeting time: `%s'. Skipping section/meeting (synonym=%s).",
 
			    $row[$fields['Time']], $synonym);
 
	  /* ensure that the class is added nonetheless */
 
	  if ($semester->class_get($dept, $row[$fields['Class Nbr']]) === NULL)
 
	    $semester->class_add(new Course($dept . '-' . $row[$fields['Class Nbr']], $row[$fields['Course Title']]));
 
	  if ($semester->class_get($dept, $course_id) === NULL)
 
	    $semester->class_add(new Course($dept . '-' . $course_id, $row[$fields['Course Title']]));
 
	  continue;
 
	}
 
      $time_end = umich_crawl_time($matches[2], $matches[3]);
 
      $time_start = umich_crawl_time($matches[1], FALSE, $time_end);
 

	
 
      $semester->section_meeting_add($dept, $row[$fields['Class Nbr']], $row[$fields['Course Title']],
 
				     $row[$fields['Section']], $row[$fields['Catalog Nbr']], $row[$fields['Instructor']],
 
				     new SectionMeeting($days, $time_start, $time_end, $row[$fields['Location']], school_crawl_meeting_type($row[$fields['Component']])));
 
      $semester->section_meeting_add($dept, $course_id, trim($row[$fields['Course Title']]),
 
				     trim($row[$fields['Section']]), $synonym, trim($row[$fields['Instructor']]),
 
				     new SectionMeeting($days, $time_start, $time_end, trim($row[$fields['Location']]), school_crawl_meeting_type(trim($row[$fields['Component']]))));
 
    }
 
}
 

	
 
/**
 
 * \brief
 
 *   Try to turn a umich-formatted time into something usable.
 
 *
 
 * \param $raw
 
 *   The raw input.
 
 * \param $xm
 
 *   FALSE or, if PM or AM was specified, 'P' for PM and 'A' for AM.
 
 * \param $before
0 comments (0 inline, 0 general)