Changeset - 3b78fdf04ce4
[Not reviewed]
default
0 10 0
Nathan Brink (binki) - 15 years ago 2011-02-08 22:50:38
ohnobinki@ohnopublishing.net
Support multiple semesters. Changed crawler API to accept an array to which semesters are appended instead of passing the crawlers an already-created semester object to fill.
10 files changed with 522 insertions and 121 deletions:
0 comments (0 inline, 0 general)
auto.php
Show inline comments
 
@@ -42,50 +42,52 @@ if (isset($_REQUEST['txt'])) {
 
}
 
else {
 
  header('Content-Type: application/json; encoding=utf-8');
 
}
 

	
 
if (!isset($_REQUEST['term'])) {
 
  clean_empty_exit();
 
}
 

	
 
$getsections = FALSE;
 
if (isset($_REQUEST['getsections'])) {
 
  $getsections = TRUE;
 
}
 

	
 
$term = $_REQUEST['term'];
 
$term_parts = Course::parse($term);
 
if (!count($term_parts)) {
 
  clean_empty_exit();
 
}
 

	
 
$school = school_load_guess();
 
if (!$school['crawled']) {
 
  clean_empty_exit();
 
}
 
$semester = school_semester_guess($school);
 

	
 
$cache_dir = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'cache' . DIRECTORY_SEPARATOR . 'auto' . DIRECTORY_SEPARATOR . $school['id'] . DIRECTORY_SEPARATOR;
 
$cache_dir = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'cache' . DIRECTORY_SEPARATOR . 'auto'
 
  . DIRECTORY_SEPARATOR . $school['id'] . DIRECTORY_SEPARATOR . $semester['id'] . DIRECTORY_SEPARATOR;
 

	
 
/*
 
 * autocomplete the list of departments. If the user has already
 
 * entered a valid department name _and_ delimited it, however, go on
 
 * to the next autocompletion step.
 
 */
 
$term_strlen = strlen($term);
 
$dept_strlen = strlen($term_parts['department']);
 
$dept = $term_parts['department'];
 
if (!$getsections && count($term_parts) == 1 && $term_strlen == strlen($dept))
 
  {
 
    $dept_file = $cache_dir . '-depts';
 
    if (!file_exists($dept_file)) {
 
      clean_empty_exit();
 
    }
 
    $departments = unserialize(file_get_contents($dept_file));
 
    $json_depts = array();
 
    foreach ($departments as $key => $department) {
 
      if (!strncmp($department, $dept, $dept_strlen)) {
 
	$json_depts[] = $department;
 
      }
 
    }
 

	
 
    echo json_encode($json_depts);
inc/admin.inc
Show inline comments
 
@@ -105,215 +105,247 @@ function school_cache($schools)
 
	      if (!isset($domain_cache_ptr[$domain_part])
 
		  || !is_array($domain_cache_ptr[$domain_part]))
 
		$domain_cache_ptr[$domain_part] = array();
 
	      $domain_cache_ptr =& $domain_cache_ptr[$domain_part];
 
	    }
 
	  /*
 
	   * get the last part which unambiguously identifies this
 
	   * school combined with the previous parts
 
	   */
 
	  $domain_part = array_shift($domain_parts);
 
	  $domain_cache_ptr[$domain_part] = $school['id'];
 
	}
 

	
 

	
 
      /*
 
       * autocomplete stuff -- per school
 
       *
 
       * We don't do anything if crawled_notreally is set because this
 
       * way we can get incremental crawling. Really useful if one's
 
       * just debugging one of the school crawling scripts and doesn't
 
       * want to run all crawlers ;-).
 
       */
 
      if ($school['crawled'] && !isset($school['crawled_notreally']))
 
	{
 
	  $semester = $school['crawled_semester'];
 

	
 
	  $cache_auto_school_dir_name = $cache_auto_dir_name . $school['id'] . DIRECTORY_SEPARATOR;
 
	  if (!is_dir($cache_auto_school_dir_name))
 
	    {
 
	      if (!mkdir($cache_auto_school_dir_name, 0777, TRUE))
 
	      if (!mkdir($cache_auto_school_dir_name, 0755, TRUE))
 
		error_log('Unable to create needed directory: `' . $cache_auto_dir_name . '\'');
 
	    }
 

	
 
	  $departments = $semester->departments_get();
 
	  sort($departments);
 
	  $semesters = array();
 
	  $semester_weights = 0;
 
	  /*
 
	   * Try to presort the semesters into the proper order based
 
	   * on time_start/time_end. We want the older semesters to be
 
	   * nearer to the end of the list. This way, the crawler
 
	   * doesn't have to decide how to sort the semesters itself:
 
	   */
 
	  usort($semesters, function ($sem_a, $sem_b)
 
		{
 
		  return $sem_a->time_start_get() - $sem_b->time_start_get();
 
		});
 

	
 
	  $dept_file = fopen($cache_auto_school_dir_name . '-depts', 'wb');
 
	  fwrite($dept_file, serialize($departments));
 
	  fclose($dept_file);
 

	
 
	  /* now per-department autocomplete */
 
	  foreach ($departments as $department)
 
	  foreach ($school['crawled_semesters'] as $semester)
 
	    {
 
	      $classes = $semester->department_classes_get($department);
 
	      $classes_file = fopen($cache_auto_school_dir_name . $department . '.sects', 'wb');
 
	      fwrite($classes_file, serialize($classes));
 
	      fclose($classes_file);
 
	      $semesters[$semester->id()] = array(
 
						  'id' => $semester->id(),
 
						  'time_start' => $semester->time_start_get(),
 
						  'time_end' => $semester->time_end_get(),
 
						  'weight' => $semester_weights ++,
 
						  'name' => $semester->name_get(),
 
						  );
 

	
 
	      /* now individual section informations, pre-JSON-ized */
 
	      foreach ($classes as $class)
 
	      $cache_auto_school_semester_dir_name = $cache_auto_school_dir_name . $semester->id() . DIRECTORY_SEPARATOR;
 
	      if (!is_dir($cache_auto_school_semester_dir_name))
 
		{
 
		  if (!is_dir($cache_auto_school_dir_name . $department))
 
		    mkdir($cache_auto_school_dir_name . $department);
 
		  $class_file = fopen($cache_auto_school_dir_name . $department . DIRECTORY_SEPARATOR . $class, 'wb');
 
		  fwrite($class_file, json_encode($semester->class_get($department, $class)->to_json_array()));
 
		  fclose($class_file);
 
		  if (!mkdir($cache_auto_school_semester_dir_name, 0755, TRUE))
 
		    error_log('Unable to create needed directory: `' . $cache_auto_school_semester_dir_name . '\'');
 
		}
 
	    }
 

	
 
	      $departments = $semester->departments_get();
 
	      sort($departments);
 

	
 
	      $dept_file = fopen($cache_auto_school_semester_dir_name . '-depts', 'wb');
 
	      fwrite($dept_file, serialize($departments));
 
	      fclose($dept_file);
 

	
 
	      /* now per-department autocomplete */
 
	      foreach ($departments as $department)
 
		{
 
		  $classes = $semester->department_classes_get($department);
 
		  $classes_file = fopen($cache_auto_school_semester_dir_name . $department . '.sects', 'wb');
 
		  fwrite($classes_file, serialize($classes));
 
		  fclose($classes_file);
 

	
 
		  /* now individual section informations, pre-JSON-ized */
 
		  foreach ($classes as $class)
 
		    {
 
		      if (!is_dir($cache_auto_school_semester_dir_name . $department))
 
			mkdir($cache_auto_school_semester_dir_name . $department);
 
		      $class_file = fopen($cache_auto_school_semester_dir_name . $department . DIRECTORY_SEPARATOR . $class, 'wb');
 
		      fwrite($class_file, json_encode($semester->class_get($department, $class)->to_json_array()));
 
		      fclose($class_file);
 
		    }
 
		}
 
	    } /* foreach ( => $semester) */
 
	  /*
 
	   * Store/cache the semester metadata:
 
	   */
 
	  $semesters_file = fopen($cache_auto_school_dir_name . '-semesters', 'wb');
 
	  fwrite($semesters_file, serialize($semesters));
 
	  fclose($semesters_file);
 
	}
 

	
 

	
 
    }
 
  uasort($list_cache, 'school_cmp');
 

	
 
  $cache = array('list' => $list_cache, 'domains' => $domain_cache);
 

	
 
  $cache_file_name =  $cache_dir_name . 'schools';
 
  $cache_file = fopen($cache_file_name, 'wb');
 
  if ($cache_file === FALSE)
 
    {
 
      fprintf(STDERR, "Unable to open `%s' for writing\n",
 
	      $cache_file_name);
 
      return 1;
 
    }
 
  fwrite($cache_file, serialize($cache));
 
  fclose($cache_file);
 

	
 
  return 0;
 
}
 

	
 
/**
 * \brief
 *   Invoke a school's registration data crawler.
 *
 * Each school may export registration data on publicly accessible
 * websites. Thus, we populate some autocomplete information by
 * crawling these pages and storing the information in a special set
 * of caches.
 *
 * Because crawling code can be non-trivial, it should be separated
 * from a school's main .inc file. Thus, if a school supports
 * crawling, it will have a file called
 * school.d/<school_id>.crawl.inc. In this file, a function called
 * <school_id>_crawl(array &$semesters, $verbosity = 1) must be
 * defined. It must accept at least one argument, the array to be
 * filled with Semester objects. It must populate this array with
 * individual Semester objects and fill those with Course objects and
 * populate those courses with the sections with as much detail as
 * possible. This function may return 1 to indicate an error and must
 * return 0 to indicate success.
 *
 * $school['crawled'] is always reset to FALSE first. If the crawling
 * is successful, it is set to TRUE and the crawled Semester objects
 * are stored in $school['crawled_semesters']. school_cache() will use
 * this to help indicate that a school _has_ autocomplete information,
 * which might affect the appearance and JS stuff for the input.php
 * page.
 *
 * \param $school
 *   The school which should be checked for crawl functionality and
 *   crawled. Modified in place.
 * \param $verbosity
 *   When greater than 0, progress is written to STDERR. The value is
 *   also handed through to the school's crawler function.
 */
function school_crawl(array &$school, $verbosity = 1)
{
  $school['crawled'] = FALSE;

  $school_crawl_func = $school['id'] . '_crawl';
  if (!function_exists($school_crawl_func))
    return;

  /* The crawler appends the Semester objects it discovers to this array. */
  $semesters = array();

  if ($verbosity > 0)
    fprintf(STDERR, "%s()\n", $school_crawl_func);
  /* Invoke the crawler exactly once; a nonzero return means failure. */
  $ret = $school_crawl_func($semesters, $verbosity);
  if ($ret)
    {
      fprintf(STDERR, "Crawling %s failed: %s() returned nonzero\n",
	      $school['id'], $school_crawl_func);
      fwrite(STDERR, "\n");
      return;
    }
  $school['crawled'] = TRUE;
  $school['crawled_semesters'] = $semesters;

  if ($verbosity > 0)
    fwrite(STDERR, "\n");
}
 

	
 

	
 
/**
 
 * \brief
 
 *   Recreate/update the school section autocomplete cache.
 
 *
 
 * \param $crawl_only
 
 *   If non-NULL, an array of school_ids to limit the cache recreation
 
 *   to. Useful for when developing a certain school's crawling
 
 *   function.
 
 * \param $verbosity
 
 *   An integer indicating how loud to be.
 
 */
 
function school_cache_recreate($crawl_only = NULL, $verbosity = 1)
 
{
 
  $crawl_semester_year = '2011';
 
  $crawl_semester_season = Semester::SEASON_SPRING;
 

	
 
  $school_id_list = school_list();
 
  if (!$school_id_list)
 
    {
 
      fprintf(STDERR, "error: Unable to load schools.\n");
 
      return 1;
 
    }
 

	
 
  if ($crawl_only !== NULL)
 
    foreach ($crawl_only as $crawl_only_school_id)
 
      if (!in_array($crawl_only_school_id, $school_id_list))
 
	{
 
	  fprintf(STDERR, "error: Invalid school_id specified for crawling: %s",
 
		  $crawl_only_school_id);
 
	  return 1;
 
	}
 

	
 
  /* hide libxml errors from the console: */
 
  libxml_use_internal_errors(TRUE);
 

	
 
  $schools = array();
 
  $old_school_cache = _school_cache_load();
 
  foreach ($school_id_list as $school_id)
 
    {
 
      $school = school_load($school_id, TRUE);
 
      if (!$school)
 
	{
 
	  fprintf(STDERR, "Error loading school with school_id=%s\n",
 
		  $school_id);
 
	  return 1;
 
	}
 

	
 
      if ($crawl_only === NULL || in_array($school['id'], $crawl_only))
 
	{
 
	  school_crawl($school, $crawl_semester_year, $crawl_semester_season, $verbosity);
 
	  school_crawl($school, $verbosity);
 
	}
 
      else
 
	{
 
	  /*
 
	   * try to allow incremental crawling by not wiping out old
 
	   * data and preserving the cached $school['crawled'].
 
	   */
 
	  if ($old_school_cache && isset($old_school_cache['list'][$school['id']]))
 
	    {
 
	      $old_school = $old_school_cache['list'][$school['id']];
 
	      $school['crawled'] = FALSE;
 
	      if (isset($old_school['crawled']))
 
		$school['crawled'] = $old_school['crawled'];
 
	      if ($school['crawled'])
 
		$school['crawled_notreally'] = TRUE;
 
	    }
 
	}
 

	
 
      $schools[] = $school;
 
    }
 

	
 
  if (school_cache($schools))
 
    {
 
      fprintf(STDERR, "Error writing out school cache\n");
inc/class.page.php
Show inline comments
 
@@ -131,48 +131,49 @@ class page
 
	 $ga_www = 'https://ssl.';
 

	
 
       $this->trackingcode = '<script type="text/javascript" src="' . $ga_www . 'google-analytics.com/ga.js" />' . "\n"
 
	 . $this->trackingcode
 
	 . '  <script type="text/javascript">' . "\n"
 
	 . '  ' . ($this->xhtml ? '<![CDATA[' : '') . "\n"
 
	 . "     var mytrackers = new Array();";
 

	
 
       $i = 0;
 
       foreach ($ga_trackers as $ga_tracker)
 
	 {
 
	   $this->trackingcode .= "\n"
 
	     . '      mytrackers[' . $i . '] = _gat._getTracker(\'' . $ga_tracker . "');\n"
 
	     . '      mytrackers[' . $i . "]._trackPageview();\n";
 
	 }
 

	
 
       $this->trackingcode .= '  ' . ($this->xhtml ? ']]>'       : '') . "\n"
 
	 . "  </script>\n";
 
     }
 

	
 
    self::session_start();
 
    /* everything that needs sessions started to work: */
 

	
 
    $this->school = school_load_guess();
 
    $this->semester = school_semester_guess($this->school);
 

	
 
    if($immediate
 
       && $ntitle != "NOHEAD")
 
      $this->head();
 
 }
 

	
 
  /**
 
   * \brief
 
   *   Instantiate a new page for the caller.
 
   *
 
   * The caller must explicitly call the page::head() function upon
 
   * the value that is returned. No implicit actions are supported
 
   * anymore.
 
   *
 
   * \param $title
 
   *   The title of the page. Must be completely UTF-8 (will be
 
   *   escaped for you with htmlentities()).
 
   * \param $scripts
 
   *   A list of scripts which the page desires to be included in the
 
   *   <head /> of the page. Should this param just be moved to the
 
   *   page::head() function?
 
   */
 
  public static function page_create($title, array $scripts = array())
 
  {
 
@@ -208,49 +209,53 @@ class page
 
    if ($this->xhtml) {
 
      echo '<?xml version="1.0" encoding="utf-8"?>' . PHP_EOL;
 
    }
 

	
 
    echo '<!DOCTYPE ' . $this->doctype . '>'. PHP_EOL .
 
	  '<html ' . $this->htmlargs . '>'. PHP_EOL .
 
	  '  <head>'. PHP_EOL .
 
	  '    <title>' . $this->pagetitle . ' - ' . $this->base_title . '</title>'. PHP_EOL .
 
          '    <link rel="stylesheet" href="styles/general.css" type="text/css" media="screen" charset="utf-8" />'.  PHP_EOL .
 
	  '    <link rel="stylesheet" type="text/css" media="print" href="styles/print.css" />'. PHP_EOL .
 
          '    <link rel="shortcut icon" href="images/favicon.png" />'. PHP_EOL;
 

	
 
    // Write out all passed scripts
 
    foreach ($this->scripts as $i)
 
      echo '    ' . $this->headCode["$i"] . "\n";
 

	
 
    echo '  </head>' . PHP_EOL .
 
	 '  <body>'. PHP_EOL .
 
         '    <div id="page">'. PHP_EOL .
 
         '      <div id="header">'. PHP_EOL .
 
	 '        <div id="title">'. PHP_EOL .
 
         '          <h1><a href="index.php"><img src="images/slatepermutate-alpha.png" alt="SlatePermutate" class="noborder" /></a><br /></h1>'. PHP_EOL .
 
         '          <p>'. PHP_EOL .
 
         '            <span id="subtitle">'.$this->pagetitle.'</span>'. PHP_EOL .
 
  	 '            <span id="menu">Profile: '.$this->school['name'].' <a href="input.php?selectschool=1">(change)</a></span>'. PHP_EOL .
 
  	 '            <span id="menu">' . PHP_EOL
 
      . '              Profile: '.$this->school['name'].' <a href="input.php?selectschool=1">(change)</a>' . PHP_EOL;
 
    if ($this->semester !== NULL)
 
      echo  '             Semester: ' . $this->semester['name'] . '<a href="input.php?selectsemester=1">(change)</a>' . PHP_EOL;
 
    echo '            </span>'. PHP_EOL .
 
         '          </p>'. PHP_EOL .
 
         '        </div>'. PHP_EOL .
 
	 '      </div>'. PHP_EOL .
 
         '      <div id="content">'. PHP_EOL;
 
  }
 

	
 
  /**
 
   * \brief
 
   *   Write out the foot of the page and closing divs
 
   */
 
  public function foot(){
 
    echo '      </div> <!-- id="content" -->'. PHP_EOL;
 
    echo '      <div id="footer">'. PHP_EOL .
 
  	 '        <div id="leftfoot" style="float:left; margin-top: 1em;">'. PHP_EOL .
 
	 '          <a href="feedback.php">Submit Feedback</a>'. PHP_EOL .
 
         '        </div>'. PHP_EOL .
 
         '        <div id="rightfoot">'. PHP_EOL .
 
         '          <h5>&copy; '. date('Y').' <a href="http://protofusion.org/~nathang/">Nathan Gelderloos</a><br /><a href="http://ethanzonca.com">Ethan Zonca</a><br /><a href="http://ohnopub.net">Nathan Phillip Brink</a></h5>'. PHP_EOL .
 
	 '        </div>'. PHP_EOL .
 
         '      </div> <!-- id="footer" -->'. PHP_EOL .
 
         '    </div> <!-- id="page" -->'. PHP_EOL;
 
    echo $this->trackingcode;
 
    echo '  </body>'. PHP_EOL .
 
         '</html>' . PHP_EOL;
 
@@ -298,48 +303,94 @@ class page
 
      echo $output;
 
      if ($numsaved > 4)
 
	echo '<div id="showLess"><a href="#">Less...</a></div>' . PHP_EOL
 
	  . '<div id="showMore"><a href="#">More...</a></div>' . PHP_EOL;
 
      echo '</div>' . PHP_EOL;
 
    }
 
  }
 

	
 
  /**
 
   * \brief
 
   *   Display a list of schools the user might be from.
 
   * \param $linkto
 
   *   The link to which a &school= or ?school= query string should be
 
   *   appended.
 
   */
 
  public function showSchools($linkto)
  {
    /*
     * Wrap the school list markup for the currently selected school
     * in a paragraph. The comma form of echo emits each piece in
     * order, just like separate echo statements would.
     */
    echo "<p>\n",
      school_list_html($this->school['id'], $linkto),
      "</p>\n";
  }
 

	
 
  /**
 
   * \brief
 
   *   Display a list of semesters the user might be interested in.
 
   * \param $linkto
 
   *   The link to which a &semester= or ?semester= query string
 
   *   should be appended.
 
   */
 
  public function showSemesters($linkto = 'input.php')
  {
    /*
     * strpos() returns 0 when the `?' is the first character, which
     * is falsy; compare against FALSE to detect an existing query
     * string reliably.
     */
    if (strpos($linkto, '?') !== FALSE)
      $linkto .= '&';
    else
      $linkto .= '?';
    /*
     * We can pre-htmlentities() $linkto because we're only appending
     * a safe string.
     */
    $linkto = htmlentities($linkto . 'semester=');

    $time = time();

    /* head() treats a NULL semester as ``none selected''; do likewise. */
    $selected_id = NULL;
    if ($this->semester !== NULL)
      $selected_id = $this->semester['id'];

    echo "    <p>\n";
    echo "      <ul>\n";
    foreach (school_semesters($this->school) as $semester)
      {
	$text_extra = array();
	$class_extra = '';
	if ($selected_id !== NULL && $semester['id'] == $selected_id)
	  {
	    $class_extra = ' highlight';
	    $text_extra[] = 'selected';
	  }

	/* A semester is ``current'' when now lies between its estimated bounds. */
	if ($semester['time_start'] < $time && $semester['time_end'] > $time)
	  $text_extra[] = 'current';

	/* Separator first: the reversed argument order is rejected by PHP 8. */
	$text_extra = implode(', ', $text_extra);
	if (strlen($text_extra))
	  $text_extra = ' (' . $text_extra . ')';

	echo '        <li class="semester' . $class_extra . '"><a href="' . $linkto . $semester['id'] . '">' . htmlentities($semester['name']) . '</a>' . $text_extra . "</li>\n";
      }
    echo "      </ul>\n";
    echo "    </p>\n";
  }
 

	
 
  /**
 
   * \brief
 
   *   Display school-specific instructions for using slate_permutate.
 
   */
 
  public function showSchoolInstructions()
  {
    /* Fetch the school's instruction markup, then wrap it in its box. */
    $instructions_html = school_instructions_html($this->school);

    $box = '<div id="schoolInstructionsBox">' . PHP_EOL;
    $box .= $instructions_html . PHP_EOL;
    $box .= '</div> <!-- id="schoolInstructionsBox" -->' . PHP_EOL;

    echo $box;
  }
 

	
 
  /**
 
   * \brief
 
   *   Print out a vocative form of a student's identity. For example,
 
   *   Dearborn Christian Schoolers are called ``Knights'' as are
 
   *   Calvin College students.
 
   *
 
   * The third argument is used to determine whether or not this
 
   * address _needs_ to be printed out. For example, in some sentences
 
   * when addressing generic students, it makes no sense to say the
 
   * standard ``Welcome, student'' or ``Dear generic person, how do
 
   * you do today?''. If the third argument is false, we'll refrain
 
   * from outputting anything at all.
 
   *
 
   * \param $prefix
 
   *   If the address is to be printed, output this beforehand. Useful
inc/class.semester.inc
Show inline comments
 
@@ -22,63 +22,77 @@
 
require_once($inc_dir . 'class.course.inc');
 
require_once($inc_dir . 'class.section.php');
 

	
 
/**
 
 * \brief
 
 *   Identifies a school semester and acts as a container for courses
 
 *   offered in a semester.
 
 */
 
class Semester
 
{
 
  /**
 
   * \brief
 
   *   The Fall season.
 
   */
 
  const SEASON_FALL = 'fall';
 

	
 
  /**
 
   * \brief
 
   *   The Spring season.
 
   */
 
  const SEASON_SPRING = 'spring';
 

	
 
  /**
 
   * \brief
 
   *   The Summer season.
 
   */
 
  const SEASON_SUMMER = 'summer';
 

	
 
  /**
   * \brief
   *   Instantiate an empty Semester.
   *
   * \param $year
   *   The year of this semester. Must be four digits.
   * \param $season
   *   The season of this semester. Please use the constants
   *   Semester::SEASON_FALL, Semester::SEASON_SPRING, or
   *   Semester::SEASON_SUMMER if possible.
   * \param $time_start
   *   Specify a timestamp which roughly estimates when this semester
   *   starts to aid the algorithm for guessing the current
   *   semester. See Semester::time_start_set(), which may be used
   *   instead of this parameter.
   * \param $time_end
   *   A timestamp which roughly estimates when this semester
   *   ends. This may be specified now or via
   *   Semester::time_end_set().
   *
   * Throws an ErrorException if $year is not a four-character
   * numeric value.
   */
  function __construct($year, $season, $time_start = 0, $time_end = 0)
  {
    /*
     * Store the caller's estimates. Both default to 0, which
     * Semester::time_start_set_test() and
     * Semester::time_end_set_test() treat as ``not yet set''.
     */
    $this->time_start = $time_start;
    $this->time_end = $time_end;
    $this->season = $season;

    if (strlen($year) != 4 || !is_numeric($year))
      throw new ErrorException('Attempt to construct a Semester with an invalid year. The given year is `' . $year . '\'');
    $this->year = $year;

    /* Courses are grouped per department; see Semester::class_add(). */
    $this->departments = array();
  }
 

	
 
  /**
 
   * \brief
 
   *   Add a class to this Semester.
 
   *
 
   * \param $class
 
   *   The class/course to add.
 
   */
 
  public function class_add(Course $class)
 
  {
 
    $class_parts = Course::parse($class->getName());
 
    if (!isset($class_parts['course']))
 
      throw new ErrorException('I was given a class with an invalid name: `' . $class->getName() . '\'');
 

	
 
    if (!isset($this->departments[$class_parts['department']]))
 
      $this->departments[$class_parts['department']] = array();
 
    $department =& $this->departments[$class_parts['department']];
 

	
 
    $department[$class_parts['course']] = $class;
 
@@ -137,49 +151,153 @@ class Semester
 
   * \param $section
 
   *   The section itself.
 
   */
 
  public function section_add($dept, $class, Section $section)
  {
    /* Departments and courses are looked up by their upper-case names. */
    $dept_key = strtoupper($dept);
    $class_key = strtoupper($class);

    if (isset($this->departments[$dept_key][$class_key]))
      {
	/* The course is already known; reuse it. */
	$course = $this->departments[$dept_key][$class_key];
      }
    else
      {
	/* First section seen for this course: create and register it. */
	$course = new Course($dept_key . '-' . $class_key);
	$this->class_add($course);
      }

    $course->section_add($section);
  }
 

	
 
  /**
 
   * \brief
 
   *   Update the time_end.
 
   *
 
   * The time_end is a unix timestamp roughly estimating the time at
 
   * which a semester ends. It is used when guessing what semester a
 
   * user is interested in.
 
   *
 
   * \param $time_end
 
   *   The new time_end.
 
   */
 
  public function time_end_set($time_end)
 
  {
 
    $this->time_end = $time_end;
 
  }
 

	
 
  /**
 
   * \brief
 
   *   Set the time_end only if it would make the semester end later.
 
   *
 
   * Useful for crawler scripts incrementally guessing the endtime of
 
   * a semester.
 
   *
 
   * \param $time_end
 
   *   The new time_end to consider.
 
   */
 
  public function time_end_set_test($time_end)
  {
    /* An empty/zero candidate carries no information. */
    if (!$time_end)
      return;

    /* Only ever push the end of the semester later, never earlier. */
    if (!($time_end > $this->time_end))
      return;

    $this->time_end_set($time_end);
  }
 

	
 
  /**
   * \brief
   *   Get this semester's stored time_end.
   *
   * \return
   *   The value last stored by the constructor,
   *   Semester::time_end_set(), or Semester::time_end_set_test().
   */
  public function time_end_get()
 
  {
 
    return $this->time_end;
 
  }
 

	
 
  /**
 
   * \brief
 
   *   Update the time_start.
 
   *
 
   * The time_start is a unix timestamp roughly estimating the time at
 
   * which a semester starts. It is used when guessing what semester a
 
   * user is interested in.
 
   *
 
   * \param $time_start
 
   *   The new time_start.
 
   */
 
  public function time_start_set($time_start)
 
  {
 
    $this->time_start = $time_start;
 
  }
 

	
 
  /**
 
   * \brief
 
   *   Only update the time_start if the time_start isn't yet set or
 
   *   if the given time_start is earlier than the stored one.
 
   *
 
   * This should allow crawlers to easily accumulate proper time_start
 
   * and time_end values, see Semester::time_end_set_test();
 
   *
 
   * \param $time_start
 
   *   The new estimation of the semester's start.
 
   */
 
  public function time_start_set_test($time_start)
  {
    /* An empty/zero candidate carries no information. */
    if (!$time_start)
      return;

    /* Accept the candidate if nothing is stored yet or if it is earlier. */
    $nothing_stored = !$this->time_start;
    if ($nothing_stored || $time_start < $this->time_start)
      $this->time_start_set($time_start);
  }
 

	
 
  /**
   * \brief
   *   Get this semester's stored time_start.
   *
   * \return
   *   The value last stored by the constructor,
   *   Semester::time_start_set(), or Semester::time_start_set_test().
   */
  public function time_start_get()
 
  {
 
    return $this->time_start;
 
  }
 

	
 
  /**
 
   * \brief
 
   *   Get a semester's year.
 
   */
 
  public function year_get()
 
  {
 
    return $this->year;
 
  }
 

	
 
  /**
 
   * \brief
 
   *   Get a semester's season.
 
   */
 
  public function season_get()
 
  {
 
    return $this->season;
 
  }
 

	
 
  /**
 
   * \brief
 
   *   Get a semester's friendly name:
 
   *
 
   * \return
 
   *   A string, the semester's friendly name.
 
   */
 
  public function name_get()
  {
    /* Capitalized season followed by the year, e.g. ``Spring 2011''. */
    $season_label = ucfirst($this->season_get());
    return $season_label . ' ' . $this->year_get();
  }
 

	
 
  /**
 
   * \brief
 
   *   Return an identification string for this semester.
 
   *
 
   * Hopefully this identification string should be unique. Also, this
 
   * identification string is filesystem-safe.
 
   *
 
   * \return
 
   *   A string which may be used in paths or to uniquely identify
 
   *   this semester in the context of its school.
 
   */
 
  public function id()
  {
    /* <year>_<season>, e.g. ``2011_spring''. */
    return implode('_', array($this->year_get(), $this->season_get()));
  }
 

	
 
  /**
 
   * \brief
 
   *   Enumerate all valid seasons.
 
   */
 
  public static function seasons_get_all()
  {
    /*
     * Class constants must be qualified with self::. A bare
     * SEASON_SPRING would be looked up as a global constant.
     */
    return array(self::SEASON_SPRING, self::SEASON_SUMMER, self::SEASON_FALL);
  }
 
}
inc/school.crawl.inc
Show inline comments
 
@@ -20,48 +20,64 @@
 

	
 
/**
 
 * \file
 
 *   Routines that are only useful when crawling schools' websites for
 
 *   autofill section data.
 
 */
 

	
 
/**
 
 * \brief
 
 *   Parse a simple time string into slate_permutate's time
 
 *   representation.
 
 *
 
 * \param $time
 
 *   An array compatible with the return value of strptime(). The only
 
 *   fields we use are 'tm_hour', which is from 0 through 23, and
 
 *   'tm_min', which may be from 0 through 59.
 
 */
 
function school_crawl_time_format($time)
{
  /* Two zero-padded digits each for the hour and the minute: HHMM. */
  $hour = sprintf('%02d', $time['tm_hour']);
  $minute = sprintf('%02d', $time['tm_min']);

  return $hour . $minute;
}
 

	
 
/**
 
 * \brief
 
 *   Equivalent of mktime() except that it accepts strptime()'s output
 
 *   format as an input.
 
 *
 
 * \param $tm
 
 *   An array formatted as the output of strptime().
 
 * \return
 
 *   A unix timestamp.
 
 */
 
function school_crawl_mktime(array $tm)
{
  /*
   * The code offsets tm_mon by 1 and tm_year by 1900 before handing
   * them to mktime().
   */
  $month = $tm['tm_mon'] + 1;
  $year = $tm['tm_year'] + 1900;

  return mktime($tm['tm_hour'], $tm['tm_min'], $tm['tm_sec'],
		$month, $tm['tm_mday'], $year);
}
 

	
 
/**
 
 * \brief
 
 *   Take an array of day names and assemble them into
 
 *   slate_permutate's internal (weird) representation of a set of
 
 *   weekdays.
 
 *
 
 * This function is intended to make it easy for one to take the
 
 * output of an explode() call. For example, to decode $days_str =
 
 * 'Monday, Tuesday, Friday', one would do
 
 * school_crawl_days_format(explode(', ', $days_str));
 
 *
 
 * \param $days
 
 *   An array of day names. These may be common abbreviations or
 
 *   truncations (any truncations must be two chars long for
 
 *   simplicity. One-char representations are supported, however, but
 
 *   use 'm', 't', 'w', 'h', 'f' to distinguish Thursday and
 
 *   Tuesday. 'r' may also be used for Thursday.). Case does not
 
 *   matter. 's' is for Saturday, based on CCBCMD.
 
 * \return
 
 *   slate_permutate's strange internal days representation.
 
 */
 
function school_crawl_days_format($days)
 
{
 
  static $daymap_1 = array('m' => 'm', 't' => 't', 'w' => 'w', 'h' => 'h', 'r' => 'h', 'f' => 'f', 's' => 's');
 
  static $daymap_2 = array('th' => 'h');
 

	
inc/school.inc
Show inline comments
 
@@ -22,93 +22,103 @@
 
 * \file
 
 *
 
 * Provide a method of storing and retrieving school-specific
 
 * information. Identifying schools is intended to be useful for
 
 * obtaining and storing preknowledge of the sections a school offers
 
 * to allow easier input.
 
 *
 
 * Any code specific to a particular school should be placed in a
 
 * file in the school.d directory. The filename shall be the short,
 
 * alphanumeric, machine-usable school identifier followed by
 
 * ``.inc''. This allows optimized loading of school-specific routines
 
 * when the identifier is already known.
 
 */
 

	
 
/**
 
 * \brief
 
 *   Load a school profile based on its identifier.
 
 *
 
 * This function loads the school's description file and asks for info
 
 * from a callback called $school_id . '_info' which must return an
 
 * array with the following keys:
 
 * - name: a friendly name for the school. Must be a valid XHTML attribute string.
 
 * - url: the school's website URL as a valid XHTML attribute string. (i.e., escape ampersands).
 
 * - example_course_id: An example course identifier representative of a school's course IDs. (e.g., CS-101 for Calvin).
 
 * - id: The school's ID.
 
 *
 
 * \param $school_id
 
 *   The school's alphanumeric identifier (which determines the name
 
 *   of the school's *.inc file).
 
 * \param $load_all_inc
 
 *   Asks for a school's extraneous .inc files to be loaded
 
 *   too. Intended for use by rehash.php only.
 
 * \return
 
 *   A school_profile handle or NULL on error.
 
 */
 
function school_load($school_id, $load_all_inc = FALSE)
 
{
 
  $school = array('id' => $school_id);
 

	
 
  /* guard against cracking attempts (protects against '../' and friends) */
 
  if (!preg_match('/^[0-9a-z]+$/', $school_id))
 
    return NULL;
 
  $school_file_name_base = dirname(__FILE__) . DIRECTORY_SEPARATOR
 
    . '..' . DIRECTORY_SEPARATOR . 'school.d' . DIRECTORY_SEPARATOR;
 
  $school_file_name_base = dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'school.d' . DIRECTORY_SEPARATOR;
 
  $school_file_name = $school_file_name_base . $school_id . '.inc';
 

	
 
  if (!file_exists($school_file_name))
 
    return NULL;
 

	
 
  require_once($school_file_name);
 
  if ($load_all_inc)
 
    {
 
      $school_crawl_file_name = $school_file_name_base . $school_id . '.crawl.inc';
 
      if (file_exists($school_crawl_file_name))
 
	require_once($school_crawl_file_name);
 
    }
 

	
 
  $school_info = $school_id . '_info';
 
  $school += $school_info();
 

	
 
  /*
 
   * append small amount of info from the cache entry for this school:
 
   * whether or not it was crawled.
 
   *
 
   * Perhaps this stuff should be just moved into the _info function
 
   * for efficiency.
 
   */
 
  $cache = _school_cache_load();
 
  if ($cache && count($cache['list']) && isset($cache['list'][$school['id']]))
 
    $school['crawled'] = $cache['list'][$school['id']]['crawled'];
 
    {
 
      $school['crawled'] = $cache['list'][$school['id']]['crawled'];
 

	
 
      $school_semesters_filename = dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'cache'
 
	. DIRECTORY_SEPARATOR . 'auto' . DIRECTORY_SEPARATOR . $school['id']
 
	. DIRECTORY_SEPARATOR . '-semesters';
 
      if (file_exists($school_semesters_filename))
 
	$school['semesters'] = unserialize(file_get_contents($school_semesters_filename));
 
      else
 
	$school['semesters'] = array();
 
    }
 

	
 
  return $school;
 
}
 

	
 
/**
 
 * \brief
 
 *   Tries to guess what school a connection comes from.
 
 *
 
 * This function checks if $_REQUEST['school'] is set to a valid
 
 * school, so that the user can manually choose his school. Then it
 
 * checks if the user's session specifies what school profile to
 
 * use. Then it tries to make a best guess as to the school he's from
 
 * using the rDNS information provided by the httpd.
 
 *
 
 * \return
 
 *   A school profile or NULL if the school isn't in the session and
 
 *   can't be guessed.
 
 */
 
function school_load_guess()
 
{
 
  if (isset($_REQUEST['school']))
 
    {
 
      $school = school_load($_REQUEST['school']);
 
      if ($school)
 
@@ -251,48 +261,110 @@ function school_instructions_html($schoo
 
     * handling shouldn't be needed...
 
     */
 
    return NULL;
 

	
 
  $school_instructions_html = $school['id'] . '_instructions_html';
 

	
 
  if (!function_exists($school_instructions_html))
 
    {
 
      /* load the default school's _instructions_html() function */
 
      if ($school_default_school === NULL)
 
	$school_default_school = school_load('default');
 
      /* ``hacky'', but preferable to recursion: */
 
      $school_instructions_html = 'default' . '_instructions_html';
 

	
 
      /* be 503-safe */
 
      if (!function_exists($school_instructions_html))
 
	return NULL;
 
    }
 

	
 
  return $school_instructions_html();
 
}
 

	
 
/**
 * \brief
 *   Return information about available semesters.
 *
 * \param $school
 *   The school.
 * \return
 *   An array with keys being semester IDs ordered by weights with
 *   lowest first and keys of 'id' (the semester's ID), 'name' (the
 *   friendly name), and 'weight' (lower numbers mean these semesters
 *   should be earlier, may be positive or negative). 'time_start',
 *   'time_end' are unix timestamps estimating the begin and end point
 *   of each semester. An empty array is returned if the school was
 *   not crawled or its profile carries no semester list.
 */
function school_semesters(array $school)
{
  /*
   * 'crawled' and 'semesters' are only present in profiles of
   * schools found in the cache, so test for them without assuming
   * that they exist.
   */
  if (empty($school['crawled'])
      || !isset($school['semesters'])
      || !is_array($school['semesters']))
    return array();

  return $school['semesters'];
}
 

	
 
/**
 * \brief
 *   Return the semester which either the user has selected or which
 *   makes the most sense.
 *
 * The candidates are tried in this order:
 * -# $_REQUEST['semester'], if it names a known semester. The choice
 *    is also stored into $_SESSION['semester'].
 * -# $_SESSION['semester'], if it names a known semester.
 * -# The semester which follows the first semester whose
 *    'time_start' lies in the past. If no such successor exists, the
 *    last semester in the list is used.
 *
 * \param $school
 *   The school for which a semester should be guessed.
 * \return
 *   An array with the keys 'id', 'name', and 'weight' corresponding
 *   to the same keys in the arrays returned by school_semesters() or
 *   NULL if no semester can be found.
 */
function school_semester_guess(array $school)
{
  $semesters = school_semesters($school);

  /*
   * Request parameters may be arrays (semester[]=x), which must not
   * be used as array offsets; only scalar values are looked up.
   */
  if (!empty($_REQUEST['semester'])
      && is_scalar($_REQUEST['semester'])
      && isset($semesters[$_REQUEST['semester']]))
    {
      $semester = $semesters[$_REQUEST['semester']];
      $_SESSION['semester'] = $semester['id'];
      return $semester;
    }

  if (!empty($_SESSION['semester'])
      && is_scalar($_SESSION['semester'])
      && isset($semesters[$_SESSION['semester']]))
    return $semesters[$_SESSION['semester']];

  $time = time();
  $next_semester = FALSE;
  /* stays NULL only if there are no semesters to loop over */
  $semester = NULL;
  foreach ($semesters as $semester)
    {
      /* the previous iteration found a semester which has already begun */
      if ($next_semester)
	return $semester;
      if ($semester['time_start'] < $time)
	$next_semester = TRUE;
    }
  return $semester;
}
 

	
 
/**
 * \brief
 *   Ask a school for the courses it wants preselected by default.
 *
 * A school opts in by defining a function named
 * <school_id>_default_courses(). class.course.inc is loaded before
 * that function is called.
 *
 * \param $school
 *   The school's handle.
 * \return
 *   Whatever the school's callback returns, or an empty array if the
 *   school defines no such callback.
 */
function school_default_courses($school)
{
  $callback = $school['id'] . '_default_courses';
  if (!function_exists($callback))
    return array();

  require_once(dirname(__FILE__) . DIRECTORY_SEPARATOR . 'class.course.inc');
  return $callback();
}
 

	
 
/**
 
 * \brief
 
 *   Return an example course id for the school.
 
 *
 
 * Each school may specify an example course ID by placing a key
 
 * called 'example_course_id' into the array returned by its
 
 * <school_id>_info() function. See school_load().
input.php
Show inline comments
 
@@ -80,48 +80,63 @@ if ($qtips_always || !isset($_SESSION['s
 
 * another variable in $_SESSION: $_SESSION['school_chosen'].
 
 */
 
if ($school && (!empty($_REQUEST['school']) || $school['id'] != 'default'))
 
  $_SESSION['school_chosen'] = TRUE;
 
if (!empty($_REQUEST['selectschool'])
 
    || $school['id'] == 'default' && !isset($_SESSION['school_chosen']))
 
  {
 
    $next_page = 'input.php';
 
    if (isset($_GET['s']))
 
      $next_page .= '?s=' . (int)$_GET['s'];
 
?>
 
<h2>School Selection</h2>
 
<p>
 
  Choose the school you attend from the list below. <strong>If you cannot
 
  find your school</strong>, you may proceed using
 
  the <a href="<?php echo $next_page . (strpos($next_page, '?') === FALSE ? '?' : '&amp;'); ?>school=default">generic
 
  settings</a>.
 
</p>
 
<?php
 
    $inputPage->showSchools($next_page);
 
    $inputPage->foot();
 
    exit;
 
  }
 

	
 
if (!empty($_REQUEST['selectsemester']))
 
  {
 
?>
 
<h2>Semester Selection</h2>
 
<p>
 
  Choose the semester for which you wish to make a schedule from the
 
  list below. If any semester is missing, please <a
 
  href="feedback.php?feedback=My+school+is+missing+the+&lt;semester+name&gt;+semester.">let us know</a>.
 
</p>
 
<?php
 
  $inputPage->showSemesters();
 
  $inputPage->foot();
 
  exit;
 
  }
 

	
 
$inputPage->showSavedScheds($_SESSION);
 
?>
 
<p>
 
  Welcome to SlatePermutate<?php $inputPage->addressStudent(', ', '', FALSE); ?>!
 
  <?php if (school_has_auto($inputPage->get_school())): ?>
 
  To get started, enter a course identifier (e.g., <em>
 
  <?php echo school_example_course_id($inputPage->get_school()); ?></em>)
 
  and click the autosuggestion to automatically load available sections
 
  for each class.
 
  <?php else: ?>
 
  To get started, enter a course number and add some sections to it.
 
  Then specify each section's letter/number and what times it meets,
 
  add more courses, and click &ldquo;Find a Schedule&rdquo;.
 
  <!--'-->
 
  <?php endif; ?>
 
</p>
 

	
 
<form method="post" action="process.php" id="scheduleForm">
 
<p class="nospace"><label>Schedule Name</label><br />
 
  <input id="scheduleName" style="margin-bottom: 1em;" class="defText required" type="text" size="25" title="Spring 2011" name="postData[name]" <?php if ($sch) echo 'value="' . htmlentities($sch->getName(), ENT_QUOTES) . '"'; /*"*/ ?> />
 
</p>
 

	
 
<table id="container">
 
  <tr>
school.d/calvin.crawl.inc
Show inline comments
 
<?php
 
/*
 
 * Copyright 2010 Nathan Phillip Brink <ohnobinki@ohnopublishing.net>
 
 *
 
 * This file is a part of slate_permutate.
 
 *
 
 * slate_permutate is free software: you can redistribute it and/or modify
 
 * it under the terms of the GNU Affero General Public License as published by
 
 * the Free Software Foundation, either version 3 of the License, or
 
 * (at your option) any later version.
 
 *
 
 * slate_permutate is distributed in the hope that it will be useful,
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
 * GNU Affero General Public License for more details.
 
 *
 
 * You should have received a copy of the GNU Affero General Public License
 
 * along with slate_permutate.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 

	
 
/**
 
 * \brief
 
 *   Crawls Calvin's registration course listing pages.
 
 *
 
 * \param $semester
 
 *   The Semester object which I should populate.
 
 * \param $semesters
 
 *   An array to be filled with Semester objects which I should
 
 *   populate.
 
 * \param $verbosity
 
 *   How verbose I should be. Sensible range is from 0 through 10.
 
 */
 
function calvin_crawl(Semester $semester, $verbosity = 1)
 
function calvin_crawl(array &$semesters, $verbosity = 1)
 
{
 
  /**
 
   * collect a few basic stats
 
   */
 
  $skipped_sections = array('incomplete meeting info' => 0, 'invalid meeting info format' => 0);
 

	
 
  /**
 
   * The first link we start at is the one from KV into WebAdvisor.
 
   *
 
   * 1. https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL
 
   *    <body onload="javascript:getWindowHTML();">
 
   *
 
   *    Calls javascript:getWindowHTML(). This merely adds
 
   *    TOKENIDX=NULL to the query string, so we can skip this step
 
   *    and just have TOKENIDX=NULL.
 
   *
 
   * 2. https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL&TOKENIDX=NULL
 
   *    <body onload="javascript:setWindowHTML('', '7699844013');">
 
   *
 
   *    In the above, the second argument to setWindowHTML() is
 
   *    random. Thus, we have to capture this value.
 
   */
 

	
 
  $cookies = array();
 

	
 
  $baseuri = 'https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
 

	
 
  $token_uri = $baseuri . '&TOKENIDX=NULL';
 
  $token_html = calvin_crawl_noscript_filter(school_crawl_geturi($token_uri, $cookies));
 
  if (!preg_match('/setWindowHTML\(\'\', \'([0-9]+)\'\);/', $token_html, $matches))
 
    {
 
@@ -69,138 +65,160 @@ function calvin_crawl(Semester $semester
 
      echo 'token: ' . $token . "\n";
 
      echo "\n";
 
    }
 

	
 
  /*
 
   * here we have arrived at the main webadvisor screen which lists the
 
   * search form. From here, we can get a list of all of the departments
 
   * that Calvin College has and then know enough to query each
 
   * individual department for courses.
 
   */
 
  $uri = $baseuri . '&TOKENIDX=' . $token;
 
  $departments_html = calvin_crawl_noscript_filter(school_crawl_geturi($uri, $cookies));
 

	
 
  $departments_dom = new DOMDocument();
 
  $departments_dom->loadHTML($departments_html);
 

	
 
  /*
 
   * Discover the available semesters
 
   */
 
  $semesters_select_nodes = $departments_dom->getElementById('VAR1')->childNodes;
 
  $semester_strs = array();
 
  foreach ($semesters_select_nodes as $semester_node)
 
    {
 
      if ($semester_node->tagName != 'option'
 
	  || !$semester_node->hasAttribute('value'))
 
	  || !$semester_node->hasAttribute('value')
 
	  || !strlen($semester_node->getAttribute('value')))
 
	continue;
 
      $semester_strs[$semester_node->getAttribute('value')] =
 
	$semester_node->nodeValue;
 
    }
 
  $semester_strs = array_reverse($semester_strs, TRUE);
 

	
 
  $departments_select_nodes = $departments_dom->getElementById('LIST_VAR1_1')->childNodes;
 
  $departments = array();
 
  foreach ($departments_select_nodes as $dept_node)
 
    {
 
      if ($dept_node->tagName != 'option'
 
	  || !$dept_node->hasAttribute('value'))
 
	continue;
 
      $departments[$dept_node->getAttribute('value')] =
 
	$dept_node->nodeValue;
 
    }
 

	
 

	
 
  /*
 
   * get all of the different possible course levels... dynamically
 
   * rather than hardcodedly ;-).
 
   */
 
  $departments_select_nodes = $departments_dom->getElementById('LIST_VAR1_2')->childNodes;
 
  $course_levels = array();
 
  foreach ($departments_select_nodes as $courselevel_node)
 
    {
 
      if ($courselevel_node->tagName != 'option'
 
	  || !$courselevel_node->hasAttribute('value'))
 
	continue;
 
      $course_levels[] = $courselevel_node->getAttribute('value');
 
    }
 

	
 
  $return_url = dom_input_value($departments_dom, 'RETURN.URL');
 

	
 

	
 
  /* ARCT only has >=200 level courses */
 
  $dept = '';
 
  $course_level = '';
 
  $semester_str = substr($semester->year_get(), 2) . '/';
 
  switch ($semester->season_get())
 
  if ($verbosity > 4)
 
    fprintf(STDERR, "Available semesters: %s\n", implode($semester_strs, ', '));
 

	
 
  $semester_start_uri = $uri;
 

	
 
  $season_map = array(
 
		      'FA' => Semester::SEASON_FALL,
 
		      'IN' => 'interim',
 
		      'SP' => Semester::SEASON_SPRING,
 
		      'MA' => 'may',
 
		      /* I don't know if SU is a valid Calvin Smester ID or not */
 
		      'SU' => Semester::SEASON_SUMMER);
 
  foreach ($semester_strs as $semester_str => $semester_info)
 
    {
 
    case Semester::SEASON_SPRING:
 
      $semester_str .= 'SP';
 
      break;
 
      if (empty($season_map[substr($semester_str, 3)]))
 
	{
 
	  fprintf(STDERR, "Warning: Unknown semester identification chars: %s. Skipping this semester.\n",
 
		  $semester_str);
 
	  continue;
 
	}
 
      $season = $season_map[substr($semester_str, 3)];
 
      $year_timespec = strptime(substr($semester_str, 0, 2), '%y');
 
      $year =  $year_timespec['tm_year'] + 1900;
 

	
 
      $semester = new Semester($year, $season);
 

	
 
    case Semester::SEASON_FALL:
 
      $semester_str .= 'FA';
 
      break;
 
    }
 
  if (!isset($semester_strs[$semester_str]))
 
    error_log('Couldn\'t find a semester in Calvin\'s database for ' . $semester_str . ' (' . $semester->season_get() . ', ' . $semester->year_get() . ')');
 
      /* useful and necessary stats */
 
      $skipped_sections = array('incomplete meeting info' => 0, 'invalid meeting info format' => 0);
 

	
 
      $semester_start_min = 0;
 
      $semester_end_max = 0;
 

	
 
      $dept = '';
 
      $course_level = '';
 
      $uri = $semester_start_uri;
 

	
 
      if ($verbosity)
 
	fprintf(STDERR, "Crawling semester %s->%s\n",
 
		$semester_str, $semester_info);
 

	
 
  /*
 
   * LIST.VAR<X>_<N>: <X> is the column, <N> is the row. There
 
   * are apparently a max of 5 rows (see the LIST.VAR<X>_MAX
 
   * below).
 
   *
 
   * Columns:
 
   * LIST.VAR1: department
 
   * LIST.VAR2: course_level
 
   * LIST.VAR3: IIRC, a course identifier, such as 156 from MATH-156
 
   * LIST.VAR4: I forget
 
   *
 
   */
 
  $form = array('VAR1' => $semester_str,
 
		'LIST.VAR1_1' => $dept,
 
		'LIST.VAR2_1' => $course_level,
 
		);
 

	
 
  /*
 
   * other form items we're not querying but which need to be
 
   * sent blankly
 
   */
 
  $form += array(
 
		/*
 
		 * Other form items we're not querying but which need
 
		 * to be sent blankly.
 
		 */
 
		 'RETURN.URL' => $return_url,
 
		 'SUBMIT_OPTIONS' => '',
 
		 /*
 
		  * The submit button... its value="" key is
 
		  * apparently sent with the form... makes a
 
		  * little bit of sense I guess ;-).
 
		  */
 
		 /*'SUBMIT2' => 'SUBMIT',*/
 

	
 
		 'DATE.VAR1' => '',
 
		 'DATE.VAR2' => '',
 

	
 
		 'LIST.VAR1_CONTROLLER' => 'LIST.VAR1',
 
		 'LIST.VAR1_MEMBERS' => 'LIST.VAR1*LIST.VAR2*LIST.VAR3*LIST.VAR4',
 
		 );
 
		);
 
  foreach (array('1', '2', '3', '4') as $list_col)
 
    {
 
      $colname = 'LIST.VAR' . $list_col;
 
      if (!isset($form[$colname . '_MAX']))
 
	$form[$colname . '_MAX'] = '5';
 

	
 
      foreach (array('1', '2', '3', '4', '5') as $list_row)
 
	{
 
	  $rowname = $colname . '_' . $list_row;
 
	  if (!isset($form[$rowname]))
 
	    $form[$rowname] = '';
 
	}
 
    }
 

	
 
  /*
 
   * VAR7 and VAR 8 is a constraint of times during which
 
   * courses meet
 
   */
 
  $form['VAR7'] = '';
 
  $form['VAR8'] = '';
 

	
 
  /* ``course title keywords'' */
 
  $form['VAR3'] = '';
 

	
 
@@ -318,77 +336,105 @@ function calvin_crawl(Semester $semester
 
	      $skipped_sections['invalid meeting info format'] ++;
 
	      /*
 
	       * Still add at least the course to the semester so that
 
	       * it shows up in autocomplete.
 
	       */
 
	      calvin_crawl_course_add($semester, $section_id['department'], $section_id['course']);
 
	      continue;
 
	    }
 
	  $date_start = $meeting_info_matches[1];
 
	  $date_end = $meeting_info_matches[2];
 
	  /* e.g., 'Lecture', 'Practicum' */
 
	  $meeting_type = strtolower(trim($meeting_info_matches[3]));
 

	
 
	  $days = school_crawl_days_format(explode(', ', $meeting_info_matches[5]));
 
	  $time_start = school_crawl_time_format(strptime($meeting_info_matches[6], '%I:%M%p'));
 
	  $time_end = school_crawl_time_format(strptime($meeting_info_matches[7], '%I:%M%p'));
 
	  $meeting_place = $meeting_info_matches[8];
 

	
 
	  if ($verbosity > 5)
 
	    foreach (array('date_start', 'date_end', 'meeting_type', 'days', 'time_start', 'time_end', 'meeting_place', 'meeting_type') as $var)
 
	      echo $var . ':' . ${$var} . "\n";
 

	
 
	  $section = new Section($section_id['section'], array(new SectionMeeting($days, $time_start, $time_end, $meeting_place, $meeting_type)), $synonym, $faculty_name);
 
	  $semester->section_add($section_id['department'], $section_id['course'], $section);
 

	
 
	  /*
 
	   * Try to update semester's longevity stats to help the
 
	   * school_semester_guess() function:
 
	   */
 
	  $date_start_time = strptime($date_start, '%m/%d/%Y');
 
	  $date_end_time = strptime($date_end, '%m/%d/%Y');
 
	  if ($date_start_time !== FALSE)
 
	    {
 
	      $date_start_time = school_crawl_mktime($date_start_time);
 
	      if (!$semester_start_min || $semester_start_min > $date_start_time)
 
		$semester_start_min = $date_start_time;
 
	    }
 
	  if ($date_end_time !== FALSE)
 
	    {
 
	      $date_end_time = school_crawl_mktime($date_end_time);
 
	      if ($semester_end_max < $date_end_time)
 
		$semester_end_max = $date_end_time;
 
	    }
 
	}
 

	
 
      if (!preg_match(';Page ([0-9]+) of ([0-9]+)\</td\>$;m', $html, $pages))
 
	{
 
	  error_log('Unable to determine the number of pages in this Calvin resultset');
 
	  break;
 
	}
 

	
 
      if ($verbosity > 0)
 
	{
 
	  echo 'calvin_crawl(): finished page ' . $pages[1] . ' of ' . $pages[2] . ' with ' . ($list_row - 1) . " courses.\n";
 
	}
 

	
 
      $form = array(
 
		    'ACTION*Grp:WSS.COURSE.SECTIONS' => 'NEXT',
 
		    );
 
    }
 

	
 
  $has_stat = FALSE;
 
  if ($verbosity > 1)
 
    foreach ($skipped_sections as $reason => $num)
 
      {
 
	if (!$num)
 
	  continue;
 
	if (!$has_stat)
 
	  error_log('Skipped some sections for <reason>: <number skipped>:');
 
	error_log($reason . ': ' . $num);
 
      }
 

	
 
    $semester->time_end_set($semester_end_max);
 
    $semester->time_start_set($semester_start_min);
 

	
 
    $semesters[] = $semester;
 

	
 
    if ($verbosity)
 
      fprintf(STDERR, "\n");
 
    }
 

	
 
  return 0;
 
}
 

	
 
/**
 
 * \brief
 
 *   Find an <input /> element and return its value attribute.
 
 *
 
 * \param $domdocument
 
 *   The DOMDocument to search.
 
 * \param $name
 
 *   The name attribute of the <input /> element.
 
 * \return
 
 *   The value attribute of the input element or NULL if not found.
 
 */
 
function dom_input_value($domdocument, $name)
 
{
 
  $xpath = new DOMXPath($domdocument);
 
  $input_node_list = $xpath->query('/descendant::input[attribute::name="' . $name . '"]');
 

	
 
  if (!$input_node_list->length)
 
    return NULL;
 
  $input_node = $input_node_list->item(0);
 
  if (!$input_node->hasAttribute('value'))
 
    return NULL;
school.d/ccbcmd.crawl.inc
Show inline comments
 
@@ -8,145 +8,150 @@
 
 * it under the terms of the GNU Affero General Public License as published by
 
 * the Free Software Foundation, either version 3 of the License, or
 
 * (at your option) any later version.
 
 *
 
 * slate_permutate is distributed in the hope that it will be useful,
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
 * GNU Affero General Public License for more details.
 
 *
 
 * You should have received a copy of the GNU Affero General Public License
 
 * along with slate_permutate.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 

	
 
/**
 
 * \brief
 
 *   Crawl CCBCMD's registration stuffage.
 
 *
 
 * \param $semester
 
 *   The Semester object which I should populate.
 
 * \param $verbosity
 
 *   A scale from 0 to 10 determining how loud I should be.
 
 * \return
 
 *   1 on failure, 0 on success.
 
 */
 
function ccbcmd_crawl(Semester $semester, $verbosity = 1)
 
function ccbcmd_crawl(array &$semesters, $verbosity = 1)
 
{
 
  $cookies = array();
 

	
 
  /*
 
   * It seems that http://ccbcmd.edu/schedule/sched.html is what we're
 
   * meant to start from. That's just a redirect to some other page
 
   * from which we get a listing of available semesters and choose
 
   * one.
 
   */
 
  $uri = 'http://ccbcmd.edu/schedule/sched.html';
 
  $semesters_dom = new DOMDocument();
 
  $semesters_dom->loadHTML(school_crawl_geturi($uri, $cookies, NULL, TRUE, 'ccbcmd_crawl_curlhook', $verbosity));
 
  $semesters_select_node = $semesters_dom->getElementById('term_input_id');
 
  if ($semesters_select_node === NULL)
 
    {
 
      fprintf(STDERR, "Could not get list of available semesters to choose from\n");
 
      return 1;
 
    }
 

	
 
  $semester_strings = array($semester->year_get(), ucfirst($semester->season_get()));
 
  $semester_value = NULL;
 
  foreach ($semesters_select_node->childNodes as $semesters_option_node)
 
    {
 
      $semester_match = TRUE;
 
      foreach ($semester_strings as $semester_string)
 
  	if (stripos($semesters_option_node->textContent, $semester_string) === FALSE)
 
  	  {
 
  	    $semester_match = FALSE;
 
  	    break;
 
  	  }
 
      if ($semester_match)
 
  	{
 
  	  $semester_value = $semesters_option_node->getAttribute('value');
 
  	  break;
 
  	}
 
    }
 
  $semester_stage_uri = $uri;
 

	
 
  $semester_string = implode(' ', $semester_strings);
 
  if ($semester_value === NULL)
 
    {
 
      fprintf(STDERR, "Could not find the desired semester, ``%s'', in the list of available semesters.\n",
 
  	      $semester_string);
 
      return 1;
 
    }
 

	
 
  if ($verbosity > 1)
 
    fprintf(STDERR, "Found semester: %s=``%s''=``%s''.\n",
 
  	    $semester_value, $semester_string, trim($semesters_option_node->textContent));
 
  $semesters_form = school_crawl_element_ancestor($semesters_select_node, 'form');
 
  if ($semesters_form === NULL)
 
    {
 
      fprintf(STDERR, "Unable to find <form /> associated with semester.\n");
 
      return 1;
 
    }
 
  $semesters_post = school_crawl_form($semesters_form);
 
  $semesters_post_save = school_crawl_form($semesters_form);
 

	
 
  foreach ($semesters_select_node->childNodes as $semesters_option_node)
 
    {
 
      $semester_text = $semesters_option_node->textContent;
 
      $semester_value = $semesters_option_node->getAttribute('value');
 
      if (empty($semester_value))
 
	/* skip the empty ``None'' semester */
 
	continue;
 

	
 
      if (stripos($semester_text, 'continuing') !== FALSE)
 
	/* skip the year-long semesters dedicated to continuing education */
 
	continue;
 

	
 
      $semester_text_parts = explode(' ', $semester_text);
 
      $semester_season = $semester_text_parts[0];
 
      $semester_year = $semester_text_parts[1];
 

	
 
      /* the college has two separate summer sessions, so distinguish between them */
 
      if (preg_match(';session ([0-9]+);i', $semester_text, $matches))
 
	$semester_season .= '_' . $matches[1];
 

	
 
      if ($verbosity)
 
	fprintf(STDERR, "Crawling semester %s:%s -> %s.\n", $semester_year, $semester_season, $semester_text);
 
      $semester = new Semester($semester_year, strtolower($semester_season));
 

	
 
  if ($verbosity > 1)
 
    fprintf(STDERR, "Found semester: %s=``%s''=``%s''.\n",
 
  	    $semester_value, $semester->id(), trim($semesters_option_node->textContent));
 
  /* load stored semester-page URI / form data */
 
  $semesters_post = $semesters_post_save;
 
  $uri = $semester_stage_uri;
 
  $semesters_post[$semesters_select_node->getAttribute('name')] = $semester_value;
 

	
 
  $subjects_dom = new DOMDocument();
 
  $uri = school_crawl_url($uri, $semesters_form->getAttribute('action'));
 
  $subjects_dom->loadHTML(school_crawl_geturi($uri, $cookies, $semesters_post, TRUE, 'ccbcmd_crawl_curlhook', $verbosity));
 

	
 
  $subjects_form_nodelist = $subjects_dom->getElementsByTagName('form');
 
  if (!$subjects_form_nodelist->length)
 
    {
 
      fprintf(STDERR, "Unable to find <form /> to submit for the subjects choosing page.\n");
 
      return 1;
 
    }
 
  $subjects_form_node = $subjects_form_nodelist->item(0);
 
  $subjects_post = school_crawl_form($subjects_form_node);
 

	
 
  $subjects_select_node = $subjects_dom->getElementById('subj_id');
 
  foreach ($subjects_select_node->childNodes as $subjects_option_node)
 
    if (!strcasecmp('all', trim($subjects_option_node->textContent)))
 
      $subjects_post[$subjects_select_node->getAttribute('name')][] = $subjects_option_node->getAttribute('value');
 

	
 
  $courses_dom = new DOMDocument();
 
  $uri = school_crawl_url($uri, $subjects_form_node->getAttribute('action'));
 
  $courses_dom->loadHTML(school_crawl_geturi($uri, $cookies, $subjects_post, TRUE, 'ccbcmd_crawl_curlhook', $verbosity));
 

	
 
  $courses_xpath = new DOMXPath($courses_dom);
 

	
 
  /* The second row of the table has all of the headers in it */
 
  $tr_header_nodelist = $courses_xpath->query('//table[@class="datadisplaytable" and position()=1]//tr[position()=2]');
 
  if (!$tr_header_nodelist->length)
 
    {
 
      fprintf(STDERR, "Unable to find the row of the course/section data table which gives us the mappings of column names onto columns.\n");
 
      return 1;
 
    }
 
  $tr_header_node = $tr_header_nodelist->item(0);
 

	
 
  $section_offsets = array(
 
			   'registration_number' => school_crawl_table_resolve_column($tr_header_node, 'CRN'),
 
			   'section_id' => school_crawl_table_resolve_column($tr_header_node, 'subj/crse/sec'),
 
			   /* there's a boolean column which says whether or not the course has any prerequisites/corequisites.... */
 
			   'credits' => school_crawl_table_resolve_column($tr_header_node, 'credhrs'),
 
			   /* there's a column for the number of contact hours, vs. credit hours */
 
			   'dates' => school_crawl_table_resolve_column($tr_header_node, 'sessiondates'),
 
			   );
 
  foreach (array('title', 'days', 'times', 'instructor', 'location') as $column_key)
 
    $section_offsets[$column_key] = school_crawl_table_resolve_column($tr_header_node, $column_key);
 
  /* there's also a column for ``session dates'' */
 

	
 
  /* error check and calculate the number of children that a node must have to be  */
 
  $max_offset = 0;
 
  foreach ($section_offsets as $name => $value)
 
    {
 
      if ($value === FALSE)
 
	{
 
	  fprintf(STDERR, "Unable to find column offset for `%s'.\n",
 
		  $name);
 
	  return 1;
 
	}
 
      else
 
	if ($verbosity > 6)
 
	  echo $name . ' -> ' . $value . PHP_EOL;
 

	
 
      $max_offset = max($max_offset, $value);
 
    }
 
      
 
  foreach ($courses_xpath->query('//table[@class="datadisplaytable" and position()=1]//tr') as $tr_node)
 
    {
 
@@ -209,46 +214,57 @@ function ccbcmd_crawl(Semester $semester
 
	 * Make sure that _only_ one date range is specified to ensure
 
	 * data integrity. I.e., make sure that the college doesn't
 
	 * suddenly support multiple meeting times without our
 
	 * anticipating that and then cause us to have invalid
 
	 * data. ;-). --binki
 
	 */
 
	if (strpos($time_end_text, '-') !== FALSE)
 
	  {
 
	    fprintf(STDERR, "College seems to support multiple meeting times per semester which we don't know how to parse (even though slate_permutate itself can handle this situation): ``%s'' time_end_text: ``%s''.\n",
 
		    $time_range_text, $time_end_text);
 
	    return 1;
 
	  }
 
	$time_end = strptime($time_end_text, '%I:%M %p');
 
	if ($time_end === FALSE || $time_start === FALSE)
 
	  {
 
	    fprintf(STDERR, "Error parsing start or end time: start: ``%s'' end: ``%s''.\n",
 
		    $time_start_text, $time_end_text);
 
	    return 1;
 
	  }
 

	
 
	$days = school_crawl_days_str_format($children->item($section_offsets['days'])->textContent);
 

	
 
	$section_meetings[] = new SectionMeeting($days, school_crawl_time_format($time_start), school_crawl_time_format($time_end),
 
						 $children->item($section_offsets['location'])->textContent);
 

	
 
	/* check if a semester's date range should be increased */
 
	$section_dates = $children->item($section_offsets['dates'])->textContent;
 
	if (preg_match(';^([0-9]+)/([0-9]+)-([0-9]+)/([0-9]+)$;', $section_dates, $section_dates_matches))
 
	  {
 
	    $semester->time_start_set_test(mktime(0, 0, 0, $section_dates_matches[1], $section_dates_matches[2], $semester->year_get()));
 
	    $semester->time_end_set_test(  mktime(0, 0, 0, $section_dates_matches[3], $section_dates_matches[4], $semester->year_get()));
 
	  }
 
      }
 

	
 
      $semester->section_add($section_id_parts['department'], $section_id_parts['course'],
 
			     new Section($section_id_parts['section'], $section_meetings, $registration_number, $instructor));
 
    }
 

	
 
  $semesters[] = $semester;
 
    }
 

	
 
  return 0;
 
}
 

	
 
/**
 * \brief
 *   Adjust a cURL handle so that it can talk to CCBCMD's server.
 *
 * ccbcmd_crawl() passes this function's name to
 * school_crawl_geturi() with each request it makes.
 *
 * \param $curl
 *   The cURL handle on which CURLOPT_SSLVERSION is set to 3.
 */
function ccbcmd_crawl_curlhook(&$curl)
{
  /*
   * OK, so this must be set to SSLv2 or SSLv3 because of how the
   * server's SSL junk is messed up. When curl is built against
   * gnutls, though, we can't use SSL2 since it doesn't support that
   * old of a protocol. So, we use 3 which works. Apparently, the
   * server can't handle gnutls's attempt to use TLS. Even openssl's
   * s_client command fails without manually specifying --ssl2 or
   * --ssl3. So, this must be a _really_ weird server setup...
   */
  curl_setopt($curl, CURLOPT_SSLVERSION, 3);
}
school.d/cedarville.crawl.inc
Show inline comments
 
@@ -32,83 +32,113 @@
 
 *   HTML that PHP's DOM would willingly would eat.
 
 */
 
function table_parse($html)
{
  /*
   * Have libxml buffer its complaints about sloppy HTML instead of
   * turning each of them into a PHP warning.
   */
  libxml_use_internal_errors(true);
  $arr = array();
  $dom = new DOMDocument;
  if(!$html)
    return NULL;

  $dom->loadHTML($html);
  $dom->preserveWhiteSpace = FALSE;
  $tables = $dom->getElementsByTagName('table');

  /*
   * A page without any <table /> has no rows to offer. Without this
   * check, item(0) returns NULL and the method call on it is a fatal
   * error.
   */
  if (!$tables->length)
    return $arr;

  /* Only the first table on the page is parsed. */
  $rows = $tables->item(0)->getElementsByTagName('tr');
  foreach ($rows as $rownum => $row) {
    /* rows without any <td /> (e.g., header rows) get no entry in $arr */
    $cols = $row->getElementsByTagName('td');
    foreach($cols as $colnum => $col){
      $arr[$rownum][$colnum] = $col->nodeValue;
    }
  }
  return $arr;
}
 

	
 
/** Crawls Cedarville course listings. $season is "fa" or "sp", year is 4-digit year */
 
function cedarville_crawl($semester, $verbosity = 1)
 
function cedarville_crawl(array &$semesters, $verbosity = 1)
 
{  
 

	
 
  $season = strtolower(substr($semester->season_get(), 0, 2));
 
  $year = $semester->year_get();
 
  $season_string = $year . $season;
 

	
 
  $basepath = 'http://cedarville.edu/courses/schedule/';
 

	
 
  if ($verbosity)
 
    echo "cedarville_crawl(): Beginning crawl of Cedarville:\n";
 

	
 
  if ($verbosity > 1)
 
    echo "cedarville_crawl(): Determining list of departments.\n";
 

	
 
  if ($verbosity > 1)
 
    fprintf(STDERR, "cedarville_crawl(): Determining list of semesters.\n");
 
  $semesters_dom = new DOMDocument();
 
  $semesters_dom->loadHTML(file_get_contents($basepath));
 

	
 
  $content_div_dom = $semesters_dom->getElementById('contenttext');
 
  if (!$content_div_dom)
 
    {
 
      fprintf(STDERR, "cedarville_crawl(): Error finding location of the list of departments.\n");
 
      return 1;
 
    }
 
  $departments_xpath = new DOMXPath($semesters_dom);
 
  foreach ($departments_xpath->query('.//li/a') as $department_a_dom)
 
    {
 
      $semester_href = $department_a_dom->getAttribute('href');
 
      $semester_href_parts = split('_', $semester_href);
 

	
 
      $semester_name = $department_a_dom->textContent;
 
      if (stripos($semester_name, 'graduate') !== FALSE
 
	  || strpos($semester_href, 'index') === FALSE)
 
	/* cedarville has about 1 graduate course, lol */
 
	continue;
 
      $semester_name_parts = split(' ', $semester_name);
 

	
 
      $semester_year = $semester_name_parts[0];
 
      $semester_season = strtolower($semester_name_parts[1]);
 

	
 
      $semester = new Semester($semester_year, $semester_season);
 

	
 
      if ($verbosity > 1)
 
	fprintf(STDERR, "cedarville_crawl(): Crawling semester: %s.\n",
 
		$semester_name);
 

	
 
  /*
 
   * We need two passes because the first department's code name is
 
   * not available in the first pageload.
 
   */
 
  $departments = array();
 
  if (cedarville_crawl_departments_get($basepath . $year . $season . '_index.htm', $departments, $season_string))
 
  if (cedarville_crawl_departments_get($basepath . $semester_href, $departments, $semester_href_parts[0]))
 
    return 1;
 
  if (!count($departments))
 
    {
 
      echo "cedarville_crawl(): Unable to get a listing of departments.\n";
 
      return 1;
 
    }
 
  /* find the first department whose name we don't yet know */
 
  if (cedarville_crawl_departments_get($basepath . $year . $season . '_' . current(array_keys($departments)) . '_all.htm', $departments, $season_string))
 
  if (cedarville_crawl_departments_get($basepath . $semester_href_parts[0] . '_' . current(array_keys($departments)) . '_all.htm', $departments, $semester_href_parts[0]))
 
    return 1;
 

	
 
  $tables = array();
 
  foreach ($departments as $department => $dept_name)
 
    {
 
      echo 'cedarville_crawl(): Crawling department ' . $department . ' (' . $dept_name . ")...\n";
 
      $html = file_get_contents($basepath . $year . $season . '_' . $department . '_' . 'all.htm');
 
      if ($verbosity > 2)
 
	echo 'cedarville_crawl(): Crawling department ' . $department . ' (' . $dept_name . ")...\n";
 
      $html = file_get_contents($basepath . $semester_href_parts[0] . '_' . $department . '_' . 'all.htm');
 
      if (!$html)
 
	continue;
 
      $tables[$department] = table_parse(cedarville_html_fix($html));
 
    }
 

	
 
  $meeting_type_maps = array('LAB' => 'lab', 'LECT' => 'lecture');
 

	
 
  foreach ($tables as $dept_table)
 
    {
 
      /*
 
       * Discard the first row, which has the contents of the <th />
 
       * elements.
 
       */
 
      unset($dept_table[0]);
 

	
 
      foreach($dept_table as $course_table)
 
	{
 
	  /*
 
	   * format:
 
	   * 0: course synonym, an unsigned integer.
 
	   * 1: section spec, parsable by Section::parse().
 
	   * 2: friendly course title.
 
	   * 3: Instructor name.
 
	   * 4: Number of credit hours in decimal notation.
 
@@ -188,48 +218,51 @@ function cedarville_crawl($semester, $ve
 
		}
 
	      /* prepare for parsing the next meeting time */
 
	      $meetings_str = substr($meetings_str, strlen($meeting_matches[0]));
 

	
 
	      $days = school_crawl_days_str_format($meeting_matches[3]);
 
	      $time_start = school_crawl_time_format(strptime($meeting_matches[4] . 'M', '%I:%M%p'));
 
	      $time_end = school_crawl_time_format(strptime($meeting_matches[5] . 'M', '%I:%M%p'));
 
	      $room = $meeting_matches[2];
 

	
 
	      $type = $meeting_matches[1];
 
	      while (isset($meeting_type_maps[$type]))
 
		$type = $meeting_type_maps[$type];
 
	      $type = strtolower($type);
 

	
 
	      $meetings[] = new SectionMeeting($days, $time_start, $time_end,
 
					       $room, $type);
 
	    }
 

	
 
	  $semester->section_add($section_parts['department'], $section_parts['course'],
 
				 new Section($section_parts['section'], $meetings,
 
					     $synonym, $instructor));
 
	}
 
    }
 

	
 
  $semesters[] = $semester;
 
    }
 

	
 
  return 0;
 
}
 

	
 
/**
 
 * \brief
 
 *   Scan cedarville's course listing pages for departments.
 
 *
 
 * \return
 
 *   An associative array mapping department codes onto department
 
 *   friendly names.
 
 */
 
function cedarville_crawl_departments_get($dept_url, array &$departments, $season_string)
 
{
 
  $html = file_get_contents($dept_url);
 
  $dept_dom = new DOMDocument();
 
  if (!$dept_dom->loadHTML(cedarville_html_fix($html)))
 
    {
 
      echo "cedarville_crawl(): Error determining list of available departments: Unable to parse HTML.\n";
 
      return 1;
 
    }
 
  $xpath = new DOMXPath($dept_dom);
 

	
 
  $dept_node_list = $xpath->query('/descendant::div[@id="contenttext"]/child::span[position()=1 or position()=2]/child::a');
 
  foreach ($dept_node_list as $dept_node)
0 comments (0 inline, 0 general)