Changeset - 6cb196f112d9
[Not reviewed]
default
0 7 4
Nathan Brink (binki) - 15 years ago 2010-10-16 11:57:07
ohnobinki@ohnopublishing.net
A school website-crawling infrastructure. Supports crawling Calvin's website and producing JSON for jqueryui's autocomplete functionality. Also creates a JSON description of the list sections for each course, awaiting JS-support for AJAX section autocreation.
11 files changed with 1280 insertions and 18 deletions:
0 comments (0 inline, 0 general)
admin/rehash.php
Show inline comments
 
@@ -26,11 +26,19 @@
 
 * school listing used for the ``choose your school list''.
 
 */
 

	
 
require_once(dirname(__FILE__) . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'inc' . DIRECTORY_SEPARATOR . 'school.inc');
 
$inc_base = dirname(__FILE__) . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'inc' . DIRECTORY_SEPARATOR;
 
require_once($inc_base . 'school.inc');
 
require_once($inc_base . 'school.crawl.inc');
 
require_once($inc_base . 'class.semester.inc');
 

	
 
return main($argc, $argv);
 

	
 
function main($argc, $argv)
 
{
 
  $crawl = TRUE;
 
  $crawl_semester_year = '2011';
 
  $crawl_semester_season = Semester::SEASON_SPRING;
 

	
 
  $school_id_list = school_list();
 
  if (!$school_id_list)
 
    return 1;
 
@@ -38,13 +46,16 @@ function main($argc, $argv)
 
  $schools = array();
 
  foreach ($school_id_list as $school_id)
 
    {
 
      $school = school_load($school_id);
 
      $school = school_load($school_id, TRUE);
 
      if (!$school)
 
	{
 
	  fprintf(STDERR, "Error loading school with school_id=%s\n",
 
		  $school_id);
 
	  return 1;
 
	}
 

	
 
      school_crawl($school, $crawl_semester_year, $crawl_semester_season);
 

	
 
      $schools[] = $school;
 
    }
 

	
 
@@ -106,10 +117,6 @@ function school_cmp($school_a, $school_b
 
 *   Write out the cache file which remembers the list of available
 
 *   schools.
 
 *
 
 * \todo
 
 *   If the list of displayed schools is to be sorted, this is the
 
 *   place to do it.
 
 *
 
 * \param $schools
 
 *   An array of school handles.
 
 */
 
@@ -117,11 +124,17 @@ function school_cache($schools)
 
{
 
  $list_cache = array();
 
  $domain_cache = array();
 

	
 
  $cache_dir_name = dirname(__FILE__) . DIRECTORY_SEPARATOR . '..'
 
    . DIRECTORY_SEPARATOR . 'cache' . DIRECTORY_SEPARATOR;
 
  $cache_auto_dir_name = $cache_dir_name . 'auto' . DIRECTORY_SEPARATOR;
 

	
 
  foreach ($schools as $school)
 
    {
 
      $list_cache[$school['id']] = array(
 
					 'name' => $school['name'],
 
					 'url' => $school['url'],
 
					 'crawled' => $school['crawled'],
 
					 );
 
      foreach ($school['domains'] as $school_domain)
 
	{
 
@@ -143,14 +156,54 @@ function school_cache($schools)
 
	  $domain_part = array_shift($domain_parts);
 
	  $domain_cache_ptr[$domain_part] = $school['id'];
 
	}
 

	
 

	
 
      /* autocomplete stuff -- per school */
 
      if ($school['crawled'])
 
	{
 
	  $semester = $school['crawled_semester'];
 

	
 
	  $cache_auto_school_dir_name = $cache_auto_dir_name . $school['id'] . DIRECTORY_SEPARATOR;
 
	  if (!is_dir($cache_auto_school_dir_name))
 
	    {
 
	      if (!mkdir($cache_auto_school_dir_name, 0777, TRUE))
 
		error_log('Unable to create needed directory: `' . $cache_auto_dir_name . '\'');
 
	    }
 

	
 
	  $departments = $semester->departments_get();
 
	  sort($departments);
 

	
 
	  $dept_file = fopen($cache_auto_school_dir_name . '-depts', 'wb');
 
	  fwrite($dept_file, serialize($departments));
 
	  fclose($dept_file);
 

	
 
	  /* now per-department autocomplete */
 
	  foreach ($departments as $department)
 
	    {
 
	      $classes = $semester->department_classes_get($department);
 
	      $classes_file = fopen($cache_auto_school_dir_name . $department . '.sects', 'wb');
 
	      fwrite($classes_file, serialize($classes));
 
	      fclose($classes_file);
 

	
 
	      /* now individual section informations, pre-JSON-ized */
 
	      foreach ($classes as $class)
 
		{
 
		  if (!is_dir($cache_auto_school_dir_name . $department))
 
		    mkdir($cache_auto_school_dir_name . $department);
 
		  $class_file = fopen($cache_auto_school_dir_name . $department . DIRECTORY_SEPARATOR . $class, 'wb');
 
		  fwrite($class_file, json_encode($semester->class_get($department, $class)->to_json_array()));
 
		  fclose($class_file);
 
		}
 
	    }
 
	}
 

	
 

	
 
    }
 
  uasort($list_cache, 'school_cmp');
 

	
 
  $cache = array('list' => $list_cache, 'domains' => $domain_cache);
 

	
 

	
 
  $cache_file_name = dirname(__FILE__) . DIRECTORY_SEPARATOR . '..'
 
    . DIRECTORY_SEPARATOR . 'cache' . DIRECTORY_SEPARATOR . 'schools';
 
  $cache_file_name =  $cache_dir_name . 'schools';
 
  $cache_file = fopen($cache_file_name, 'wb');
 
  if ($cache_file === FALSE)
 
    {
 
@@ -163,3 +216,63 @@ function school_cache($schools)
 

	
 
  return 0;
 
}
 

	
 
/**
 * \brief
 *   Run a school's registration-data crawler, if it has one.
 *
 * A school supports crawling when a function named
 * <school_id>_crawl() is defined at the time of this call. (Such
 * functions live in school.d/<school_id>.crawl.inc, which
 * school_load() pulls in when asked to load all of a school's .inc
 * files.) The crawler is called as
 * <school_id>_crawl($semester, $verbosity) with a freshly
 * constructed, empty Semester which it is expected to populate. A
 * nonzero (truthy) return value from the crawler means failure.
 *
 * The $school handle is always modified: its 'crawled' key is set to
 * FALSE unless the crawler exists and succeeds, in which case it is
 * set to TRUE and the populated Semester is stored under the
 * 'crawled_semester' key. school_cache() reads both keys when
 * writing the autocomplete caches.
 *
 * \param $school
 *   The school handle (passed by reference) to check for crawl
 *   functionality and to crawl.
 * \param $semester_year
 *   The year of the semester for which data should be grabbed.
 * \param $semester_season
 *   The season of the semester for which data should be grabbed.
 * \param $verbosity
 *   When greater than zero, progress is written to STDERR. The value
 *   is also handed through to the crawler.
 */
function school_crawl(&$school, $semester_year, $semester_season, $verbosity = 1)
{
  /* pessimistic default; flipped only after a successful crawl */
  $school['crawled'] = FALSE;

  $crawler = $school['id'] . '_crawl';
  if (function_exists($crawler))
    {
      $semester = new Semester($semester_year, $semester_season);
      $chatty = $verbosity > 0;

      if ($chatty)
        fwrite(STDERR, sprintf("%s()\n", $crawler));

      $failed = call_user_func($crawler, $semester, $verbosity);
      if ($failed)
        {
          fwrite(STDERR, sprintf("Crawling %s failed: %s() returned nonzero\n",
                                 $school['id'], $crawler));
          fwrite(STDERR, "\n");
        }
      else
        {
          $school['crawled'] = TRUE;
          $school['crawled_semester'] = $semester;

          if ($chatty)
            fwrite(STDERR, "\n");
        }
    }
}
auto.php
Show inline comments
 
new file 100644
 
<?php
 
/*
 
 * Copyright 2010 Nathan Phillip Brink <ohnobinki@ohnopublishing.net>
 
 *
 
 * This file is a part of slate_permutate.
 
 *
 
 * slate_permutate is free software: you can redistribute it and/or modify
 
 * it under the terms of the GNU Affero General Public License as published by
 
 * the Free Software Foundation, either version 3 of the License, or
 
 * (at your option) any later version.
 
 *
 
 * slate_permutate is distributed in the hope that it will be useful,
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
 * GNU Affero General Public License for more details.
 
 *
 
 * You should have received a copy of the GNU Affero General Public License
 
 * along with slate_permutate.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 

	
 
/**
 
 * \file
 
 *   This file's purpose is to autocomplete class names for supporting
 
 *   the autocomplete JS based off of crawling schools' registration
 
 *   websites. This shall only perform the autocompletion of class
 
 *   names.
 
 *
 
 *   Since we output JSON, no special Page classes and stuff
 
 *   :-p. Except we still call the Page class's session_start()
 
 *   function because we apparently need sessions.... oh yeah, for
 
 *   school profile supports ;-).
 
 */
 

	
 
require_once('inc/school.inc');
 
require_once('inc/class.page.php');
 
require_once('class.class.php');
 

	
 
/* sessions are needed so that the user's school profile can be found */
Page::session_start();

/*
 * The response is JSON unless the 'txt' request parameter is
 * present, in which case it is labelled as plain text. The MIME
 * parameter naming the character set is `charset', not `encoding'.
 */
if (isset($_REQUEST['txt']))
  header('Content-Type: text/plain; charset=utf-8');
else
  header('Content-Type: application/json; charset=utf-8');

/*
 * Without a usable search term there is nothing to complete. A
 * non-string term (e.g., term[]=x) is treated like a missing one
 * instead of being handed to the parser.
 */
if (!isset($_REQUEST['term']) || !is_string($_REQUEST['term']))
  clean_empty_exit();

/* presence of 'getsections' asks for a course's section listing */
$getsections = isset($_REQUEST['getsections']);

$term = $_REQUEST['term'];
$term_parts = Classes::parse($term);
if (!count($term_parts))
  clean_empty_exit();

/*
 * Only schools which were crawled have autocomplete data. empty()
 * also covers a school which failed to load or whose handle lacks
 * the 'crawled' key.
 */
$school = school_load_guess();
if (empty($school['crawled']))
  clean_empty_exit();
 

	
 
/* directory holding this school's autocomplete caches */
$cache_dir = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'cache' . DIRECTORY_SEPARATOR . 'auto' . DIRECTORY_SEPARATOR . $school['id'] . DIRECTORY_SEPARATOR;

/*
 * Autocomplete the list of departments. If the user has already
 * entered a valid department name _and_ delimited it, however, go on
 * to the next autocompletion step.
 */
$term_strlen = strlen($term);
$dept = $term_parts['department'];
if (!$getsections && count($term_parts) == 1 && $term_strlen == strlen($dept))
  {
    $dept_file = $cache_dir . '-depts';
    if (!file_exists($dept_file))
      clean_empty_exit();

    /* the cache is a serialize()d array of department names */
    $all_departments = unserialize(file_get_contents($dept_file));
    if (!is_array($all_departments))
      clean_empty_exit();

    /*
     * Collect matches into a fresh, sequentially-indexed array so
     * that json_encode() always emits a JSON list -- even when
     * nothing matches or when the matches are not at the start of
     * the cached list.
     */
    $departments = array();
    foreach ($all_departments as $department)
      {
        if (!strncmp($department, $dept, $term_strlen))
          $departments[] = $department . '-';
      }

    echo json_encode($departments);
    exit(0);
  }
 

	
 
/*
 * Section listing for one course: the cache holds a pre-JSON-ized
 * file per course which is sent to the client as-is.
 */
if ($getsections)
  {
    /*
     * A course number is required. Without one the path would name
     * the department's directory, which must not be handed to
     * readfile(); is_file() rejects directories as well.
     */
    if (isset($term_parts['course']))
      {
        $section_file = $cache_dir . $dept . DIRECTORY_SEPARATOR . $term_parts['course'];
        if (is_file($section_file))
          {
            readfile($section_file);
            exit(0);
          }
      }

    /* section not found! */
    header('HTTP/1.1 404 Not Found');
    header('Content-Type: text/plain; charset=utf-8');
    echo 'Could not find course ' . implode('-', $term_parts) . "\n";
    exit(0);
  }
 

	
 
/*
 * A department has been fully entered. Ideally only the first digit
 * of the course number would be completed (CS-2 for CS-262 when the
 * student typed CS- or 'CS'), but for now every course of the
 * department which starts with what was typed is returned.
 */
$classes_file = $cache_dir . $dept . '.sects';
if (file_exists($classes_file))
  {
    /* the cache is a serialize()d array of course numbers */
    $classes = unserialize(file_get_contents($classes_file));

    /* the course-number prefix typed so far; empty matches everything */
    $class_start = count($term_parts) > 1 ? $term_parts['course'] : '';
    $class_start_strlen = strlen($class_start);

    /* keep only the courses which begin with that prefix */
    $json_classes = array();
    foreach ($classes as $class)
      {
        if (strncmp($class, $class_start, $class_start_strlen))
          continue;
        $json_classes[] = $dept . '-' . $class;
      }

    echo json_encode($json_classes);
    exit(0);
  }

/*
 * Nothing caught: no class list is cached for this department.
 */
echo '["Oops"]';
exit(0);
 

	
 
/**
 * \brief
 *   Emit an empty JSON array as the response body and terminate the
 *   script with exit status 0.
 */
function clean_empty_exit()
{
  print('[]');
  exit(0);
}
class.class.php
Show inline comments
 
@@ -31,7 +31,17 @@ class Classes
 
    $this->sections[$this->nsections] = new Section($l, $p, $s, $e, $d);
 
    $this->nsections++;
 
  }
 
	
 

	
 
  /**
   * \brief
   *   Append an already-instantiated Section to this class.
   *
   * \param $section
   *   The Section to store in the next free slot of the section
   *   list.
   */
  public function section_add(Section $section)
  {
    $slot = $this->nsections;
    $this->sections[$slot] = $section;
    $this->nsections = $slot + 1;
  }
 

	
 
  //--------------------------------------------------
 
  // Returns the number of sections in the class.
 
  //--------------------------------------------------
 
@@ -84,4 +94,55 @@ class Classes
 

	
 
    return $out;
 
  }
 

	
 
  /**
   * \brief
   *   Break a user-friendly course specification into its parts.
   *
   * This delegates to Section::parse() and then discards the
   * 'section' component, so only 'department' and 'course' can be
   * present in the result.
   *
   * \see Section::parse()
   *
   * \param $course_spec
   *   A course specifier to parse, such as 'cs262' or 'MATH-156'.
   * \return
   *   An array which may have the keys 'department' and 'course'. If
   *   the input holds less than these two pieces of information, the
   *   array has only one element or is empty.
   */
  public static function parse($course_spec)
  {
    $parts = Section::parse($course_spec);

    /* unset() is harmless when no section was parsed */
    unset($parts['section']);

    return $parts;
  }
 

	
 
  /**
   * \brief
   *   String conversion: a class is represented by its name, as
   *   returned by getName().
   */
  public function __toString()
  {
    $name = $this->getName();

    return $name;
  }
 

	
 
  /**
   * \brief
   *   Describe this class as a plain array ready to be JSONized.
   *
   * \return
   *   An array with the key 'class' (this class's name) and the key
   *   'sections' (a list holding each section's own to_json_array()
   *   result, in storage order).
   */
  public function to_json_array()
  {
    $name = $this->getName();

    $sections_json = array();
    foreach ($this->sections as $section)
      $sections_json[] = $section->to_json_array();

    return array('class' => $name,
                 'sections' => $sections_json);
  }
 
}
class.section.php
Show inline comments
 
@@ -278,4 +278,82 @@ class Section
 

	
 
    return $out;
 
  }
 

	
 
  /**
   * \brief
   *   Splits up a section specifier into department, course number,
   *   and section.
   *
   * For example, this returns array('department' => 'CS', 'course'
   * => '262', 'section' => 'A') for 'CS-262-A', 'CS262A', or
   * 'cs-262a'. This function is not for dealing with course
   * synonyms.
   *
   * \param $section_spec
   *   A string holding a section specifier. Anything before the
   *   first run of letters is ignored.
   *
   * \return
   *   A keyed array whose items may be referred to as 'department',
   *   'course', and 'section'. If only the department is found, the
   *   array has one element. If the course number is also found,
   *   both 'department' and 'course' are present. If all three are
   *   found, the array has three elements. The array is empty if not
   *   even a department could be found. Department and section are
   *   uppercased.
   */
  public static function parse($section_spec)
  {
    $ret = array();

    $section_spec = trim($section_spec);
    if (!preg_match(';([a-zA-Z]+)[^0-9]*;', $section_spec, $dept_matches, PREG_OFFSET_CAPTURE))
      return $ret;

    /*
     * Remove the already-parsed stuff, including gunk between the
     * dept and the course num. The pattern is not anchored, so the
     * match may start after some leading junk; cut at the end of the
     * match (offset + length) instead of assuming it began at
     * offset 0.
     */
    $section_spec = trim((string)substr($section_spec, $dept_matches[0][1] + strlen($dept_matches[0][0])));
    $ret['department'] = strtoupper($dept_matches[1][0]);

    if (!preg_match(';([0-9]+)[^a-zA-Z0-9]*;', $section_spec, $course_matches, PREG_OFFSET_CAPTURE))
      return $ret;

    /* skip gunk between the course number and the section */
    $section_spec = trim((string)substr($section_spec, $course_matches[0][1] + strlen($course_matches[0][0])));
    $ret['course'] = $course_matches[1][0];

    /*
     * we accept _either_ alphabetic section _or_ numeric section (the
     * latter is for cedarville, particularly)
     */
    if (!preg_match(';([0-9]+|[a-zA-Z]+);', $section_spec, $section_matches))
      return $ret;

    $ret['section'] = strtoupper($section_matches[1]);

    return $ret;
  }
 

	
 
  /**
   * \brief
   *   Collect the information about this section which the AJAX
   *   code needs.
   *
   * \return
   *   An array with the keys 'section', 'prof', 'time_start',
   *   'time_end', and 'days'. 'days' maps the one-letter keys 'm',
   *   't', 'w', 'u', 'f' to the result of getDay() for day indexes 0
   *   through 4, respectively.
   */
  public function to_json_array()
  {
    /* one-letter JSON key for each day index understood by getDay() */
    $day_keys = array(0 => 'm', 1 => 't', 2 => 'w', 3 => 'u', 4 => 'f');

    $days = array();
    foreach ($day_keys as $day_index => $day_key)
      $days[$day_key] = $this->getDay($day_index);

    return array('section' => $this->letter,
                 'prof' => $this->prof,
                 'time_start' => $this->start,
                 'time_end' => $this->tend,
                 'days' => $days,
                 );
  }
 
}
inc/class.semester.inc
Show inline comments
 
new file 100644
 
<?php
 
/*
 
 * Copyright 2010 Nathan Phillip Brink <ohnobinki@ohnopublishing.net>
 
 *
 
 * This file is a part of slate_permutate.
 
 *
 
 * slate_permutate is free software: you can redistribute it and/or modify
 
 * it under the terms of the GNU Affero General Public License as published by
 
 * the Free Software Foundation, either version 3 of the License, or
 
 * (at your option) any later version.
 
 *
 
 * slate_permutate is distributed in the hope that it will be useful,
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
 * GNU Affero General Public License for more details.
 
 *
 
 * You should have received a copy of the GNU Affero General Public License
 
 * along with slate_permutate.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 

	
 
$root_dir = dirname(__FILE__) . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR;
 
require_once($root_dir . 'class.class.php');
 
require_once($root_dir . 'class.section.php');
 

	
 
/**
 * \brief
 *   Identifies a school semester and acts as a container for courses
 *   offered in a semester.
 */
class Semester
{
  /**
   * \brief
   *   The Fall season.
   */
  const SEASON_FALL = 'fall';

  /**
   * \brief
   *   The Spring season.
   */
  const SEASON_SPRING = 'spring';

  /**
   * \brief
   *   The year given to the constructor.
   *
   * Declared explicitly (it used to be created on the fly by the
   * constructor); left public so that existing accesses keep
   * working.
   */
  public $year;

  /**
   * \brief
   *   The season given to the constructor.
   */
  public $season;

  /**
   * \brief
   *   Two-level array: department => (course number => Classes).
   */
  public $departments = array();

  /**
   * \brief
   *   Instantiate an empty Semester.
   *
   * \param $year
   *   The year of this semester. Must be four characters long.
   * \param $season
   *   The season of this semester. Currently, only
   *   Semester::SEASON_SPRING and Semester::SEASON_FALL are valid.
   * \throws ErrorException
   *   If $season or $year is invalid.
   */
  public function __construct($year, $season)
  {
    /*
     * Strict comparison: with loose comparison, values such as 0 or
     * TRUE compare equal to the season strings on older PHP versions
     * and would slip through.
     */
    if (!in_array($season, array(self::SEASON_SPRING, self::SEASON_FALL), TRUE))
      throw new ErrorException('Attempt to construct a Semester with a $season which is neither Semester::SEASON_SPRING nor Semester::SEASON_FALL. `' . $season . '\' was given.');
    $this->season = $season;

    if (strlen($year) != 4)
      throw new ErrorException('Attempt to construct a Semester with an invalid year. The given year is `' . $year . '\'');
    $this->year = $year;

    $this->departments = array();
  }

  /**
   * \brief
   *   Add a class to this Semester.
   *
   * The class is filed under the department and course number which
   * Classes::parse() extracts from its name. A class previously
   * stored under the same department and course number is replaced.
   *
   * \param $class
   *   The class/course to add.
   * \throws ErrorException
   *   If no course number can be parsed out of the class's name.
   */
  public function class_add(Classes $class)
  {
    $class_parts = Classes::parse($class->getName());
    if (!isset($class_parts['course']))
      throw new ErrorException('I was given a class with an invalid name: `' . $class->getName() . '\'');

    if (!isset($this->departments[$class_parts['department']]))
      $this->departments[$class_parts['department']] = array();

    $this->departments[$class_parts['department']][$class_parts['course']] = $class;
  }

  /**
   * \brief
   *   Retrieve a class.
   *
   * \param $dept
   *   The class's department. 'CS' for 'CS-262'.
   * \param $class
   *   The course/class number. '262' for 'cs-262'.
   * \return
   *   A Classes or NULL if not found.
   */
  public function class_get($dept, $class)
  {
    if (!isset($this->departments[$dept][$class]))
      return NULL;

    return $this->departments[$dept][$class];
  }

  /**
   * \brief
   *   Gets a list of departments available in this semester.
   *
   * \return
   *   An array of department names, in the order in which the
   *   departments were first added.
   */
  public function departments_get()
  {
    return array_keys($this->departments);
  }

  /**
   * \brief
   *   Gets a list of class/course numbers available for a particular
   *   department.
   *
   * \param $dept
   *   The department whose course numbers should be listed.
   * \return
   *   An array of course numbers. These are array keys, so PHP hands
   *   purely numeric ones back as integers.
   * \throws ErrorException
   *   If this semester has no such department.
   */
  public function department_classes_get($dept)
  {
    if (!isset($this->departments[$dept]))
      throw new ErrorException('I was asked for a department I don\'t own: ' . $dept);

    return array_keys($this->departments[$dept]);
  }

  /**
   * \brief
   *   Utility function to add a section to the semester,
   *   automatically creating classes as necessary.
   *
   * \param $dept
   *   The department this section belongs to.
   * \param $class
   *   The class this section belongs to.
   * \param $section
   *   The section itself.
   */
  public function section_add($dept, $class, Section $section)
  {
    $dept = strtoupper($dept);
    $class = strtoupper($class);

    /*
     * NOTE(review): class_add() files a new class under whatever
     * Classes::parse() extracts from "$dept-$class", while the lookup
     * here uses $dept and $class verbatim. The two presumably only
     * agree when $dept is purely alphabetic and $class purely
     * numeric; otherwise a fresh class would be created (and the
     * stored one replaced) on every call -- confirm against the
     * crawlers' inputs.
     */
    $classobj = $this->class_get($dept, $class);
    if ($classobj === NULL)
      {
        $classobj = new Classes($dept . '-' . $class);
        $this->class_add($classobj);
      }

    $classobj->section_add($section);
  }

  /**
   * \brief
   *   Get a semester's year.
   */
  public function year_get()
  {
    return $this->year;
  }

  /**
   * \brief
   *   Get a semester's season.
   */
  public function season_get()
  {
    return $this->season;
  }
}
inc/school.crawl.inc
Show inline comments
 
new file 100644
 
<?php
 
/*
 
 * Copyright 2010 Nathan Phillip Brink <ohnobinki@ohnopublishing.net>
 
 *
 
 * This file is a part of slate_permutate.
 
 *
 
 * slate_permutate is free software: you can redistribute it and/or modify
 
 * it under the terms of the GNU Affero General Public License as published by
 
 * the Free Software Foundation, either version 3 of the License, or
 
 * (at your option) any later version.
 
 *
 
 * slate_permutate is distributed in the hope that it will be useful,
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
 * GNU Affero General Public License for more details.
 
 *
 
 * You should have received a copy of the GNU Affero General Public License
 
 * along with slate_permutate.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 

	
 
/**
 
 * \file
 
 *   Routines that are only useful when crawling schools' websites for
 
 *   autofill section data.
 
 */
 

	
 
/**
 * \brief
 *   Turn a broken-down time into slate_permutate's time
 *   representation: four digits, zero-padded hour followed by
 *   zero-padded minute (e.g., '0905').
 *
 * \param $time
 *   An array compatible with the return value of strptime(). The only
 *   fields we use are 'tm_hour', which is from 0 through 23, and
 *   'tm_min', which may be from 0 through 59.
 * \return
 *   The formatted time string.
 */
function school_crawl_time_format($time)
{
  $hour = $time['tm_hour'];
  $minute = $time['tm_min'];

  return vsprintf('%02d%02d', array($hour, $minute));
}
 

	
 
/**
 * \brief
 *   Assemble an array of day names into slate_permutate's internal
 *   representation of a set of weekdays: a string of digits, 1 for
 *   Monday through 5 for Friday.
 *
 * This function is meant to be fed the output of an explode()
 * call. For example, to decode $days_str = 'Monday, Tuesday,
 * Friday', one would do
 * school_crawl_days_format(explode(', ', $days_str));
 *
 * \param $days
 *   An array of day names. Only the first two characters of each
 *   (after trimming whitespace) are looked at, so common
 *   abbreviations and truncations work. One-char representations
 *   are supported too, but must use 'm', 't', 'w', 'h', 'f' to
 *   distinguish thursday from tuesday. Case does not matter.
 *   Unrecognized entries are reported through error_log() and
 *   otherwise ignored.
 * \return
 *   The digits of the recognized days, each at most once, in the
 *   order in which the days first appeared in $days.
 */
function school_crawl_days_format($days)
{
  /* one-char day code => slate_permutate day digit */
  static $daymap_1 = array('m' => 1, 't' => 2, 'w' => 3, 'h' => 4, 'f' => 5);
  /* two-char prefixes whose one-char code is not their first char */
  static $daymap_2 = array('th' => 'h');

  $seen_days = array();
  foreach ($days as $day_orig)
    {
      $code = strtolower(substr(trim($day_orig), 0, 2));

      /* reduce a two-char prefix to the one-char code */
      if (strlen($code) > 1)
        $code = isset($daymap_2[$code]) ? $daymap_2[$code] : substr($code, 0, 1);

      if (!isset($daymap_1[$code]))
        {
          error_log('school_crawl_days_format() got invalid day specifier:'
                    . ' `' . $day_orig . '\' => `' . $code . '\'');
          continue;
        }

      /* keyed by digit so that repeated days collapse */
      $seen_days[$daymap_1[$code]] = TRUE;
    }

  return implode('', array_keys($seen_days));
}
inc/school.inc
Show inline comments
 
@@ -46,27 +46,48 @@
 
 * \param $school_id
 
 *   The school's alphanumeric identifier (which determines the name
 
 *   of the school's *.inc file).
 
 * \param $load_all_inc
 
 *   Asks for a school's extraneous .inc files to be loaded
 
 *   too. Intended for use by rehash.php only.
 
 * \return
 
 *   A school_profile handle or NULL on error.
 
 */
 
function school_load($school_id, $load_all_inc = FALSE)
{
  $school = array('id' => $school_id);

  /*
   * guard against cracking attempts (protects against '../' and
   * friends). \z instead of $ so that a trailing newline is rejected
   * too.
   */
  if (!preg_match('/^[0-9a-z]+\z/', $school_id))
    return NULL;

  $school_file_name_base = dirname(__FILE__) . DIRECTORY_SEPARATOR
    . '..' . DIRECTORY_SEPARATOR . 'school.d' . DIRECTORY_SEPARATOR;
  $school_file_name = $school_file_name_base . $school_id . '.inc';

  if (!file_exists($school_file_name))
    return NULL;

  require_once($school_file_name);
  if ($load_all_inc)
    {
      /* the crawler is optional; not every school has one */
      $school_crawl_file_name = $school_file_name_base . $school_id . '.crawl.inc';
      if (file_exists($school_crawl_file_name))
        require_once($school_crawl_file_name);
    }

  /* a school file which lacks its <school_id>_info() is an error */
  $school_info = $school_id . '_info';
  if (!function_exists($school_info))
    return NULL;
  $school += $school_info();

  /*
   * append small amount of info from the cache entry for this school:
   * whether or not it was crawled.
   *
   * Perhaps this stuff should be just moved into the _info function
   * for efficiency.
   */
  $cache = _school_cache_load();
  if ($cache && isset($cache['list'][$school['id']]['crawled']))
    $school['crawled'] = $cache['list'][$school['id']]['crawled'];

  /*
   * a school missing from the cache (or an absent cache) means that
   * no crawl data is known; make sure that the key exists anyway.
   */
  if (!isset($school['crawled']))
    $school['crawled'] = FALSE;

  return $school;
}
 

	
school.d/calvin.crawl.inc
Show inline comments
 
new file 100644
 
<?php
 
/*
 
 * Copyright 2010 Nathan Phillip Brink <ohnobinki@ohnopublishing.net>
 
 *
 
 * This file is a part of slate_permutate.
 
 *
 
 * slate_permutate is free software: you can redistribute it and/or modify
 
 * it under the terms of the GNU Affero General Public License as published by
 
 * the Free Software Foundation, either version 3 of the License, or
 
 * (at your option) any later version.
 
 *
 
 * slate_permutate is distributed in the hope that it will be useful,
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
 * GNU Affero General Public License for more details.
 
 *
 
 * You should have received a copy of the GNU Affero General Public License
 
 * along with slate_permutate.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 

	
 
/**
 
 * \brief
 
 *   Crawls Calvin's registration course listing pages.
 
 *
 
 * \param $semester
 
 *   The Semester object which I should populate.
 
 * \param $verbosity
 
 *   How verbose I should be. Sensible range is from 0 through 10.
 
 */
 
function calvin_crawl(Semester $semester, $verbosity = 1)
 
{
 
  /**
 
   * collect a few pbasic stats
 
   */
 
  $skipped_sections = array('incomplete meeting info' => 0, 'invalid meeting info format' => 0);
 

	
 
  /**
 
   * The first link we start at is the one from KV into WebAdvisor.
 
   *
 
   * 1. https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL
 
   *    <body onload="javascript:getWindowHTML();">
 
   *
 
   *    Calls javascript:getWindowHTML(). This merely adds
 
   *    TOKENIDX=NULL to the query string, so we can skip this step
 
   *    and just have TOKENIDX=NULL.
 
   *
 
   * 2. https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL&TOKENIDX=NULL
 
   *    <body onload="javascript:setWindowHTML('', '7699844013');">
 
   *
 
   *    In the above, the second argument to setWindowHTML() is
 
   *    random. Thus, we have to capture this value.
 
   */
 

	
 
  $cookies = array();
 

	
 
  $baseuri = 'https://kvdata.calvin.edu/walive/WebAdvisor?CONSTITUENCY=WBST&type=P&pid=ST-WESTS12A&LASTTOKEN=NULL';
 

	
 
  $token_uri = $baseuri . '&TOKENIDX=NULL';
 
  $token_html = calvin_crawl_noscript_filter(geturi($token_uri, $cookies));
 
  if (!preg_match('/setWindowHTML\(\'\', \'([0-9]+)\'\);/', $token_html, $matches))
 
    {
 
      fprintf(STDERR, "Could not steal the token\n");
 
      return 1;
 
    }
 
  $token = $matches[1];
 

	
 
  if ($verbosity > 5)
 
    {
 
      echo 'token: ' . $token . "\n";
 
      echo "\n";
 
    }
 

	
 
  /*
 
   * here we have arrived at the main webadvisor screen which lists the
 
   * search form. From here, we can get a list of all of the departments
 
   * that Calvin College has and then know enough to query each
 
   * individual department for courses.
 
   */
 
  $uri = $baseuri . '&TOKENIDX=' . $token;
 
  $departments_html = calvin_crawl_noscript_filter(geturi($uri, $cookies));
 

	
 
  $departments_dom = new DOMDocument();
 
  $departments_dom->loadHTML($departments_html);
 

	
 
  /*
 
   * Discover the available semesters
 
   */
 
  $semesters_select_nodes = $departments_dom->getElementById('VAR1')->childNodes;
 
  $semester_strs = array();
 
  foreach ($semesters_select_nodes as $semester_node)
 
    {
 
      if ($semester_node->tagName != 'option'
 
	  || !$semester_node->hasAttribute('value'))
 
	continue;
 
      $semester_strs[$semester_node->getAttribute('value')] =
 
	$semester_node->nodeValue;
 
    }
 

	
 
  $departments_select_nodes = $departments_dom->getElementById('LIST_VAR1_1')->childNodes;
 
  $departments = array();
 
  foreach ($departments_select_nodes as $dept_node)
 
    {
 
      if ($dept_node->tagName != 'option'
 
	  || !$dept_node->hasAttribute('value'))
 
	continue;
 
      $departments[$dept_node->getAttribute('value')] =
 
	$dept_node->nodeValue;
 
    }
 

	
 

	
 
  /*
 
   * get all of the different possible course levels... dynamically
 
   * rather than hardcodedly ;-).
 
   */
 
  $departments_select_nodes = $departments_dom->getElementById('LIST_VAR1_2')->childNodes;
 
  $course_levels = array();
 
  foreach ($departments_select_nodes as $courselevel_node)
 
    {
 
      if ($courselevel_node->tagName != 'option'
 
	  || !$courselevel_node->hasAttribute('value'))
 
	continue;
 
      $course_levels[] = $courselevel_node->getAttribute('value');
 
    }
 

	
 
  $return_url = dom_input_value($departments_dom, 'RETURN.URL');
 

	
 

	
 
  /* ARCT only has >=200 level courses */
 
  $dept = '';
 
  $course_level = '';
 
  $semester_str = substr($semester->year_get(), 2) . '/';
 
  switch ($semester->season_get())
 
    {
 
    case Semester::SEASON_SPRING:
 
      $semester_str .= 'SP';
 
      break;
 

	
 
    case Semester::SEASON_FALL:
 
      $semester_str .= 'FA';
 
      break;
 
    }
 
  if (!isset($semester_strs[$semester_str]))
 
    error_log('Couldn\'t find a semester in Calvin\'s database for ' . $semester_str . ' (' . $semester->season_get() . ', ' . $semester->year_get() . ')');
 

	
 

	
 
  /*
 
   * LIST.VAR<X>_<N>: <X> is the column, <N> is the row. There
 
   * are apparently a max of 5 rows (see the LIST.VAR<X>_MAX
 
   * below).
 
   *
 
   * Columns:
 
   * LIST.VAR1: department
 
   * LIST.VAR2: course_level
 
   * LIST.VAR3: IIRC, a course identifier, such as 156 from MATH-156
 
   * LIST.VAR4: I forget
 
   *
 
   */
 
  $form = array('VAR1' => $semester_str,
 
		'LIST.VAR1_1' => $dept,
 
		'LIST.VAR2_1' => $course_level,
 
		);
 

	
 
  /*
 
   * other form items we're not querying but which need to be
 
   * sent blankly
 
   */
 
  $form += array(
 
		 'RETURN.URL' => $return_url,
 
		 'SUBMIT_OPTIONS' => '',
 
		 /*
 
		  * The submit button... its value="" key is
 
		  * apparently sent with the form... makes a
 
		  * little bit of sense I guess ;-).
 
		  */
 
		 /*'SUBMIT2' => 'SUBMIT',*/
 

	
 
		 'DATE.VAR1' => '',
 
		 'DATE.VAR2' => '',
 

	
 
		 'LIST.VAR1_CONTROLLER' => 'LIST.VAR1',
 
		 'LIST.VAR1_MEMBERS' => 'LIST.VAR1*LIST.VAR2*LIST.VAR3*LIST.VAR4',
 
		 );
 
  foreach (array('1', '2', '3', '4') as $list_col)
 
    {
 
      $colname = 'LIST.VAR' . $list_col;
 
      if (!isset($form[$colname . '_MAX']))
 
	$form[$colname . '_MAX'] = '5';
 

	
 
      foreach (array('1', '2', '3', '4', '5') as $list_row)
 
	{
 
	  $rowname = $colname . '_' . $list_row;
 
	  if (!isset($form[$rowname]))
 
	    $form[$rowname] = '';
 
	}
 
    }
 

	
 
  /*
 
   * VAR7 and VAR 8 is a constraint of times during which
 
   * courses meet
 
   */
 
  $form['VAR7'] = '';
 
  $form['VAR8'] = '';
 

	
 
  /* ``course title keywords'' */
 
  $form['VAR3'] = '';
 

	
 
  /* ? */
 
  $form['VAR6'] = '';
 
  $form['VAR21'] = '';
 

	
 
  /* instructor's last name */
 
  $form['VAR9'] = '';
 

	
 
  /*
 
   * VAR10 through VAR16 are Monday through Sunday checkboxes
 
   * for days of the week that classes meet.
 
   *
 
   * But we specify no days of the week to avoid this being a
 
   * constraint ;-).
 
   */
 
  /*
 
    for ($day = 10; $day <= 16; $day ++)
 
    $form['VAR' . $day] = '';
 
  */
 

	
 
  /*
 
   * pages is populated by preg_match() below after the first looping.
 
   */
 
  $pages = array(1 => 0, 2=> 1);
 
  while ($pages[1] < $pages[2])
 
    {
 
      $html = calvin_crawl_noscript_filter(geturi($uri, $cookies, $form));
 

	
 
      $results_dom = new DOMDocument();
 
      $results_dom->loadHTML($html);	
 

	
 
      $list_done = FALSE;
 
      for ($list_row = 1; !$list_done; $list_row ++)
 
	{
 
	  /* either 'Open' (or 'Closed'?) */
 
	  $openness = dom_input_value($results_dom, 'LIST.VAR1_' . $list_row);
 
	  $sec_short_title = dom_id_content($results_dom, 'SEC_SHORT_TITLE_' . $list_row);
 
	  $sec_meeting_info = dom_id_content($results_dom, 'SEC_MEETING_INFO_' . $list_row);
 

	
 
	  /* check if we're done with this particular page */
 
	  if (!strlen($openness) && !strlen($sec_short_title) && !strlen($sec_meeting_info))
 
	    {
 
	      $list_done = TRUE;
 
	      break;
 
	    }
 

	
 
	  /*
 
	   * the same info below should be gettable with 
 
	   * dom_id_content($results_dom, 'SEC_FACULTY_INFO_' . $list_row);
 
	   */
 
	  $faculty_name = dom_input_value($results_dom, 'SEC.FACULTY.INFO_' . $list_row);
 
	  $credits = dom_input_value($results_dom, 'SEC.MIN.CRED_' . $list_row); /* or id="SEC_FACULTY_INFO_$list_row" */
 
	  $comment = dom_id_content($results_dom, 'SEC_COMMENTS_' . $list_row); /* or name="SEC.COMMENTS_$list_row" */
 

	
 
	  /* parse */
 
	  $section_id = Section::parse($sec_short_title);
 

	
 
	  if ($verbosity > 6)
 
	    {
 
	      echo "\n";
 
	      echo implode('-', $section_id) . ': ' . $sec_short_title . "\n";
 
	      echo $openness . "\n";
 
	      echo $sec_meeting_info . "\n";
 
	      echo $faculty_name . "\n";
 
	      echo $credits . "\n";
 
	      echo $comment . "\n";
 
	    }
 

	
 
	  /*
 
	   * The input format for this is, thankfully, pretty rigid
 
	   * :-D. Example input format:
 
	   *
 
	   * '01/31/2011-05/11/2011 Lecture Monday, Wednesday 01:00PM - 03:50PM, Spoelhof Center, Room 101'
 
	   *
 
	   * OR
 
	   *
 
	   * '01/31/2011-05/18/2011 Practicum Days to be Announced, Times to be AnnouncedTo Be Arranged, Room TBA'
 
	   *
 
	   * In this latter case.... we'll just ignore the section.
 
	   *
 
	   * At this point, we don't parse most tokens. We group them
 
	   * off. We get the first date, the second date, the type
 
	   * ('Lecture', 'Practicum', or some other unknown value),
 
	   * the list of days of week the section meets, the start
 
	   * time, the end time, and then the meeting location.
 
	   */
 
	  if (strpos($sec_meeting_info, 'Times to be Announced') !== FALSE
 
	      || strpos($sec_meeting_info, 'Days to be Announced') !== FALSE)
 
	    {
 
	      if ($verbosity > 2)
 
		error_log('Skipping class because of incomplete meeting time information: '
 
			  . implode('-', $section_id) . ' has meeting info of `'
 
			  . $sec_meeting_info . '\'');
 
	      $skipped_sections['incomplete meeting info'] ++;
 
	      continue;
 
	    }
 

	
 
	  if (!preg_match(';^([0-9]{2}/[0-9]{2}/[0-9]{4})-([0-9]{2}/[0-9]{2}/[0-9]{4}) ([^ ]+) ([^0-9]+) ([^ ]+) - ([^ ]+), (.*)$;', $sec_meeting_info, $meeting_info_matches))
 
	    {
 
	      error_log('Unable to parse calvin section meeting info string into start/end/days information for '
 
			. implode('-', $section_id) . ': ``' . $sec_meeting_info . '\'\'');
 
	      $skipped_sections['invalid meeting info format'] ++;
 
	      continue;
 
	    }
 
	  $date_start = $meeting_info_matches[1];
 
	  $date_end = $meeting_info_matches[2];
 
	  /* e.g., 'Lecture', 'Practicum' */
 
	  $meeting_type = $meeting_info_matches[3];
 
	  $days = school_crawl_days_format(explode(', ', $meeting_info_matches[4]));
 
	  $time_start = school_crawl_time_format(strptime($meeting_info_matches[5], '%I:%M%p'));
 
	  $time_end = school_crawl_time_format(strptime($meeting_info_matches[6], '%I:%M%p'));
 
	  $meeting_place = $meeting_info_matches[7];
 

	
 
	  if ($verbosity > 5)
 
	    foreach (array('date_start', 'date_end', 'meeting_type', 'days', 'time_start', 'time_end', 'meeting_place') as $var)
 
	      echo $var . ':' . ${$var} . "\n";
 

	
 
	  $section = new Section($section_id['section'], $faculty_name, $time_start, $time_end, $days);
 
	  $semester->section_add($section_id['department'], $section_id['course'], $section);
 
	}
 

	
 
      if (!preg_match(';Page ([0-9]+) of ([0-9]+)\</td\>$;m', $html, $pages))
 
	{
 
	  error_log('Unable to determine the number of pages in this Calvin resultset');
 
	  break;
 
	}
 

	
 
      if ($verbosity > 0)
 
	{
 
	  echo 'calvin_crawl(): finished page ' . $pages[1] . ' of ' . $pages[2] . ' with ' . ($list_row - 1) . " courses.\n";
 
	}
 

	
 
      $form = array(
 
		    'ACTION*Grp:WSS.COURSE.SECTIONS' => 'NEXT',
 
		    );
 
    }
 

	
 
  $has_stat = FALSE;
 
  if ($verbosity > 1)
 
    foreach ($skipped_sections as $reason => $num)
 
      {
 
	if (!$num)
 
	  continue;
 
	if (!$has_stat)
 
	  error_log('Skipped some sections for <reason>: <number skipped>:');
 
	error_log($reason . ': ' . $num);
 
      }
 

	
 
  return 0;
 
}
 

	
 
/**
 * \brief
 *   Simulate some aspects of a web browser while retrieving a
 *   document.
 *
 * This allows us to view our cookies in an associative array and to
 * have the server's response automatically update our cookies.
 *
 * If $post is specified as an associative array, an HTTP POST is
 * performed and the data is encoded properly as if we were performing
 * a form submission.
 *
 * Follows redirects (at most six in a row). If there is a redirect,
 * the page from which you are redirected is lost... but few people
 * put any information on those pages anyways ;-). The request which
 * follows a redirect never carries the POST data.
 *
 * \param $uri
 *   The URL to fetch. If a redirect occurs, this is updated.
 * \param $cookies
 *   An associative array of cookies and where to save new cookies.
 * \param $post
 *   If not NULL, causes an HTTP POST. In that case, should be an
 *   associative array of form keys/values.
 * \param $verbosity
 *   How verbose to be.
 * \param $loopspin
 *   An internal variable to prevent us from following perpetual
 *   redirects.
 * \return
 *   The body of the document returned by the server (normally
 *   malformed HTML, especially with Calvin's WebAdvisor
 *   installation).
 */
function geturi(&$uri, &$cookies, $post = NULL, $verbosity = 0, $loopspin = 0)
{
  /* the cURL callbacks accumulate the response into these globals */
  global $geturi_write_buf, $geturi_headers_buf, $geturi_verbosity;

  if ($verbosity > 5)
    {
      echo "\n";
      echo 'geturi(' . $uri . ")\n";
      echo "\n";
    }

  $curl = curl_init();

  $geturi_verbosity = $verbosity;
  $geturi_write_buf = '';
  $geturi_headers_buf = '';
  curl_setopt($curl, CURLOPT_URL, $uri);

  $cookie_pairs = array();
  foreach ($cookies as $key => $val)
    $cookie_pairs[] = $key . '=' . $val;
  $cookies_str = implode(';', $cookie_pairs);

  if ($verbosity > 8)
    echo 'cookies sent: ' . $cookies_str . "\n";
  curl_setopt($curl, CURLOPT_COOKIE, $cookies_str);
  curl_setopt($curl, CURLOPT_HEADERFUNCTION, 'geturi_header_cb');
  curl_setopt($curl, CURLOPT_WRITEFUNCTION, 'geturi_write_cb');

  if ($post != NULL && is_array($post))
    {
      $posttxt = '';
      foreach ($post as $postkey => $postval)
        {
          /*
           * Values of keys containing `MEMBER' (e.g.,
           * LIST.VAR1_MEMBERS) are deliberately sent without
           * urlencoding.
           */
          $posttxt .= (strlen($posttxt) ? '&' : '')
            . urlencode($postkey) . '=' . (strpos($postkey, 'MEMBER') === FALSE ? urlencode($postval) : $postval);
        }
      if ($verbosity > 8)
        echo 'setting POST to ' . $posttxt . "\n";

      /* setting CURLOPT_POSTFIELDS is what turns this request into a POST */
      curl_setopt($curl, CURLOPT_POSTFIELDS, $posttxt);
    }

  if (curl_exec($curl) === FALSE)
    error_log('geturi(): unable to fetch ' . $uri . ': ' . curl_error($curl));
  curl_close($curl);

  $location = NULL;
  foreach (explode("\r\n", $geturi_headers_buf) as $header)
    {
      /*
       * yes, we don't want the line if the first char is a ':' or if it has no ':'
       */
      if (!strpos($header, ':'))
        continue;
      $header_parts = explode(':', $header, 2);
      $header_name = $header_parts[0];
      $header_val = trim($header_parts[1]);

      if ($verbosity > 8)
        echo $header_name . ' : ' . $header_val . "\n";

      /* HTTP header names are case-insensitive */
      switch (strtolower($header_name))
        {
        case 'set-cookie':
          /*
           * Only the leading name=value pair is the cookie itself;
           * anything after the first `;' is an attribute (path,
           * expires, ...) which must not be sent back to the server.
           */
          $cookie_parts = explode(';', $header_val, 2);
          $cookie_pair = explode('=', trim($cookie_parts[0]), 2);
          if (count($cookie_pair) < 2 || !strlen($cookie_pair[0]))
            break;
          $cookie_name = $cookie_pair[0];
          $cookie_val = $cookie_pair[1];
          if ($verbosity > 9)
            {
              if (isset($cookies[$cookie_name]))
                echo 'Replacing cookie ' . $cookie_name . '=' . $cookies[$cookie_name]
                  . ' with ';
              echo 'new cookie ' . $cookie_name . '=' . $cookie_val . "\n";
            }
          $cookies[$cookie_name] = $cookie_val;
          break;

        case 'location':
          /*
           * Redirect targets pointing at kvdata.calvin.edu/WebAdvisor
           * are rewritten to live under walive/.
           */
          $location = preg_replace(';(kvdata\.calvin\.edu/)(WebAdvisor);', '\1walive/\2', $header_val);
          $post = NULL;
          break;
        }
    }

  if ($verbosity > 9)
    echo $geturi_write_buf;
  if ($location && $loopspin < 6)
    {
      $uri = $location;
      return geturi($uri, $cookies, $post, $verbosity, $loopspin + 1);
    }
  return $geturi_write_buf;
}
 

	
 
/**
 * \brief
 *   cURL header callback used by geturi().
 *
 * Appends the received header data onto the global
 * $geturi_headers_buf.
 *
 * \param $curl
 *   The cURL handle (unused).
 * \param $header_buf
 *   The chunk of header data handed over by cURL.
 * \return
 *   The number of bytes in $header_buf.
 */
function geturi_header_cb($curl, $header_buf)
{
  global $geturi_headers_buf;

  $consumed = strlen($header_buf);
  $geturi_headers_buf = $geturi_headers_buf . $header_buf;

  return $consumed;
}
 

	
 
/**
 * \brief
 *   cURL write callback used by geturi().
 *
 * Appends the received body data onto the global $geturi_write_buf.
 *
 * \param $curl
 *   The cURL handle (unused).
 * \param $write_buf
 *   The chunk of body data handed over by cURL.
 * \return
 *   The number of bytes in $write_buf.
 */
function geturi_write_cb($curl, $write_buf)
{
  global $geturi_write_buf;

  $consumed = strlen($write_buf);
  $geturi_write_buf = $geturi_write_buf . $write_buf;

  return $consumed;
}
 

	
 
/**
 * \brief
 *   Look up an <input /> element by name and hand back its value.
 *
 * Only the first matching <input /> in document order is considered.
 *
 * \param $domdocument
 *   The DOMDocument to search.
 * \param $name
 *   The name attribute of the <input /> element.
 * \return
 *   The value attribute of the input element, or NULL if there is no
 *   such element or it carries no value attribute.
 */
function dom_input_value($domdocument, $name)
{
  $finder = new DOMXPath($domdocument);
  $candidates = $finder->query('/descendant::input[attribute::name="' . $name . '"]');

  if ($candidates->length)
    {
      $first_input = $candidates->item(0);
      if ($first_input->hasAttribute('value'))
        return $first_input->getAttribute('value');
    }

  return NULL;
}
 

	
 
/**
 * \brief
 *   Fetch the text content of the element carrying a given ID.
 *
 * A thin convenience wrapper around DOMDocument::getElementById().
 *
 * \param $domdocument
 *   A DOMDocument to search.
 * \param $id
 *   The id attribute of the element whose content is requested.
 * \return
 *   The nodeValue of the matching element or NULL if the element
 *   isn't found.
 */
function dom_id_content($domdocument, $id)
{
  $element = $domdocument->getElementById($id);

  return $element ? $element->nodeValue : NULL;
}
 

	
 
/**
 * \brief
 *   Strips every <noscript>...</noscript> element out of some HTML.
 *
 * The WebAdvisor likes to put <noscript/> in a document's <head />,
 * which is invalid HTML that DOM can't handle.
 *
 * \param $html
 *   The input HTML to filter.
 * \return
 *   The HTML with each <noscript> element, including its content,
 *   removed.
 */
function calvin_crawl_noscript_filter($html)
{
  /* the `s' modifier lets `.' span newlines inside the element */
  $noscript_pattern = ';\<(noscript)\>.*?\</\1\>;s';
  $filtered = preg_replace($noscript_pattern, '', $html);

  return $filtered;
}
school.d/cedarville.inc
Show inline comments
 
@@ -24,7 +24,7 @@ function cedarville_instructions_html()
 
  <li>Submit your schedule and view all of the different permutations of your schedule which would work with the sections you specified.</li>
 
  <li>Print out your preferred schedule by choosing "print" and selecting a schedule.</li>
 
  <li>Wait until it's your turn to register and grab your preferred sections before they fill up!</li>
 
</ol>
 
</ol> <!--'-->
 
EOF;
 
}
 

	
 
@@ -50,7 +50,11 @@ function table_parse($url) {
 
}
 

	
 
/** Crawls Cedarville course listings. $season is "fa" or "sp", year is 4-digit year */
 
function cedarville_crawl($season, $year) {
 
function cedarville_crawl($semester)
 
{  
 
  $season = strtolower(substr($semester->season_get(), 0, 2));
 
  $year = $semester->year_get();
 

	
 
  /* Current academic departments. Update as needed. */
 
  $departments = array('be','ba','ca','ed','eg','es','hg','id','ll','ms','mu','ns','ph','py','sm','sw');
 
  $basepath = "http://cedarville.edu/courses/schedule/";
school.d/umich.inc
Show inline comments
 
@@ -25,7 +25,7 @@ function umich_instructions_html()
 
  <li>Submit your schedule and view all of the different permutations of your schedule which would work with the sections you specified.</li>
 
  <li>Print out your preferred schedule by choosing "print" and selecting a schedule.</li>
 
  <li>Wait until it's your turn to register and grab your preferred sections before they fill up!</li>
 
</ol>
 
</ol> <!-- ' -->
 
EOF;
 
}
 

	
 
@@ -75,7 +75,11 @@ function umich_table_parse($url) {
 
}
 

	
 
/** Crawls uMich course listings. $season is "f" or "s", year is 2-digit year */
 
function umich_crawl($season, $year) {
 
function umich_crawl($semester)
 
{
 
  $year = substr($semester->year_get(), 2);
 
  $season = strtolower(substr($semester->season_get(), 0, 1));
 

	
 
  /* Current academic departments. Update as needed. */
 
  $departments = array('AAPTIS','ACABS','AERO','AEROSP','AMCULT','ANTHRARC','ANTHRBIO','ANTHRCUL','AOSS','APPPHYS','ARCH','ARMENIAN','ARTDES','ASIAN','ASIANLAN','ASTRO','AUTO','BCS','BIOINF','BIOLCHEM','BIOLOGY','BIOMEDE','BIOPHYS','CAAS','CEE','CHE','CHEM','CIC','CICS','CJS','CLARCH','CLCIV','CMPLXSYS','COMM','COMP','COMPLIT','CSP','CZECH','DANCE','DUTCH','ECON','EDCURINS','EDUC','EEB','EECS','ELI','ENGLISH','ENGR','ENSCEN','ENVIRON','ESENG','FRENCH','GEOG','GEOSCI','GERMAN','GREEK','GTBOOKS','HBEHED','HISTART','HISTORY','HJCS','HMP','HONORS','INTMED','IOE','ITALIAN','JAZZ','JUDAIC','KINESLGY','LACS','LATIN','LHC','LHSP','LING','MACROMOL','MATH','MATSCIE','MCDB','MECHENG','MEDADM','MEDCHEM','MEMS','MENAS','MFG','MICROBIOL','MILSCI','MKT','MODGREEK','MOVESCI','MUSEUMS','MUSICOL','MUSMETH','MUSTHTRE','NAVARCH','NAVSCI','NERS','NEUROSCI','NRE','NURS','OMS','ORGSTUDY','PAT','PATH','PHARMACY','PHIL','PHRMACOL','PHYSICS','PHYSIOL','POLISH','POLSCI','PORTUG','PSYCH','PUBHLTH','PUBPOL','RCARTS','RCCORE','RCHUMS','RCIDIV','RCLANG','RCNSCI','RCSSCI','REEES','RELIGION','ROMLANG','ROMLING','RUSSIAN','SAC','SAS','SCAND','SEAS','SI','SLAVIC','SOC','SPANISH','STATS','STDABRD','SWC','TCHNCLCM','THEORY','THTREMUS','UC','UKRAINE','UP','WOMENSTD','YIDDISH');
 

	
scripts/scheduleInput.js
Show inline comments
 
@@ -142,7 +142,7 @@
 
		sectionsOfClass[classNum] = 0; // This is class 0, initialize at 0
 
		jQuery('#jsrows').append('<tr title="' + classNum + '" class="class class' + classNum + '"><td><input type="text" class="className required defText className'+classNum+'" title="Class Name" name="postData[' + classNum + '][name]" /></td><td colspan="8"></td><td class="tdInput"><div class="addSection"><input type="button" value="Add section" class="gray" /></div></td><td class="tdInput"><div class="deleteClass"><input type="button" value="Remove" class="gray" /></div></td></tr>');
 
		jQuery('.className' + classNum).autocomplete({
 
			source: "sample-json-data.txt"
 
			source: "auto.php"
 
		});
 
		classNum++;
 
	};
0 comments (0 inline, 0 general)