Files @ 36411b81240b
Branch filter:

Location: SlatePermutate/school.d/umich.crawl.inc

binki
Remove silly ampersand mangling.
<?php
/*
 * Copyright 2011 Nathan Gelderloos, Ethan Zonca, Nathan Phillip Brink
 *
 * This file is part of SlatePermutate.
 *
 * SlatePermutate is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SlatePermutate is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with SlatePermutate.  If not, see <http://www.gnu.org/licenses/>.
 */


/**
 * \brief
 *  array_filter() callback which drops whitespace-only items.
 *
 * \param $item
 *   The value to test; it is treated as a string.
 * \return
 *   FALSE if $item is empty or consists solely of whitespace (so
 *   that array_filter() discards it), TRUE otherwise.
 */
function umich_arrayfilter_callback($item)
{
  /*
   * Strict comparison against '' keeps values such as '0', which
   * are legitimate cell contents, from being treated as blank.
   */
  return trim((string)$item) !== '';
}

/**
 * \brief
 *  Fetch the HTML document at a URL and extract the cells of its
 *  fourth <table> into a two-dimensional array.
 *
 * The <tr> elements with index 0 through 5 are skipped, rows which
 * contain no <td> are ignored, cells rejected by
 * umich_arrayfilter_callback() are dropped, and the last three
 * collected rows (navigation and trailing garbage) are discarded.
 *
 * \param $url
 *   The location handed to file_get_contents().
 * \return
 *   An array of rows, each row being an array of cell text keyed by
 *   the cell's column index. Rows left without any cells are
 *   removed, so row keys may have gaps. Returns 1 if the document
 *   cannot be fetched or parsed or has fewer than four tables.
 */
function umich_table_parse($url)
{
  $html = file_get_contents($url);
  if ($html === FALSE || $html === '')
    return 1;

  $dom = new DOMDocument;
  if (!$dom->loadHTML($html))
    return 1;

  /* The listing lives in the fourth table of the page (index 3). */
  $table = $dom->getElementsByTagName('table')->item(3);
  if ($table === NULL)
    return 1;

  $arr = array();
  foreach ($table->getElementsByTagName('tr') as $rownum => $row) {
    if ($rownum <= 5)
      continue;

    $cells = array();
    foreach ($row->getElementsByTagName('td') as $colnum => $col)
      $cells[$colnum] = $col->nodeValue;

    /*
     * Rows without any <td> are not recorded at all, so they do not
     * count towards the three trailing rows stripped below.
     */
    if (count($cells))
      $arr[] = array_filter($cells, "umich_arrayfilter_callback");
  }

  /*
   * Strip navigation and trailing garbage: the last three rows. When
   * fewer than three rows were collected, nothing is left.
   */
  $arr = array_slice($arr, 0, -3);

  /* Remove rows whose cells were all filtered away. */
  return array_filter($arr);
}

/**
 * \brief
 *  Crawls University of Michigan's schedule.
 *
 * Fetches the advanced search page, then builds one results URL per
 * department code and hands each to umich_table_parse().
 *
 * NOTE(review): the body reads $semester (singular), which is
 * neither a parameter nor assigned anywhere in this function, so the
 * year_get()/season_get() calls cannot succeed as written. Where the
 * semester should come from needs to be decided before this
 * function can run.
 *
 * \param $semesters
 *   An array to be filled with semesters. NOTE(review): the body
 *   never writes to it.
 * \param $school_crawl_log
 *   The school_crawl_log handle.
 * \return
 *   Documented contract: 1 on failure, 0 on success. NOTE(review):
 *   the code instead returns an array keyed by department code whose
 *   values are whatever umich_table_parse() returned for that
 *   department.
 */
function umich_crawl(array &$semesters, $school_crawl_log)
{
  $url = 'http://lsa.umich.edu/cg/cg_advsearch.aspx';
  $cookies = array();

  /* determine list of semesters: */
  /* NOTE(review): $semesters_dom is loaded here but never read afterwards. */
  $semesters_dom = new DOMDocument();
  $semesters_dom->loadHTML(school_crawl_geturi($url, $cookies, $school_crawl_log));

  /* NOTE(review): $semester is undefined at this point; see the function header. */
  /* Drop the first two characters of the year and keep the lowercased first letter of the season. */
  $year = substr($semester->year_get(), 2);
  $season = strtolower(substr($semester->season_get(), 0, 1));

  /* Current academic departments. Update as needed. */
  $departments = array('AAPTIS','ACABS','AERO','AEROSP','AMCULT','ANTHRARC','ANTHRBIO','ANTHRCUL','AOSS','APPPHYS','ARCH','ARMENIAN','ARTDES','ASIAN','ASIANLAN','ASTRO','AUTO','BCS','BIOINF','BIOLCHEM','BIOLOGY','BIOMEDE','BIOPHYS','CAAS','CEE','CHE','CHEM','CIC','CICS','CJS','CLARCH','CLCIV','CMPLXSYS','COMM','COMP','COMPLIT','CSP','CZECH','DANCE','DUTCH','ECON','EDCURINS','EDUC','EEB','EECS','ELI','ENGLISH','ENGR','ENSCEN','ENVIRON','ESENG','FRENCH','GEOG','GEOSCI','GERMAN','GREEK','GTBOOKS','HBEHED','HISTART','HISTORY','HJCS','HMP','HONORS','INTMED','IOE','ITALIAN','JAZZ','JUDAIC','KINESLGY','LACS','LATIN','LHC','LHSP','LING','MACROMOL','MATH','MATSCIE','MCDB','MECHENG','MEDADM','MEDCHEM','MEMS','MENAS','MFG','MICROBIOL','MILSCI','MKT','MODGREEK','MOVESCI','MUSEUMS','MUSICOL','MUSMETH','MUSTHTRE','NAVARCH','NAVSCI','NERS','NEUROSCI','NRE','NURS','OMS','ORGSTUDY','PAT','PATH','PHARMACY','PHIL','PHRMACOL','PHYSICS','PHYSIOL','POLISH','POLSCI','PORTUG','PSYCH','PUBHLTH','PUBPOL','RCARTS','RCCORE','RCHUMS','RCIDIV','RCLANG','RCNSCI','RCSSCI','REEES','RELIGION','ROMLANG','ROMLING','RUSSIAN','SAC','SAS','SCAND','SEAS','SI','SLAVIC','SOC','SPANISH','STATS','STDABRD','SWC','TCHNCLCM','THEORY','THTREMUS','UC','UKRAINE','UP','WOMENSTD','YIDDISH');

  $basepath = "http://www.lsa.umich.edu/cg/cg_results.aspx";
  $yearsyn = 1800 + $year; // Weird year synonym name where 2000 == 1800
  /*
   * The term is encoded as <season letter>_<two-digit year>_<year synonym>.
   * NOTE(review): the "${yearsyn}" interpolation form is deprecated
   * as of PHP 8.2; "{$yearsyn}" is the equivalent spelling.
   */
  $basepath .= "?termArray={$season}_{$year}_${yearsyn}&cgtype=ug";
  /* $season was already lowercased above and is not read again after this line. */
  $season = strtolower($season);
  /* Collect one parse result per department, keyed by department code. */
  $tables = array();
  foreach($departments as $department) {
   $tables[$department] = umich_table_parse($basepath . '&department=' . $department . '&allsections=true&show=1000');
  }
  return $tables;
}