Files
        @ 828706182e2f
    
        
              Branch filter: 
        
    Location: SlatePermutate/school.d/umich.crawl.inc - annotation
        
            
            828706182e2f
            4.2 KiB
            text/x-povray
        
        
    
    Add support for entering, storing, and displaying a course title per bug 95.
    0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 0d981269f3d6 0d981269f3d6 0d981269f3d6 828706182e2f 828706182e2f 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 0d981269f3d6 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 828706182e2f 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6 0d981269f3d6  | <?php
/*
 * Copyright 2011 Nathan Gelderloos, Ethan Zonca, Nathan Phillip Brink
 *
 * This file is part of SlatePermutate.
 *
 * SlatePermutate is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SlatePermutate is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with SlatePermutate.  If not, see <http://www.gnu.org/licenses/>.
 */
/**
 * \brief
 *  array_filter() callback which filters out whitespace-only items.
 *
 * \param $item
 *   The value to test; it is treated as a string.
 * \return
 *   FALSE if $item is empty or consists only of whitespace (so that
 *   array_filter() discards it), TRUE otherwise.
 */
function umich_arrayfilter_callback($item)
{
  /*
   * array_filter() keeps the entries for which the callback returns
   * TRUE, so a blank item has to yield FALSE in order to be removed.
   */
  return trim((string)$item) !== '';
}
/**
 * \brief
 *  Parse the HTML table found at a URL into an array of rows.
 *
 * The page at $url is fetched and the fourth <table> element of the
 * document is read. The first six <tr> rows of that table are
 * skipped, rows without any <td> cell are ignored, each remaining
 * row's cells are passed through umich_arrayfilter_callback(), and
 * the last three collected rows (navigation and trailing garbage)
 * are dropped. Rows which end up empty are removed from the result.
 *
 * \param $url
 *   The location of the page to parse; anything which
 *   file_get_contents() can read.
 * \return
 *   An array of rows, each row being an array of cell text keyed by
 *   column index, or 1 if the page could not be fetched, could not be
 *   parsed, or does not contain a fourth table.
 */
function umich_table_parse($url)
{
  $html = file_get_contents($url);
  if (!$html)
    return 1;

  $dom = new DOMDocument();
  /* Has to be set before the document is loaded to have any effect. */
  $dom->preserveWhiteSpace = FALSE;

  /*
   * Real-world pages are rarely well-formed; collect libxml's
   * complaints internally instead of emitting a PHP warning for each
   * one, then restore the caller's libxml error setting.
   */
  $libxml_previous = libxml_use_internal_errors(TRUE);
  $loaded = $dom->loadHTML($html);
  libxml_clear_errors();
  libxml_use_internal_errors($libxml_previous);
  if (!$loaded)
    return 1;

  /* The listing is the fourth table on the page (index 3). */
  $table = $dom->getElementsByTagName('table')->item(3);
  if ($table === NULL)
    return 1;

  $arr = array();
  foreach ($table->getElementsByTagName('tr') as $rownum => $row) {
    /* Rows 0 through 5 precede the actual data. */
    if ($rownum <= 5)
      continue;

    $cells = array();
    foreach ($row->getElementsByTagName('td') as $colnum => $col)
      $cells[$colnum] = $col->nodeValue;

    /* A row without any <td> does not count as a row at all. */
    if (count($cells) > 0)
      $arr[] = array_filter($cells, 'umich_arrayfilter_callback');
  }

  /*
   * Strip navigation and trailing garbage: the last three rows. With
   * fewer than three rows array_slice() simply yields an empty array.
   */
  $arr = array_slice($arr, 0, -3);

  /* Drop rows which have no cells left after filtering. */
  return array_filter($arr);
}
/**
 * \brief
 *  Crawls University of Michigan's schedule.
 *
 * Fetches the advanced search page, then requests the results page
 * of every known department and parses each one with
 * umich_table_parse().
 *
 * \param $semesters
 *   An array to be filled with semesters. NOTE(review): the current
 *   body never writes to it.
 * \param $school_crawl_log
 *   The school_crawl_log handle.
 * \return
 *   An array keyed by department code whose values are whatever
 *   umich_table_parse() returned for that department. NOTE(review):
 *   this was previously documented as "1 on failure, 0 on success",
 *   which the code does not implement -- confirm what callers expect.
 */
function umich_crawl(array &$semesters, $school_crawl_log)
{
  $url = 'http://lsa.umich.edu/cg/cg_advsearch.aspx';
  $cookies = array();

  /* determine list of semesters: */
  $semesters_dom = new DOMDocument();
  $semesters_dom->loadHTML(school_crawl_geturi($url, $cookies, $school_crawl_log));
  /*
   * NOTE(review): $semesters_dom is not read anywhere below, and
   * $semester is never assigned in this function, so the two calls
   * on it cannot succeed as written. Presumably the semester(s) are
   * meant to be extracted from $semesters_dom -- TODO confirm and
   * implement.
   */
  $year = substr($semester->year_get(), 2);
  $season = strtolower(substr($semester->season_get(), 0, 1));

  /* Current academic departments. Update as needed. */
  $departments = array('AAPTIS','ACABS','AERO','AEROSP','AMCULT','ANTHRARC','ANTHRBIO','ANTHRCUL','AOSS','APPPHYS','ARCH','ARMENIAN','ARTDES','ASIAN','ASIANLAN','ASTRO','AUTO','BCS','BIOINF','BIOLCHEM','BIOLOGY','BIOMEDE','BIOPHYS','CAAS','CEE','CHE','CHEM','CIC','CICS','CJS','CLARCH','CLCIV','CMPLXSYS','COMM','COMP','COMPLIT','CSP','CZECH','DANCE','DUTCH','ECON','EDCURINS','EDUC','EEB','EECS','ELI','ENGLISH','ENGR','ENSCEN','ENVIRON','ESENG','FRENCH','GEOG','GEOSCI','GERMAN','GREEK','GTBOOKS','HBEHED','HISTART','HISTORY','HJCS','HMP','HONORS','INTMED','IOE','ITALIAN','JAZZ','JUDAIC','KINESLGY','LACS','LATIN','LHC','LHSP','LING','MACROMOL','MATH','MATSCIE','MCDB','MECHENG','MEDADM','MEDCHEM','MEMS','MENAS','MFG','MICROBIOL','MILSCI','MKT','MODGREEK','MOVESCI','MUSEUMS','MUSICOL','MUSMETH','MUSTHTRE','NAVARCH','NAVSCI','NERS','NEUROSCI','NRE','NURS','OMS','ORGSTUDY','PAT','PATH','PHARMACY','PHIL','PHRMACOL','PHYSICS','PHYSIOL','POLISH','POLSCI','PORTUG','PSYCH','PUBHLTH','PUBPOL','RCARTS','RCCORE','RCHUMS','RCIDIV','RCLANG','RCNSCI','RCSSCI','REEES','RELIGION','ROMLANG','ROMLING','RUSSIAN','SAC','SAS','SCAND','SEAS','SI','SLAVIC','SOC','SPANISH','STATS','STDABRD','SWC','TCHNCLCM','THEORY','THTREMUS','UC','UKRAINE','UP','WOMENSTD','YIDDISH');

  $yearsyn = 1800 + (int)$year; // Weird year synonym name where 2000 == 1800
  /*
   * Use the {$var} interpolation form throughout; the ${var} form is
   * deprecated as of PHP 8.2.
   */
  $basepath = 'http://www.lsa.umich.edu/cg/cg_results.aspx'
    . "?termArray={$season}_{$year}_{$yearsyn}&cgtype=ug";

  $tables = array();
  foreach ($departments as $department) {
    $tables[$department] = umich_table_parse($basepath . '&department=' . $department . '&allsections=true&show=1000');
  }
  return $tables;
}
 |