. */

/**
 * \brief
 *   array_filter() callback which drops whitespace-only items.
 *
 * \param $item
 *   The string to inspect (here: the text of one table cell).
 * \return
 *   FALSE if $item is empty or consists only of whitespace (so that
 *   array_filter() removes it), TRUE otherwise.
 */
function umich_arrayfilter_callback($item)
{
    return ltrim($item) !== '';
}

/**
 * \brief
 *   Parse the HTML found at a URL into an array of table rows.
 *
 * Reads the fourth <table> of the document (index 3), skips its first
 * six <tr> rows, collects the text of every <td> cell, removes
 * whitespace-only cells and finally discards the last three collected
 * rows (navigation and trailing garbage) together with any row that
 * has no cells left. The original description states that the first
 * row of the result holds the row headers; that depends on the page
 * layout and cannot be verified from this code.
 *
 * \param $url
 *   The location handed to file_get_contents().
 * \return
 *   1 if the document could not be read or was empty. Otherwise an
 *   array of rows, each row being an array of cell strings keyed by
 *   the cell's column index. The array is empty if the document has
 *   fewer than four tables or no rows survive the stripping.
 */
function umich_table_parse($url)
{
    $html = file_get_contents($url);
    if (!$html) {
        return 1;
    }

    $dom = new DOMDocument;
    $dom->loadHTML($html);
    /*
     * NOTE(review): this is assigned after loadHTML() has already run,
     * so it cannot affect the parse above. Kept in place to avoid
     * changing what gets parsed; confirm whether it was meant to come
     * before the load.
     */
    $dom->preserveWhiteSpace = false;

    /* The schedule lives in the fourth table on the page (index 3). */
    $table = $dom->getElementsByTagName('table')->item(3);
    if ($table === null) {
        /* Page layout differs from what is expected: nothing to parse. */
        return array();
    }

    $arr = array();
    foreach ($table->getElementsByTagName('tr') as $rownum => $row) {
        /* The first six rows are skipped. */
        if ($rownum <= 5) {
            continue;
        }
        foreach ($row->getElementsByTagName('td') as $colnum => $col) {
            $arr[$rownum][$colnum] = $col->nodeValue;
        }
    }

    /*
     * Drop whitespace-only cells. Assigning by key avoids leaving a
     * dangling reference behind, as foreach-by-reference would.
     */
    foreach ($arr as $rownum => $cells) {
        $arr[$rownum] = array_filter($cells, 'umich_arrayfilter_callback');
    }
    $arr = array_values($arr); // Reindex array

    /*
     * Strip navigation and trailing garbage: the last three rows.
     * array_slice() also copes with fewer than three rows, where
     * indexing with count() - 3 would create negative keys.
     */
    $arr = array_slice($arr, 0, -3);

    /* Remove rows which ended up without any cells. */
    return array_filter($arr);
}

/**
 * \brief
 *   Crawls University of Michigan's schedule.
 *
 * Requests the advanced search page, then fetches and parses one
 * result page per academic department for a single term.
 *
 * NOTE(review): as written, this function reads an undefined variable
 * ($semester) and never adds anything to $semesters; see the notes in
 * the body. It looks unfinished and must be completed before use.
 *
 * \param $semesters
 *   An array to be filled with semesters. The visible code does not
 *   modify it.
 * \param $school_crawl_log
 *   The school_crawl_log handle, passed on to school_crawl_geturi().
 * \return
 *   An array keyed by department code whose values are whatever
 *   umich_table_parse() returned for that department (an array of
 *   rows, or 1 if the page could not be read). NOTE(review): earlier
 *   documentation promised "1 on failure, 0 on success", which is not
 *   what the code returns.
 */
function umich_crawl(array &$semesters, $school_crawl_log)
{
    $url = 'http://lsa.umich.edu/cg/cg_advsearch.aspx';
    $cookies = array();

    /*
     * Determine list of semesters.
     *
     * NOTE(review): the document is loaded but never inspected below.
     * The request is kept so that its side effects stay unchanged.
     */
    $semesters_dom = new DOMDocument();
    $semesters_dom->loadHTML(school_crawl_geturi($url, $cookies, $school_crawl_log));

    /*
     * NOTE(review): $semester is not defined anywhere in this function
     * (the parameter is called $semesters and is an array), so the two
     * calls below fail at runtime. Presumably a semester object should
     * be derived from the page loaded above -- confirm and implement.
     */
    $year = substr($semester->year_get(), 2);
    $season = strtolower(substr($semester->season_get(), 0, 1));

    /* Current academic departments. Update as needed. */
    $departments = array('AAPTIS','ACABS','AERO','AEROSP','AMCULT','ANTHRARC','ANTHRBIO','ANTHRCUL','AOSS','APPPHYS','ARCH','ARMENIAN','ARTDES','ASIAN','ASIANLAN','ASTRO','AUTO','BCS','BIOINF','BIOLCHEM','BIOLOGY','BIOMEDE','BIOPHYS','CAAS','CEE','CHE','CHEM','CIC','CICS','CJS','CLARCH','CLCIV','CMPLXSYS','COMM','COMP','COMPLIT','CSP','CZECH','DANCE','DUTCH','ECON','EDCURINS','EDUC','EEB','EECS','ELI','ENGLISH','ENGR','ENSCEN','ENVIRON','ESENG','FRENCH','GEOG','GEOSCI','GERMAN','GREEK','GTBOOKS','HBEHED','HISTART','HISTORY','HJCS','HMP','HONORS','INTMED','IOE','ITALIAN','JAZZ','JUDAIC','KINESLGY','LACS','LATIN','LHC','LHSP','LING','MACROMOL','MATH','MATSCIE','MCDB','MECHENG','MEDADM','MEDCHEM','MEMS','MENAS','MFG','MICROBIOL','MILSCI','MKT','MODGREEK','MOVESCI','MUSEUMS','MUSICOL','MUSMETH','MUSTHTRE','NAVARCH','NAVSCI','NERS','NEUROSCI','NRE','NURS','OMS','ORGSTUDY','PAT','PATH','PHARMACY','PHIL','PHRMACOL','PHYSICS','PHYSIOL','POLISH','POLSCI','PORTUG','PSYCH','PUBHLTH','PUBPOL','RCARTS','RCCORE','RCHUMS','RCIDIV','RCLANG','RCNSCI','RCSSCI','REEES','RELIGION','ROMLANG','ROMLING','RUSSIAN','SAC','SAS','SCAND','SEAS','SI','SLAVIC','SOC','SPANISH','STATS','STDABRD','SWC','TCHNCLCM','THEORY','THTREMUS','UC','UKRAINE','UP','WOMENSTD','YIDDISH');

    $basepath = "http://www.lsa.umich.edu/cg/cg_results.aspx";
    $yearsyn = 1800 + $year; // Weird year synonym name where 2000 == 1800
    $basepath .= "?termArray={$season}_{$year}_{$yearsyn}&cgtype=ug";

    $tables = array();
    foreach ($departments as $department) {
        $tables[$department] = umich_table_parse(
            $basepath . '&department=' . $department . '&allsections=true&show=1000'
        );
    }

    return $tables;
}