# HG changeset patch # User ethanzonca # Date 2010-10-10 20:16:50 # Node ID 14764b9316398e8635b12b1220897678f508576c # Parent f393b6d03cb1d42458a6a7c7352160134ee1d799 Added initial Cedarville crawler diff --git a/inc/class.page.php b/inc/class.page.php --- a/inc/class.page.php +++ b/inc/class.page.php @@ -167,9 +167,13 @@ class page echo ' '; $this->pageGenTime = round(microtime(), 3); echo '
'; echo $this->trackingcode; diff --git a/school.d/cedarville.inc b/school.d/cedarville.inc --- a/school.d/cedarville.inc +++ b/school.d/cedarville.inc @@ -27,3 +27,39 @@ function cedarville_instructions_html() EOF; } + +/** Parse html at URL into array, first row is row headers */ +function table_parse($url) { + $arr = array(); + $dom = new DOMDocument; + $html = file_get_contents($url); + if(!$html){ + return 1; + } + $dom->loadHTML($html); + $dom->preserveWhiteSpace = false; + $tables = $dom->getElementsByTagName('table'); + $rows = $tables->item(0)->getElementsByTagName('tr'); // Get first table on page + foreach ($rows as $rownum => $row) { + $cols = $row->getElementsByTagName('td'); + foreach($cols as $colnum => $col){ + $arr[$rownum][$colnum] = $col->nodeValue; + } + } + return $arr; +} + +/** Crawls Cedarville course listings. $season is "fa" or "sp", year is 4-digit year */ +function cedarville_crawl($season, $year) { + /* Current academic departments. Update as needed. */ + $departments = array('be','ba','ca','ed','eg','es','hg','id','ll','ms','mu','ns','ph','py','sm','sw'); + $basepath = "http://cedarville.edu/courses/schedule/"; + + $season = strtolower($season); + $tables = array(); + foreach($departments as $department) { + $tables[$department] = table_parse($basepath . $year . $season . '_' . $department . '_' . 'all.htm'); + } + return $tables; +} + diff --git a/styles/general.css b/styles/general.css --- a/styles/general.css +++ b/styles/general.css @@ -166,7 +166,7 @@ td.center { /* General Classes */ .clear { - clear: all; + clear: both; } .noborder { border: none!important;