/**
 * Parse the first HTML table found at a URL (or local path) into a
 * two-dimensional array.
 *
 * The result is indexed as $arr[row][column], where row is the position of
 * the <tr> within the table and column is the position of the cell within
 * that row. Both <th> and <td> cells are collected, so a header row marked
 * up with <th> ends up as the first row of the result. Rows that contain no
 * cells produce no entry.
 *
 * @param string $url Location handed to file_get_contents().
 *
 * @return array|int Array of rows (each an array of cell text values), an
 *                   empty array when the document contains no <table>, or
 *                   the integer 1 when the document could not be fetched,
 *                   was empty, or could not be parsed.
 */
function table_parse($url)
{
    $html = file_get_contents($url);
    if (!$html) {
        // Fetch failed or returned nothing usable; 1 is the failure sentinel.
        return 1;
    }

    $dom = new DOMDocument;

    // Collect markup errors internally instead of emitting one PHP warning
    // per problem, then restore whatever setting the caller had.
    $previous = libxml_use_internal_errors(true);
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded) {
        return 1;
    }

    // Only the first table on the page is of interest.
    $table = $dom->getElementsByTagName('table')->item(0);
    if ($table === null) {
        return array();
    }

    $arr = array();
    foreach ($table->getElementsByTagName('tr') as $rownum => $row) {
        // Walk every descendant element so <th> and <td> cells are picked
        // up in document order.
        foreach ($row->getElementsByTagName('*') as $node) {
            if ($node->nodeName === 'td' || $node->nodeName === 'th') {
                $arr[$rownum][] = $node->nodeValue;
            }
        }
    }

    return $arr;
}

/**
 * Crawls Cedarville course listings, one schedule page per department.
 *
 * @param string $season "fa" or "sp"; lower-cased before use.
 * @param mixed  $year   4-digit year, concatenated into the page URL.
 *
 * @return array Map of department code => result of table_parse() for that
 *               department's page (an array of rows, or 1 when the page
 *               could not be fetched).
 */
function cedarville_crawl($season, $year)
{
    /* Current academic departments. Update as needed. */
    $departments = array(
        'be', 'ba', 'ca', 'ed', 'eg', 'es', 'hg', 'id',
        'll', 'ms', 'mu', 'ns', 'ph', 'py', 'sm', 'sw',
    );
    $basepath = "http://cedarville.edu/courses/schedule/";

    // Every page shares the "<year><season>_" prefix, e.g. "2011fa_".
    $prefix = $basepath . $year . strtolower($season) . '_';

    $tables = array();
    foreach ($departments as $department) {
        $tables[$department] = table_parse($prefix . $department . '_all.htm');
    }

    return $tables;
}