# HG changeset patch # User normaldotcom # Date 2010-11-13 20:40:08 # Node ID 39cea00b7ea4f41d36f19afe179be343af6362ad # Parent 7a6777d84d07014b5ff1df44ac27fc98d2ad8a3b Cedarville crawler: make output cleaner diff --git a/school.d/cedarville.inc b/school.d/cedarville.inc --- a/school.d/cedarville.inc +++ b/school.d/cedarville.inc @@ -72,6 +72,7 @@ function cedarville_default_classes() */ function table_parse($html) { + libxml_use_internal_errors(true); // Suppress warnings $arr = array(); $dom = new DOMDocument; if(!$html) @@ -93,6 +94,7 @@ function table_parse($html) /** Crawls Cedarville course listings. $season is "fa" or "sp", year is 4-digit year */ function cedarville_crawl($semester, $verbosity = 1) { + $season = strtolower(substr($semester->season_get(), 0, 2)); $year = $semester->year_get(); @@ -100,10 +102,13 @@ function cedarville_crawl($semester, $ve $departments = array('be','ba','ca','ed','eg','es','hg','id','ll','ms','mu','ns','ph','py','sm','sw'); $basepath = "http://cedarville.edu/courses/schedule/"; + echo "cedarville_crawl(): Beginning crawl of Cedarville:\n"; + $season = strtolower($season); $tables = array(); foreach($departments as $department) { + echo "cedarville_crawl(): Crawling department \"$department\"...\n"; $html = file_get_contents($basepath . $year . $season . '_' . $department . '_' . 'all.htm'); if (!$html) continue;