Changeset - 39cea00b7ea4
[Not reviewed]
default
0 1 0
Ethan Zonca (ethanzonca) - 15 years ago 2010-11-13 20:40:08
e@ethanzonca.com
Cedarville crawler: make output cleaner
1 file changed with 5 insertions and 0 deletions:
0 comments (0 inline, 0 general)
school.d/cedarville.inc
Show inline comments
 
@@ -69,12 +69,13 @@ function cedarville_default_classes()
 
 *
 
 * \param $html
 
 *   HTML that PHP's DOM would willingly eat.
 
 */
 
function table_parse($html)
 
{
 
  libxml_use_internal_errors(true); // Suppress warnings
 
  $arr = array();
 
  $dom = new DOMDocument;
 
  if(!$html)
 
    return NULL;
 

	
 
  $dom->loadHTML($html);
 
@@ -90,23 +91,27 @@ function table_parse($html)
 
  return $arr;
 
}
 

	
 
/** Crawls Cedarville course listings for $semester. The season is reduced to its first two letters, lowercased (e.g. "fa" or "sp"), and the year is read from $semester->year_get(). */
 
function cedarville_crawl($semester, $verbosity = 1)
 
{  
 

	
 
  $season = strtolower(substr($semester->season_get(), 0, 2));
 
  $year = $semester->year_get();
 

	
 
  /* Current academic departments. Update as needed. */
 
  $departments = array('be','ba','ca','ed','eg','es','hg','id','ll','ms','mu','ns','ph','py','sm','sw');
 
  $basepath = "http://cedarville.edu/courses/schedule/";
 

	
 
  echo "cedarville_crawl(): Beginning crawl of Cedarville:\n";
 

	
 
  $season = strtolower($season);
 
  $tables = array();
 
  foreach($departments as $department)
 
    {
 
      echo "cedarville_crawl(): Crawling department \"$department\"...\n";
 
      $html = file_get_contents($basepath . $year . $season . '_' . $department . '_' . 'all.htm');
 
      if (!$html)
 
	continue;
 
      $tables[$department] = table_parse(cedarville_html_fix($html));
 
    }
 

	
0 comments (0 inline, 0 general)