Changeset - 14764b931639
[Not reviewed]
default
0 3 0
ethanzonca - 15 years ago 2010-10-10 20:16:50

Added initial Cedarville crawler
3 files changed with 44 insertions and 4 deletions:
0 comments (0 inline, 0 general)
inc/class.page.php
Show inline comments
 
@@ -167,9 +167,13 @@ class page
 
    echo '</div> <!-- id="content" -->';
 
    $this->pageGenTime = round(microtime(), 3);
 
    echo '  <div id="footer">
 
          <h5>&copy; '. date('Y').' <a href="http://protofusion.org/~nathang/">Nathan Gelderloos</a><br />
 
            <a href="http://ethanzonca.com">Ethan Zonca</a>
 
          </h5>
 
  	    <div id="leftfoot" style="float:left; margin-top: 1em;">
 
		<a href="feedback.php">Submit Feedback</a>
 
            </div>
 
            <div id="rightfoot"><h5>&copy; '. date('Y').' <a href="http://protofusion.org/~nathang/">Nathan Gelderloos</a><br />
 
              <a href="http://ethanzonca.com">Ethan Zonca</a>
 
            </h5>
 
	  </div>
 
        </div> <!-- id="footer" -->
 
      </div>';
 
    echo $this->trackingcode;
school.d/cedarville.inc
Show inline comments
 
@@ -27,3 +27,39 @@ function cedarville_instructions_html()
 
</ol>
 
EOF;
 
}
 

	
 
/**
 * Parse the first HTML table found at $url into a two-dimensional array.
 *
 * The result is indexed as $arr[row][column]: row is the position of the <tr>
 * among the table's rows and column is the position of the <td> within that
 * row. Each entry holds the cell's text content. Only <td> cells are
 * collected, so a row consisting solely of <th> cells contributes no entry.
 *
 * Returns 1 when the document cannot be fetched or is empty, and an empty
 * array when the document contains no <table> element.
 */
function table_parse($url) {
  $arr = array();

  $html = file_get_contents($url);
  if(!$html){
    return 1;
  }

  $dom = new DOMDocument;
  // Configure the document before loading; settings assigned after the
  // markup has been parsed cannot influence the parse.
  $dom->preserveWhiteSpace = false;

  // Real-world pages are rarely valid HTML. Collect libxml's complaints
  // internally instead of emitting a PHP warning for each one, then restore
  // the caller's error-handling mode.
  $previous = libxml_use_internal_errors(true);
  $dom->loadHTML($html);
  libxml_clear_errors();
  libxml_use_internal_errors($previous);

  $table = $dom->getElementsByTagName('table')->item(0); // Get first table on page
  if($table === null){
    // item() yields null when the page has no table; calling a method on it
    // would be a fatal error, so report "no rows" instead.
    return $arr;
  }

  foreach ($table->getElementsByTagName('tr') as $rownum => $row) {
    foreach($row->getElementsByTagName('td') as $colnum => $col){
      $arr[$rownum][$colnum] = $col->nodeValue;
    }
  }

  return $arr;
}
 

	
 
/**
 * Crawl the Cedarville course schedule page of every academic department.
 *
 * $season is "fa" or "sp" (it is lower-cased before use); $year is the
 * 4-digit year. Returns an array keyed by department code whose values are
 * whatever table_parse() returned for that department's schedule page.
 */
function cedarville_crawl($season, $year) {
  /* Current academic departments. Update as needed. */
  $dept_codes = array('be','ba','ca','ed','eg','es','hg','id','ll','ms','mu','ns','ph','py','sm','sw');

  // Everything in the page URL that precedes the department code.
  $url_prefix = "http://cedarville.edu/courses/schedule/" . $year . strtolower($season) . '_';

  $schedules = array();
  foreach($dept_codes as $code) {
    $schedules[$code] = table_parse($url_prefix . $code . '_' . 'all.htm');
  }

  return $schedules;
}
 

	
styles/general.css
Show inline comments
 
@@ -166,7 +166,7 @@ td.center {
 
/* General Classes */
 

	
 
.clear {
 
  clear: all;
 
  clear: both;
 
}
 
.noborder {
 
  border: none!important;
0 comments (0 inline, 0 general)