Changeset - 30e159e1b597
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 14 years ago 2012-02-15 23:23:08
ohnobinki@ohnopublishing.net
Add support to Cedarville's crawler for retrieving friendly subject/department names for the autocomplete user.
1 file changed with 29 insertions and 0 deletions:
0 comments (0 inline, 0 general)
school.d/cedarville.crawl.inc
Show inline comments
 
@@ -299,48 +299,77 @@ function cedarville_crawl_semester(array
 
		    {
 
		      $semester->time_start_set_test($date_start);
 
		      $semester->time_end_set_test($date_end);
 
		    }
 
		}
 

	
 
	      /*
 
	       * The tables are made for humans, not computers. If
 
	       * there aren't enough instructors for the number of
 
	       * section meetings, just reuse the first listed
 
	       * instructor:
 
	       */
 
	      if ($meeting_i >= count($instructors))
 
		$instructors[$meeting_i] = $instructors[0];
 

	
 
	      $meetings[] = new SectionMeeting($days, $time_start, $time_end,
 
					       $room, $type, $instructors[$meeting_i]);
 

	
 
	      $meeting_i ++;
 
	    }
 

	
 
	  $semester->section_add($section_parts['department'], $section_parts['course'],
 
				 new Section($section_parts['section'], $meetings,
 
					     $synonym, $credit_hours), $title);
 

	
 
	  /*
 
	   * Get the full subject's name from the course's page if we
 
	   * don't have it already.
 
	   */
 
	  if (!$semester->department_name_has($section_parts['department']))
 
	    {
 
	      foreach ($course_table[1]->childNodes as $course_a)
 
		if ($course_a instanceof DOMElement
 
		&& $course_a->tagName == 'a')
 
		  break;
 
	      if ($course_a instanceof DOMElement
 
		  && $course_a->tagName == 'a'
 
		  && strlen($course_href = $course_a->getAttribute('href')))
 
		{
 
		  $course_uri = school_crawl_url($uri, $course_href);
 
		  $course_html = school_crawl_geturi($course_uri, $cookies, $school_crawl_log);
 
		  if (!empty($course_html))
 
		    {
 
		      $course_dom = new DOMDocument();
 
		      $course_dom->loadHTML($course_html);
 
		      if ($subject_td = $course_dom->getElementById('main_0_subjectLink'))
 
			{
 
			  $subject_name = preg_replace('/ *\\[[A-Z]*\\]$/', '', $subject_td->nodeValue);
 
			  $semester->department_name_set($section_parts['department'], $subject_name);
 
			}
 
		    }
 
		}
 
	    }
 
	}
 
    }
 

	
 
  return 0;
 
}
 

	
 
/**
 
 * \brief
 
 *   Look up the URI used to access information about a particular
 
 *   Cedarville semester.
 
 *
 
 * \param $semester
 
 *   The semester whose URI is being retrieved.
 
 * \param $document
 
 *   Optional DOMDocument of the Cedarville semester listing page, to
 
 *   aid seeding the cache. To prime the cache, just set $semester to
 
 *   NULL and pass in $document.
 
 * \return
 
 *   The URI for that semester's courses relative to
 
 *   CEDARVILLE_BASE_URI.
 
 */
 
function cedarville_semester_uri(Semester $semester = NULL, &$school_crawl_log, DOMDocument $document = NULL)
 
{
 
  static $semester_to_uri = array();
0 comments (0 inline, 0 general)