Changeset - 136360f35050
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 12 years ago 2013-04-23 15:58:12
ohnobinki@ohnopublishing.net
Fix WebAdvisor (particularly cornerstone) crawling when some searches by semester+department are specific enough to return no results.
1 file changed with 9 insertions and 0 deletions:
0 comments (0 inline, 0 general)
inc/school.crawl.webadvisor.inc
Show inline comments
 
@@ -427,24 +427,33 @@ function school_crawl_webadvisor_semeste
 

	
 
  $skipped_sections = array('incomplete meeting info' => 0, 'invalid meeting info format' => 0);
 
  /*
 
   * $pages is populated by preg_match() below after the first loop iteration.
 
   */
 
  $pages = array(1 => 0, 2 => 1);
 
  while ($pages[1] < $pages[2])
 
    {
 
      $html = school_crawl_webadvisor_noscript_filter(school_crawl_geturi($uri, $cookies, $school_crawl_log, $form, FALSE, $options['curlsetup_hook']));
 

	
 
      $results_dom = new DOMDocument();
 
      $results_dom->loadHTML($html);
 
      $results_xpath = new DOMXPath($results_dom);
 

	
 
      $results_errorText = $results_xpath->query('//div[@class="errorText"]');
 
      if ($results_errorText->length)
 
	{
 
	  school_crawl_logf($school_crawl_log, 6, "Skipping a page because ā€œ%sā€.", $results_errorText->item(0)->textContent);
 
	  break;
 
	}
 

	
 
      $results_form = _school_crawl_webadvisor_form($results_dom, $school_crawl_log);
 
      if (empty($results_form))
 
	return 1;
 

	
 
      $list_done = FALSE;
 
      for ($list_row = 1; !$list_done; $list_row ++)
 
	{
 
	  /* either 'Open' (or 'Closed'?) */
 
	  $openness = empty($results_form['LIST.VAR1_' . $list_row]) ? NULL : reset($results_form['LIST.VAR1_' . $list_row]);
 
	  $sec_short_title = _school_crawl_webadvisor_dom_id_content($results_dom, 'SEC_SHORT_TITLE_' . $list_row);
 
	  $sec_meetings_info = _school_crawl_webadvisor_dom_id_content($results_dom, 'SEC_MEETING_INFO_' . $list_row);
 

	
0 comments (0 inline, 0 general)