Changeset - 3487a5b3cbfd
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 13 years ago 2012-10-01 20:19:09
ohnobinki@ohnopublishing.net
Fix some crawler utility functions to handle relative URI resolving correctly and be more flexible when parsing days.
1 file changed with 13 insertions and 4 deletions:
0 comments (0 inline, 0 general)
inc/school.crawl.inc
Show inline comments
 
@@ -209,29 +209,36 @@ function school_crawl_days_format(array 
 
 * \brief
 
 *   Take a string of day initials and format it.
 
 *
 
 * \param $school_crawl_log
 
 *   The school_crawl_log handle to write errors out to.
 
 * \param $days_str
 
 *   Example input: 'mwf', 'TR'.
 
 * \return
 
 *   Same as school_crawl_days_format()
 
 */
 
function school_crawl_days_str_format(array $school_crawl_log, $days_str)
{
  /*
   * Strip everything that is not a letter or digit first, so that
   * inputs carrying separators or padding (for example 'M W F' or
   * 'T-R') are reduced to the bare day initials before they are
   * split into individual characters.
   */
  $days_str = preg_replace('/[^[:alnum:]]+/', '', $days_str);

  /*
   * str_split() may produce an array with an empty string in it if
   * the input string is empty (PHP versions before 8.2). We just
   * want an empty result in that case, so if there are no input
   * days we shortcut and return no days without calling
   * school_crawl_days_format() at all.
   */
  if (empty($days_str))
    return '';

  /* One array element per remaining character, i.e. per day initial. */
  return school_crawl_days_format($school_crawl_log, str_split($days_str));
}
 

	
 
/**
 
 * \brief
 
 *   Try to guess a more standardized section_meeting type.
 
 *
 
 * \param $meeting_type
 
 *   The upstream's meeting_type, such as 'LEC', 'lec', 'LAB',
 
 *   etc. New mappings should be added to this function as long as
 
 *   they are general enough.
 
 */
 
function school_crawl_meeting_type($meeting_type = 'lecture')
 
@@ -595,24 +602,26 @@ function school_crawl_url($orig_url, $ur
 
      }
 

	
 
  /* check if we have an absolute relative path */
 
  if (!strncmp($url, '/', 1))
 
    $new_url['path'] = $url;
 

	
 
  /* relative */
 
  while (!strncmp($url, '../', 3))
 
    {
 
      $new_url['path'] = preg_replace(';[^/]+/[^/]+$;', '/', dirname($new_url['path']));
 
      $url = substr($url, 3);
 
    }
 
  if (strncmp($url, '/', 1) && strpos($url, '://') === FALSE)
 
    $new_url['path'] .= $url;
 

	
 
  return $new_url['schema'] . '://' . $new_url['hostname'] . $new_url['path'];
 
}
 

	
 
/**
 
 * \brief
 
 *   Map a name onto a column of the table with the help of <th />.
 
 *
 
 * This should be a fairly reliable way of matching the data that a
 
 * user sees onto the actual data because, in most cases, HTML writers
 
 * are forced to properly align <th /> and the following hundreds of
 
 * <td />s for there to be a visual alignment.
0 comments (0 inline, 0 general)