Changeset - c9c12e4e603a
[Not reviewed]
default
0 1 0
Nathan Brink (binki) - 14 years ago 2012-02-16 10:08:56
ohnobinki@ohnopublishing.net
Set the User-Agent header in our crawler.
1 file changed with 1 insertion and 0 deletions:
0 comments (0 inline, 0 general)
inc/school.crawl.inc
Show inline comments
 
@@ -282,48 +282,49 @@ function school_crawl_meeting_type($meet
 
 *   associative array of form keys/values.
 
 * \param $follow_meta_refresh
 
 *   Parse the resultant HTML with http://docs.php.net/dom and if it
 
 *   contains a line that looks like ``<meta http-equiv="Refresh" content="0; url=https://simon.ccbcmd.edu/pls/PROD/bwckschd.p_disp_dyn_sched">'',
 
 *   follow that URL.
 
 * \param $curlsetup_hook
 
 *   A function which is passed a curl handle which allows the caller
 
 *   to do silly things like setting CURLOPT_SSLVERSION for silly
 
 *   sites like ccbcmd's registration site.
 
 * \param $loopspin
 
 *   An internal variable to prevent us from following perpetual
 
 *   redirects.
 
 * \return
 
 *   The body of the document returned by the server (normally
 
 *   malformed HTML, especially with Calvin's WebAdvisor
 
 *   installation).
 
 */
 
function school_crawl_geturi(&$uri, &$cookies, array &$school_crawl_log, $post = NULL, $follow_meta_refresh = FALSE, $curlsetup_hook = NULL, $loopspin = 0)
 
{
 
  global $school_crawl_geturi_write_buf, $school_crawl_geturi_headers_buf;
 

	
 
  school_crawl_logf($school_crawl_log, 7, "school_crawl_geturi('%s').", $uri);
 

	
 
  $curl = curl_init();
 
  curl_setopt($curl, CURLOPT_USERAGENT, SP_PACKAGE_NAME . '/' . SP_PACKAGE_VERSION);
 

	
 
  if ($curlsetup_hook !== NULL)
 
    $curlsetup_hook($curl);
 

	
 
  $school_crawl_geturi_write_buf = '';
 
  $school_crawl_geturi_headers_buf = '';
 
  curl_setopt($curl, CURLOPT_URL, $uri);
 

	
 
  $cookies_str = '';
 
  foreach ($cookies as $key => $val)
 
    {
 
      if (strlen($cookies_str))
 
	$cookies_str .= ';';
 
      $cookies_str .= $key . '=' . $val;
 
    }
 

	
 
  school_crawl_logf($school_crawl_log, 10, "cookies sent: %s", $cookies_str);
 
  curl_setopt($curl, CURLOPT_COOKIE, $cookies_str);
 
  curl_setopt($curl, CURLOPT_HEADERFUNCTION, 'school_crawl_geturi_header_cb');
 
  curl_setopt($curl, CURLOPT_WRITEFUNCTION, 'school_crawl_geturi_write_cb');
 

	
 
  if ($post != NULL && is_array($post))
 
    {
 

	
0 comments (0 inline, 0 general)