*
 * This file is a part of slate_permutate.
 *
 * slate_permutate is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * slate_permutate is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with slate_permutate.  If not, see <http://www.gnu.org/licenses/>.
 */

$inc_dir = dirname(__FILE__) . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'inc' . DIRECTORY_SEPARATOR;
require_once($inc_dir . 'schedule_store.inc');
require_once($inc_dir . 'class.semester.inc');
require_once($inc_dir . 'school.inc');
require_once($inc_dir . 'school.crawl.inc');

/**
 * \file
 *   Functions which are only needed when recreating the cache.
 */

/**
 * \brief
 *   Returns the list of available school IDs or NULL on error.
 *
 * A school ID is the basename of every file in the school.d directory
 * whose name consists solely of lowercase letters and digits followed
 * by ``.inc''.
 *
 * \return
 *   An array of school ID strings (possibly empty) or NULL if the
 *   school.d directory could not be opened.
 */
function school_list()
{
  $schoold_dir_name = dirname(__FILE__) . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'school.d';

  $schoold_dir = opendir($schoold_dir_name);
  if ($schoold_dir === FALSE)
    {
      fprintf(STDERR, "Unable to open school.d directory. Was using path: `%s'\n",
              $schoold_dir_name);
      return NULL;
    }

  $school_id_list = array();
  /*
   * Compare against FALSE explicitly: a directory entry whose name is
   * falsy (such as ``0'') must not terminate the scan early.
   */
  while (($filename = readdir($schoold_dir)) !== FALSE)
    {
      if (!preg_match('/^([a-z0-9]+)\.inc$/', $filename, $matches))
        continue;

      $school_id_list[] = $matches[1];
    }

  closedir($schoold_dir);

  return $school_id_list;
}

/**
 * \brief
 *   Compare the two schools by their names.
 *
 * \param $school_a
 *   An array with a 'name' key.
 * \param $school_b
 *   An array with a 'name' key.
 * \return
 *   The result of strcmp() on the two names.
 *
 * \see strcmp()
 */
function school_cmp($school_a, $school_b)
{
  return strcmp($school_a['name'], $school_b['name']);
}

/**
 * \brief
 *   Write out the cache file which remembers the list of available
 *   schools.
 *
 * \param $schools
 *   An array of school handles.
*/
function school_cache($schools)
{
  $list_cache = array();
  $domain_cache = array();

  $cache_dir_name = dirname(__FILE__) . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'cache' . DIRECTORY_SEPARATOR;
  $cache_auto_dir_name = $cache_dir_name . 'auto' . DIRECTORY_SEPARATOR;

  foreach ($schools as $school)
    {
      $list_cache[$school['id']] = array(
        'name' => $school['name'],
        'url' => $school['url'],
        'crawled' => !empty($school['crawled']),
      );

      /*
       * Build a tree keyed by reversed domain components, e.g.
       * ``example.edu'' ends up as $domain_cache['edu']['example'].
       */
      foreach ($school['domains'] as $school_domain)
        {
          $domain_cache_ptr =& $domain_cache;

          $domain_parts = array_reverse(explode('.', $school_domain));
          while (count($domain_parts) > 1)
            {
              $domain_part = array_shift($domain_parts);
              if (!isset($domain_cache_ptr[$domain_part]) || !is_array($domain_cache_ptr[$domain_part]))
                $domain_cache_ptr[$domain_part] = array();
              $domain_cache_ptr =& $domain_cache_ptr[$domain_part];
            }
          /*
           * Get the last part, which unambiguously identifies this
           * school when combined with the previous parts.
           */
          $domain_part = array_shift($domain_parts);
          $domain_cache_ptr[$domain_part] = $school['id'];
        }
      /* Do not leave a reference into $domain_cache lying around. */
      unset($domain_cache_ptr);

      /*
       * autocomplete stuff -- per school
       *
       * We don't do anything if crawled_notreally is set because this
       * way we can get incremental crawling. Really useful if one's
       * just debugging one of the school crawling scripts and doesn't
       * want to run all crawlers ;-).
       */
      if (empty($school['crawled']) || isset($school['crawled_notreally']))
        continue;

      $cache_auto_school_dir_name = $cache_auto_dir_name . $school['id'] . DIRECTORY_SEPARATOR;
      if (!is_dir($cache_auto_school_dir_name))
        {
          /* Report the directory which actually failed to be created. */
          if (!mkdir($cache_auto_school_dir_name, 0755, TRUE))
            error_log('Unable to create needed directory: `' . $cache_auto_school_dir_name . '\'');
        }

      /*
       * Presort the semesters with school_cache_semesters_sort() so
       * that the crawler doesn't have to decide how to order the
       * semesters itself. The sort has to happen on the crawled
       * semesters _before_ the weights are handed out below; sorting
       * the (still empty) output array has no effect.
       *
       * NOTE(review): the comparator orders by ascending
       * time_start_get(), i.e. older semesters receive the lower
       * weights. Confirm that this is what consumers of 'weight'
       * expect.
       */
      $crawled_semesters = $school['crawled_semesters'];
      usort($crawled_semesters, 'school_cache_semesters_sort');

      $semesters = array();
      $semester_weights = 0;
      foreach ($crawled_semesters as $semester)
        {
          $semesters[$semester->id()] = array(
            'id' => $semester->id(),
            'time_start' => $semester->time_start_get(),
            'time_end' => $semester->time_end_get(),
            'weight' => $semester_weights ++,
            'name' => $semester->name_get(),
          );
        }

      /*
       * Store/cache the semester metadata:
       */
      $semesters_file_name = $cache_auto_school_dir_name . '-semesters';
      $semesters_file = fopen($semesters_file_name, 'wb');
      if ($semesters_file === FALSE)
        {
          /* Best effort, like the mkdir() above: report and carry on. */
          error_log('Unable to open `' . $semesters_file_name . '\' for writing');
        }
      else
        {
          fwrite($semesters_file, serialize($semesters));
          fclose($semesters_file);
        }
    }

  uasort($list_cache, 'school_cmp');

  $cache = array('list' => $list_cache, 'domains' => $domain_cache);

  $cache_file_name = $cache_dir_name . 'schools';
  $cache_file = fopen($cache_file_name, 'wb');
  if ($cache_file === FALSE)
    {
      fprintf(STDERR, "Unable to open `%s' for writing\n",
              $cache_file_name);
      return 1;
    }
  fwrite($cache_file, serialize($cache));
  fclose($cache_file);

  return 0;
}

/**
 * \brief
 *   To support usort() call in school_cache() to sort semesters.
 *
 * \param $sem_a
 *   The first Semester.
 * \param $sem_b
 *   The second Semester.
 * \return
 *   The difference between the two semesters' time_start_get()
 *   values: negative if $sem_a starts first, positive if $sem_b
 *   starts first.
 */
function school_cache_semesters_sort (Semester $sem_a, Semester $sem_b)
{
  return $sem_a->time_start_get() - $sem_b->time_start_get();
}

/**
 * \brief
 *   Invoke a school's registration data crawler.
 *
 * Each school may export registration data on publicly accessible
 * websites. Thus, we populate some autocomplete information by
 * crawling these pages and storing the information in a special set
 * of caches.
 *
 * Because crawling code can be non-trivial, it should be separated
 * from a school's main .inc file. Thus, if a school supports
 * crawling, it will have a file called
 * schools.d/<school_id>.crawl.inc. In this file, a function called
 * <school_id>_crawl(array &$semesters, $verbosity = 1) must be
 * defined. It must accept at least one argument, the array to be
 * filled with Semester objects.
* It must populate this array with individual Semester objects and
 * fill those with Course objects and populate those courses with the
 * sections with as much detail as possible. This function may return
 * 1 to indicate an error and must return 0 to indicate success.
 *
 * NOTE: the implementation below does not look for one single crawl
 * function. It calls <school_id>_crawl_semester_list($school,
 * $semesters, $school_crawl_log) to obtain the semesters (presumably
 * filled in by reference) and then, for each semester,
 * <school_id>_crawl_semester($school, $semester, $school_crawl_log).
 * Both are treated as having failed when they return nonzero.
 *
 * If the crawling is successful, a 'crawled' key is added to the
 * $school handle. school_cache() will use this to help indicate that
 * a school _has_ autocomplete information, which might affect the
 * appearance and JS stuff for the input.php page. The semesters which
 * were crawled successfully are stored in
 * $school['crawled_semesters'].
 *
 * \param $school
 *   The school which should be checked for crawl functionality and
 *   crawled.
 * \param $page
 *   The Page object for which HTML formatted logs should be output.
 * \param $verbosity
 *   How verbose to be. Sane values are from 0 through 10.
 * \return
 *   A school_crawl_log handle, upon which school_crawl_log_fetch()
 *   may be used. NULL if the school has no semester-listing crawl
 *   function, if that function fails or if the per-semester crawl
 *   function is missing.
 */
function school_crawl(array &$school, Page $page = NULL, $verbosity = 1)
{
  $cache_dir_name = dirname(__FILE__) . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'cache' . DIRECTORY_SEPARATOR;
  $cache_auto_dir_name = $cache_dir_name . 'auto' . DIRECTORY_SEPARATOR;
  $cache_auto_school_dir_name = $cache_auto_dir_name . $school['id'] . DIRECTORY_SEPARATOR;
  if (!is_dir($cache_auto_school_dir_name))
    {
      /* Report the directory which actually failed to be created. */
      if (!mkdir($cache_auto_school_dir_name, 0755, TRUE))
        error_log('Unable to create needed directory: `' . $cache_auto_school_dir_name . '\'');
    }

  $school['crawled'] = FALSE;

  $school_crawl_semesters_list_func = $school['id'] . '_crawl_semester_list';
  if (!function_exists($school_crawl_semesters_list_func))
    return NULL;

  $school_crawl_log_opts = array('verbosity' => $verbosity);
  /* STDERR is not defined under every SAPI. */
  if (defined('STDERR'))
    $school_crawl_log_opts['stream'] = STDERR;
  if ($page !== NULL)
    $school_crawl_log_opts['page'] = $page;
  $school_crawl_log = school_crawl_log_init($school, $school_crawl_log_opts);

  $semesters = array();

  if ($verbosity > 0)
    school_crawl_logf($school_crawl_log, 2, "Calling %s()...", $school_crawl_semesters_list_func);
  $ret = $school_crawl_semesters_list_func($school, $semesters, $school_crawl_log);
  if ($ret)
    {
      school_crawl_logf($school_crawl_log, 1, "Crawling %s failed: %s() returned nonzero",
                        $school['id'], $school_crawl_semesters_list_func);
      school_crawl_logf($school_crawl_log, 6, "");
      return NULL;
    }

  $successful_semesters = array();
  $school_crawl_semester_func = $school['id'] . '_crawl_semester';
  if (!function_exists($school_crawl_semester_func))
    {
      school_crawl_logf($school_crawl_log, 3, "%s() is defined but %s() isn't.",
                        $school_crawl_semesters_list_func, $school_crawl_semester_func);
      return NULL;
    }

  foreach ($semesters as $semester)
    {
      school_crawl_logf($school_crawl_log, 2, "Calling %s(%s)...", $school_crawl_semester_func, $semester);
      $ret = $school_crawl_semester_func($school, $semester, $school_crawl_log);
      if ($ret)
        {
          school_crawl_logf($school_crawl_log, 1, "Failed to crawl semester %s. Skipping semester.", $semester);
          continue;
        }

      /*
       * Write out this semester's cache now that we're here.
       */
      $cache_auto_school_semester_dir_name = $cache_auto_school_dir_name . $semester->id() . DIRECTORY_SEPARATOR;
      if (!is_dir($cache_auto_school_semester_dir_name))
        {
          if (!mkdir($cache_auto_school_semester_dir_name, 0755, TRUE))
            error_log('Unable to create needed directory: `' . $cache_auto_school_semester_dir_name . '\'');
        }

      $departments = $semester->departments_get();
      sort($departments);

      _school_crawl_cache_write($cache_auto_school_semester_dir_name . '-depts',
                                serialize($departments));

      /* now per-department autocomplete */
      foreach ($departments as $department)
        {
          $classes = $semester->department_classes_get($department);
          _school_crawl_cache_write($cache_auto_school_semester_dir_name . $department . '.sects',
                                    serialize($classes));

          /* now individual section informations, pre-JSON-ized */
          foreach ($classes as $class)
            {
              if (!is_dir($cache_auto_school_semester_dir_name . $department))
                mkdir($cache_auto_school_semester_dir_name . $department);
              _school_crawl_cache_write($cache_auto_school_semester_dir_name . $department . DIRECTORY_SEPARATOR . $class,
                                        json_encode($semester->class_get($department, $class)->to_json_array()));
            }
        }

      /* Purge the data written to disk from memory */
      $semester->purge();

      school_crawl_logf($school_crawl_log, 6, "");
      $successful_semesters[] = $semester;
    }

  $school['crawled'] = TRUE;
  $school['crawled_semesters'] = $successful_semesters;

  return $school_crawl_log;
}

/**
 * \brief
 *   Write a string into a cache file on behalf of school_crawl().
 *
 * Failure to open or write the file is reported through error_log()
 * instead of handing FALSE on to fwrite()/fclose().
 *
 * \param $filename
 *   The path of the file to (over)write.
 * \param $data
 *   The string to store in the file.
 * \return
 *   TRUE if the file was opened and written, FALSE otherwise.
 */
function _school_crawl_cache_write($filename, $data)
{
  $file = fopen($filename, 'wb');
  if ($file === FALSE)
    {
      error_log('Unable to open `' . $filename . '\' for writing');
      return FALSE;
    }

  $written = fwrite($file, $data);
  fclose($file);
  if ($written === FALSE)
    {
      error_log('Unable to write to `' . $filename . '\'');
      return FALSE;
    }

  return TRUE;
}

/**
 * \brief
 *   Recreate/update the school section autocomplete cache.
 *
 * \param $crawl_only
 *   If non-NULL, an array of school_ids to limit the cache recreation
 *   to. Useful for when developing a certain school's crawling
 *   function.
 * \param $page
 *   The Page object handed on to school_crawl(), or NULL.
 * \param $verbosity
 *   An integer indicating how loud to be.
 * \return
 *   0 on success, 1 on error.
*/
function school_cache_recreate($crawl_only = NULL, Page $page = NULL, $verbosity = 5)
{
  $known_ids = school_list();
  if (!$known_ids)
    {
      fprintf(STDERR, "error: Unable to load schools.\n");
      return 1;
    }

  /* Reject the whole request if any requested school is unknown. */
  $crawl_everything = ($crawl_only === NULL);
  if (!$crawl_everything)
    {
      foreach ($crawl_only as $requested_id)
        {
          if (in_array($requested_id, $known_ids))
            continue;

          fprintf(STDERR, "error: Invalid school_id specified for crawling: %s\n", $requested_id);
          return 1;
        }
    }

  /* Keep libxml's complaints off of the console. */
  libxml_use_internal_errors(TRUE);

  $loaded_schools = array();
  $previous_cache = _school_cache_load();
  foreach ($known_ids as $school_id)
    {
      $school = school_load($school_id, TRUE);
      if (!$school)
        {
          fprintf(STDERR, "Error loading school with school_id=%s\n",
                  $school_id);
          return 1;
        }

      if ($crawl_everything || in_array($school['id'], $crawl_only))
        {
          school_crawl($school, $page, $verbosity);
        }
      elseif ($previous_cache && isset($previous_cache['list'][$school['id']]))
        {
          /*
           * This school is not being crawled this time around. Carry
           * the 'crawled' flag over from the previous cache and mark
           * it with crawled_notreally so that the data written by an
           * earlier crawl is left alone.
           */
          $previous_entry = $previous_cache['list'][$school['id']];
          $school['crawled'] = isset($previous_entry['crawled']) ? $previous_entry['crawled'] : FALSE;
          if ($school['crawled'])
            $school['crawled_notreally'] = TRUE;
        }

      $loaded_schools[] = $school;
    }

  if (school_cache($loaded_schools))
    {
      fprintf(STDERR, "Error writing out school cache\n");
      return 1;
    }

  return 0;
}

/**
 * \brief
 *   Delete the saved_schedules which fall into a time range.
 *
 * \param $schedule_store
 *   The schedule_store handle from which saved schedules are to be
 *   removed.
 * \param $time_min
 *   The lower bound of the range, as a unix timestamp.
 * \param $time_max
 *   The upper bound of the range, as a unix timestamp, or NULL if
 *   the range has no upper bound.
 * \return
 *   FALSE on failure, an integer indicating the number of deleted
 *   saved_schedules on success.
*/
function schedule_store_purge_range($schedule_store, $time_min = 0, $time_max = NULL)
{
  global $admin_enable_purge;

  $schedule_id_max = schedule_store_getmaxid($schedule_store);
  $num_deleted = 0;

  /* Purging has to be enabled explicitly by the administrator. */
  if (!$admin_enable_purge)
    return FALSE;

  /*
   * NOTE(review): the loop stops before $schedule_id_max itself. If
   * schedule_store_getmaxid() returns the highest ID in use rather
   * than the next free one, the newest schedule is never considered
   * -- confirm against schedule_store.inc.
   */
  for ($schedule_id = 0; $schedule_id < $schedule_id_max; $schedule_id ++)
    {
      $filename = $schedule_store['dir'] . DIRECTORY_SEPARATOR . $schedule_id;
      if (!file_exists($filename))
        continue;

      $statbuf = stat($filename);
      if ($statbuf === FALSE)
        continue;

      if ($statbuf['ctime'] >= $time_min
          && ($time_max === NULL || $statbuf['ctime'] <= $time_max))
        {
          /* Only count the files which really were removed. */
          if (unlink($filename))
            $num_deleted ++;
          else
            error_log('Unable to delete saved schedule: `' . $filename . '\'');
        }
    }

  return $num_deleted;
}

/**
 * \brief
 *   A small testsuite to help developers.
 *
 * Exercises Section::parse(), Course::parse() and
 * school_crawl_csv_parse() and reports problems through
 * assert_equal().
 *
 * \return
 *   Number of failures.
 */
function test()
{
  $ideal = array('department' => 'CS', 'course' => '262', 'section' => 'A');
  $ideal_c = $ideal;
  unset($ideal_c['section']);
  $n = 0;

  $t1 = 'CS-262-A';
  $n += assert_equal($t1, Section::parse($t1), $ideal);
  $n += assert_equal($t1 . '_class', Course::parse($t1), $ideal_c);
  $t2 = 'cs262 a';
  $n += assert_equal($t2, Section::parse($t2), $ideal);
  $n += assert_equal($t2 . '_class', Course::parse($t2), $ideal_c);
  $t3 = 'cs 262 a';
  /* Parse $t3 itself here; this used to parse $t2 a second time. */
  $n += assert_equal($t3, Section::parse($t3), $ideal);
  $n += assert_equal($t3 . '_class', Course::parse($t3), $ideal_c);

  $ideal['course'] .= 'L';
  $ideal_c['course'] = $ideal['course'];

  $t1 = 'CS-262L-A';
  $n += assert_equal($t1, Section::parse($t1), $ideal);
  $n += assert_equal($t1 . '_class', Course::parse($t1), $ideal_c);
  $t2 = 'cs262l a';
  $n += assert_equal($t2, Section::parse($t2), $ideal);
  $n += assert_equal($t2 . '_class', Course::parse($t2), $ideal_c);
  $t3 = 'cs 262l a';
  /* Parse $t3 itself here; this used to parse $t2 a second time. */
  $n += assert_equal($t3, Section::parse($t3), $ideal);
  $n += assert_equal($t3 . '_class', Course::parse($t3), $ideal_c);

  /*
   * The records are joined with explicit newlines so that the fixture
   * survives any reformatting of this file. The expected result below
   * requires them: a quoted field spanning two lines ("1\n1") and an
   * empty record before the final `4'.
   */
  $csv = implode("\n", array(
    '1,2,3,4',
    '1,2 ,3,4',
    '1,"2,",3,4',
    '"1',
    '1",2,3,4',
    '"""1""",2,3,4',
    '',
    '4',
  ));
  $csv_parsed = array(
    array('1', '2', '3', '4'),
    array('1', '2 ', '3', '4'),
    array('1', '2,', '3', '4'),
    array("1\n1", '2', '3', '4'),
    array('"1"', '2', '3', '4'),
    array(''),
    array('4'),
  );
  $n += assert_equal('csv', school_crawl_csv_parse($csv, array('eof' => TRUE)), $csv_parsed);
  /* The whole buffer is expected to have been consumed. */
  $n += assert_equal('csv_buffer', $csv, '');

  $csv_partial = "1,2\n3";
  /*
   * Check partial parsing support; give a situation where we
   * supposedly don't have eof.
   */
  $n += assert_equal('csv_partial', school_crawl_csv_parse($csv_partial), array(array('1', '2')));
  $n += assert_equal('csv_partial_buffer', $csv_partial, '3');

  return $n;
}

/**
 * \brief
 *   A reimplementation of a standard testsuite utility.
 *
 * Arrays are compared recursively, key by key; everything else is
 * compared with ===. A description of the first difference found is
 * written to STDERR.
 *
 * \param $name
 *   The name of the test, used in the messages.
 * \param $a
 *   The value which was obtained.
 * \param $b
 *   The value which was expected.
 * \return
 *   TRUE if the test failed.
 */
function assert_equal($name, $a, $b)
{
  if (is_array($a))
    {
      if (!is_array($b))
        {
          fprintf(STDERR, "Test ``%s'' failed: \$a is an array while \$b isn't.\n",
                  $name);
          return TRUE;
        }

      $bad = FALSE;

      /*
       * array_key_exists() instead of isset(): a key whose value is
       * NULL is present and must not be reported as missing.
       */
      foreach ($a as $key => $val)
        {
          if (array_key_exists($key, $b))
            $bad = assert_equal($name . '[' . $key . ']', $val, $b[$key]);
          else
            {
              $bad = TRUE;
              fprintf(STDERR, "\$b is missing key ``%s'' which \$a has.\n",
                      $key);
            }

          if ($bad)
            break;
        }

      /*
       * All keys shared with $a were compared above, so only keys
       * which $a lacks are left to look for.
       */
      if (!$bad)
        foreach ($b as $key => $val)
          if (!array_key_exists($key, $a))
            {
              $bad = TRUE;
              fprintf(STDERR, "\$a is missing key ``%s'' which \$b has.\n",
                      $key);
              break;
            }

      if ($bad)
        {
          fprintf(STDERR, "Test ``%s'' failed, see previous error message\n",
                  $name);
          return TRUE;
        }

      return FALSE;
    }

  if (is_array($b))
    {
      fprintf(STDERR, "Test ``%s'' failed: \$b is an array; \$a isn't.\n",
              $name);
      return TRUE;
    }

  if ($a === $b)
    return FALSE;

  fprintf(STDERR, "Test ``%s'' failed: `%s' === `%s' => %s, strcmp() == %d\n",
          $name, $a, $b, $a === $b ? 'TRUE' : 'FALSE', strcmp($a, $b));
  return TRUE;
}