diff --git a/inc/math.inc b/inc/math.inc new file mode 100644 --- /dev/null +++ b/inc/math.inc @@ -0,0 +1,107 @@ + + * + * This file is a part of slate_permutate. + * + * slate_permutate is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * slate_permutate is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with slate_permutate. If not, see . + */ + +if (!function_exists('mean')) + { + /** + * \brief + * Calculate the mean of a set of numerical values without + * overflowing stuff. + */ + function mean(array $values) + { + /* + * As the influence of each element reduces with each iteration + * in the used algorithm, shuffling the array should give a + * better idea of what the actual mean is for larger arrays. + */ + shuffle($values); + + $val = 0; + $i = 0; + foreach ($values as $value) + { + $val = $val * $i / ($i + 1) + + $value / ($i + 1); + $i ++; + } + + return $val; + } + } + +if (!function_exists('stddev')) + { + function stddev(array $values) + { + $mean = mean($values); + + $squares = 0; + $i = 0; + foreach ($values as $value) + $squares += pow($mean - $value, 2); + return sqrt($squares / (count($values) - 1)); + } + } + +/** + * \brief + * Return the four quartile points of an array of sorted values with + * normal integral indexes. + */ +function sp_iqr(array $values) +{ + $count = count($values); + if (!$count) + return array(0, 0, 0, 0); + return array( + $values[0], + $values[(int)($count / 4)], + $values[(int)($count / 2)], + $values[(int)(3 * $count / 4)], + $values[$count - 1], + ); +} + +/** + * \brief + * Remove any `outliers' from an array of values. + * + * An outlier is defined as any value that falls further than 1.5 + * standard deviations outside of some sort of inter-quartile range. + */ +function filter_outliers(array $values) +{ + sort($values, SORT_NUMERIC); + $values = array_values($values); + + $stddev = stddev($values); + list(, $iqr_min, $iqr_max, ) = sp_iqr($values); + + $min = $iqr_min - 1.5 * $stddev; + $max = $iqr_max + 1.5 * $stddev; + + $count = count($values); + for ($i = 0; $i < $count; $i ++) + if ($values[$i] < $min + || $values[$i] > $max) + unset($values[$i]); + return array_values($values); +}