Files
@ a45f0dabc59c
Branch filter:
Location: hot67beta/libraries/phputf8/utils/specials.php - annotation
a45f0dabc59c
7.3 KiB
text/x-php
mneh
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 c7d7e38b2269 | <?php
/**
* Utilities for processing "special" characters in UTF-8. "Special" largely means anything which would
* be regarded as a non-word character, like ASCII control characters and punctuation. This has a "Roman"
* bias - it would be unaware of modern Chinese "punctuation" characters for example.
* Note: requires utils/unicode.php to be loaded
* @version $Id: specials.php 10381 2008-06-01 03:35:53Z pasamio $
* @package utf8
* @subpackage utils
* @see utf8_is_valid
*/
//--------------------------------------------------------------------
/**
* Used internally. Builds a PCRE pattern from the $UTF8_SPECIAL_CHARS
* array defined in this file
* This function adds the control chars 0x00 to 0x19 to the array of
* special chars (they are not included in $UTF8_SPECIAL_CHARS)
* @package utf8
* @subpackage utils
* @return string
* @see utf8_from_unicode
* @see utf8_is_word_chars
* @see utf8_strip_specials
*/
function utf8_specials_pattern() {
static $pattern = NULL;
if ( !$pattern ) {
global $UTF8_SPECIAL_CHARS;
$pattern = preg_quote(utf8_from_unicode($UTF8_SPECIAL_CHARS), '/');
$pattern = '/[\x00-\x19'.$pattern.']/u';
}
return $pattern;
}
//--------------------------------------------------------------------
/**
* Checks a string for whether it contains only word characters. This
* is logically equivalent to the \w PCRE meta character. Note that
* this is not a 100% guarantee that the string only contains alpha /
* numeric characters but just that common non-alphanumeric are not
* in the string, including ASCII device control characters.
* @package utf8
* @subpackage utils
* @param string to check
* @return boolean TRUE if the string only contains word characters
* @see utf8_specials_pattern
*/
function utf8_is_word_chars($str) {
return !(bool)preg_match(utf8_specials_pattern(),$str);
}
//--------------------------------------------------------------------
/**
* Removes special characters (nonalphanumeric) from a UTF-8 string
*
* This can be useful as a helper for sanitizing a string for use as
* something like a file name or a unique identifier. Be warned though
* it does not handle all possible non-alphanumeric characters and is
* not intended is some kind of security / injection filter.
*
* @package utf8
* @subpackage utils
* @author Andreas Gohr <andi@splitbrain.org>
* @param string $string The UTF8 string to strip of special chars
* @param string (optional) $repl Replace special with this string
* @return string with common non-alphanumeric characters removed
* @see utf8_specials_pattern
*/
function utf8_strip_specials($string, $repl=''){
return preg_replace(utf8_specials_pattern(), $repl, $string);
}
//--------------------------------------------------------------------
/**
* UTF-8 array of common special characters
* This array should contain all special characters (not a letter or digit)
* defined in the various local charsets - it's not a complete list of
* non-alphanum characters in UTF-8. It's not perfect but should match most
* cases of special chars.
* The controlchars 0x00 to 0x19 are _not_ included in this array. The space
* 0x20 is! These chars are _not_ in the array either: _ (0x5f), : 0x3a,
* . 0x2e, - 0x2d
* @package utf8
* @subpackage utils
* @author Andreas Gohr <andi@splitbrain.org>
* @see utf8_specials_pattern
*/
$UTF8_SPECIAL_CHARS = array(
0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c,
0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
0x0385, 0x0387, 0x03b2, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
);
|