Files @ a45f0dabc59c
Branch filter:

Location: hot67beta/libraries/phputf8/utils/ascii.php - annotation

ethanzonca@localhost.localdomain
mneh
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
c7d7e38b2269
<?php
/**
* Tools to help with ASCII in UTF-8
* @version $Id: ascii.php 10381 2008-06-01 03:35:53Z pasamio $
* @package utf8
* @subpackage ascii
*/

//---------------------------------------------------------------
/**
* UTF-8 lookup table for lower case accented letters
* This lookuptable defines replacements for accented characters from the ASCII-7
* range. This are lower case letters only.
* @author Andreas Gohr <andi@splitbrain.org>
* @see utf8_deaccent()
* @package utf8
* @subpackage ascii
*/
$GLOBALS['UTF8_LOWER_ACCENTS'] = array(
    'à ' => 'a', 'ô' => 'o', '�' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
    'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', '�' => 'a', 'ķ' => 'k',
    '�' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
    'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', '�' => 'w', 'ċ' => 'c', 'õ' => 'o',
    'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
    'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
    'ū' => 'u', '�' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
    'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
    'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
    'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
    'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', '�' => 'g', 'đ' => 'd', 'ĵ' => 'j',
    'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
    'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
    '�' => 'm', '�' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
    'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
);


//---------------------------------------------------------------
/**
* UTF-8 lookup table for upper case accented letters
* This lookuptable defines replacements for accented characters from the ASCII-7
* range. This are upper case letters only.
* @author Andreas Gohr <andi@splitbrain.org>
* @see utf8_deaccent()
* @package utf8
* @subpackage ascii
 */

$GLOBALS['UTF8_UPPER_ACCENTS'] = array(
    'à ' => 'A', 'ô' => 'O', '�' => 'D', 'ḟ' => 'F', 'ë' => 'E', 'š' => 'S', 'ơ' => 'O',
    'ß' => 'Ss', 'ă' => 'A', 'ř' => 'R', 'ț' => 'T', 'ň' => 'N', '�' => 'A', 'ķ' => 'K',
    '�' => 'S', 'ỳ' => 'Y', 'ņ' => 'N', 'ĺ' => 'L', 'ħ' => 'H', 'ṗ' => 'P', 'ó' => 'O',
    'ú' => 'U', 'ě' => 'E', 'é' => 'E', 'ç' => 'C', '�' => 'W', 'ċ' => 'C', 'õ' => 'O',
    'ṡ' => 'S', 'ø' => 'O', 'ģ' => 'G', 'ŧ' => 'T', 'ș' => 'S', 'ė' => 'E', 'ĉ' => 'C',
    'ś' => 'S', 'î' => 'I', 'ű' => 'U', 'ć' => 'C', 'ę' => 'E', 'ŵ' => 'W', 'ṫ' => 'T',
    'ū' => 'U', '�' => 'C', 'ö' => 'Oe', 'è' => 'E', 'ŷ' => 'Y', 'ą' => 'A', 'ł' => 'L',
    'ų' => 'U', 'ů' => 'U', 'ş' => 'S', 'ğ' => 'G', 'ļ' => 'L', 'ƒ' => 'F', 'ž' => 'Z',
    'ẃ' => 'W', 'ḃ' => 'B', 'å' => 'A', 'ì' => 'I', 'ï' => 'I', 'ḋ' => 'D', 'ť' => 'T',
    'ŗ' => 'R', 'ä' => 'Ae', 'í' => 'I', 'ŕ' => 'R', 'ê' => 'E', 'ü' => 'Ue', 'ò' => 'O',
    'ē' => 'E', 'ñ' => 'N', 'ń' => 'N', 'ĥ' => 'H', '�' => 'G', 'đ' => 'D', 'ĵ' => 'J',
    'ÿ' => 'Y', 'ũ' => 'U', 'ŭ' => 'U', 'ư' => 'U', 'ţ' => 'T', 'ý' => 'Y', 'ő' => 'O',
    'â' => 'A', 'ľ' => 'L', 'ẅ' => 'W', 'ż' => 'Z', 'ī' => 'I', 'ã' => 'A', 'ġ' => 'G',
    '�' => 'M', '�' => 'O', 'ĩ' => 'I', 'ù' => 'U', 'į' => 'I', 'ź' => 'Z', 'á' => 'A',
    'û' => 'U', 'Þ' => 'Th', '�' => 'Dh', 'Æ' => 'Ae',
);

//--------------------------------------------------------------------
/**
* Tests whether a string contains only 7bit ASCII bytes.
* You might use this to conditionally check whether a string
* needs handling as UTF-8 or not, potentially offering performance
* benefits by using the native PHP equivalent if it's just ASCII e.g.;
*
* <code>
* <?php
* if ( utf8_is_ascii($someString) ) {
*     // It's just ASCII - use the native PHP version
*     $someString = strtolower($someString);
* } else {
*     $someString = utf8_strtolower($someString);
* }
* ?>
* </code>
*
* @param string
* @return boolean TRUE if it's all ASCII
* @package utf8
* @subpackage ascii
* @see utf8_is_ascii_ctrl
*/
function utf8_is_ascii($str) {
    if ( strlen($str) > 0 ) {
        // Search for any bytes which are outside the ASCII range...
        return (preg_match('/[^\x00-\x7F]/',$str) !== 1);
    }
    return FALSE;
}

//--------------------------------------------------------------------
/**
* Tests whether a string contains only 7bit ASCII bytes with device
* control codes omitted. The device control codes can be found on the
* second table here: http://www.w3schools.com/tags/ref_ascii.asp
*
* @param string
* @return boolean TRUE if it's all ASCII without device control codes
* @package utf8
* @subpackage ascii
* @see utf8_is_ascii
*/
function utf8_is_ascii_ctrl($str) {
    if ( strlen($str) > 0 ) {
        // Search for any bytes which are outside the ASCII range,
        // or are device control codes
        return (preg_match('/[^\x09\x0A\x0D\x20-\x7E]/',$str) !== 1);
    }
    return FALSE;
}

//--------------------------------------------------------------------
/**
* Strip out all non-7bit ASCII bytes
* If you need to transmit a string to system which you know can only
* support 7bit ASCII, you could use this function.
* @param string
* @return string with non ASCII bytes removed
* @package utf8
* @subpackage ascii
* @see utf8_strip_non_ascii_ctrl
*/
function utf8_strip_non_ascii($str) {
    ob_start();
    while ( preg_match(
        '/^([\x00-\x7F]+)|([^\x00-\x7F]+)/S',
            $str, $matches) ) {
        if ( !isset($matches[2]) ) {
            echo $matches[0];
        }
        $str = substr($str, strlen($matches[0]));
    }
    $result = ob_get_contents();
    ob_end_clean();
    return $result;
}

//--------------------------------------------------------------------
/**
* Strip out all non 7bit ASCII bytes and ASCII device control codes.
* For a list of ASCII device control codes see the 2nd table here:
* http://www.w3schools.com/tags/ref_ascii.asp
*
* @param string
* @return boolean TRUE if it's all ASCII
* @package utf8
* @subpackage ascii
*/
function utf8_strip_non_ascii_ctrl($str) {
    ob_start();
    while ( preg_match(
        '/^([\x09\x0A\x0D\x20-\x7E]+)|([^\x09\x0A\x0D\x20-\x7E]+)/S',
            $str, $matches) ) {
        if ( !isset($matches[2]) ) {
            echo $matches[0];
        }
        $str = substr($str, strlen($matches[0]));
    }
    $result = ob_get_contents();
    ob_end_clean();
    return $result;
}

//---------------------------------------------------------------
/**
* Replace accented UTF-8 characters by unaccented ASCII-7 "equivalents".
* The purpose of this function is to replace characters commonly found in Latin
* alphabets with something more or less equivalent from the ASCII range. This can
* be useful for converting a UTF-8 to something ready for a filename, for example.
* Following the use of this function, you would probably also pass the string
* through utf8_strip_non_ascii to clean out any other non-ASCII chars
* Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
* letters. Default is to deaccent both cases ($case = 0)
* @param string UTF-8 string
* @param int (optional) -1 lowercase only, +1 uppercase only, 1 both cases
* @param string UTF-8 with accented characters replaced by ASCII chars
* @return string accented chars replaced with ascii equivalents
* @author Andreas Gohr <andi@splitbrain.org>
* @package utf8
* @subpackage ascii
*/
function utf8_accents_to_ascii( $str, $case=0 ){

	  if($case <= 0){
        global $UTF8_LOWER_ACCENTS;
        $string = str_replace(
                array_keys($UTF8_LOWER_ACCENTS),
                array_values($UTF8_LOWER_ACCENTS),
                $str
            );
    }

    if($case >= 0){
        global $UTF8_UPPER_ACCENTS;
        $string = str_replace(
                array_keys($UTF8_UPPER_ACCENTS),
                array_values($UTF8_UPPER_ACCENTS),
                $str
            );
    }

    return $str;

}