<?php
// Converts text to a CoolURI-usable format, e.g. myString::urilize('Příliš žluťoučký kůň') → prilis-zlutoucky-kun
/**
 * String helpers for building URL-friendly slugs from UTF-8 text.
 */
class myString
{
    /**
     * Converts UTF-8 text to a lowercase, dash-separated ASCII slug.
     *
     * Steps: truncate to $maxLength characters, transliterate the accented
     * letters known to cs_utf2ascii(), lowercase, replace every run of
     * characters other than a-z / 0-9 with a single dash, and strip dashes
     * from both ends.
     *
     * @param string $s         UTF-8 input text (null is treated as '').
     * @param int    $maxLength Number of leading characters of the input to
     *                          keep before conversion (default 50).
     *
     * @return string Slug consisting only of [a-z0-9-]; may be empty.
     */
    public static function urilize($s, $maxLength = 50)
    {
        // Cut long text; the encoding is passed explicitly so the result
        // does not depend on the configured mbstring internal encoding.
        $s = mb_substr((string) $s, 0, $maxLength, 'UTF-8');
        $s = self::cs_utf2ascii($s);
        $s = mb_strtolower($s, 'UTF-8');

        // Anything that is not a lowercase ASCII letter or digit (spaces,
        // newlines, underscores, punctuation, leftover non-ASCII bytes)
        // becomes a separator. The "+" collapses a run of any length into
        // one dash, which repeated '/--/' replacements could not guarantee.
        $s = preg_replace('/[^a-z0-9]+/', '-', $s);

        // No separator at the start or the end of the slug.
        return trim($s, '-');
    }

    // credits: dgx - http://latrine.dgx.cz/odstraneni-diakritiky-z-ruznych-kodovani
    /**
     * Replaces accented UTF-8 letters with their plain ASCII base letter.
     *
     * The table covers Czech and Slovak letters plus ä, ö, ü, ő and ű in
     * both cases. Characters that are not in the table are returned
     * unchanged.
     *
     * @param string $s UTF-8 input text (null is treated as '').
     *
     * @return string Input with the known accented letters replaced.
     */
    public static function cs_utf2ascii($s)
    {
        // Keys are the raw UTF-8 byte sequences of the accented letters.
        static $tbl = array(
            // lowercase
            "\xc3\xa1" => "a", "\xc3\xa4" => "a", "\xc4\x8d" => "c", "\xc4\x8f" => "d",
            "\xc3\xa9" => "e", "\xc4\x9b" => "e", "\xc3\xad" => "i", "\xc4\xbe" => "l",
            "\xc4\xba" => "l", "\xc5\x88" => "n", "\xc3\xb3" => "o", "\xc3\xb6" => "o",
            "\xc5\x91" => "o", "\xc3\xb4" => "o", "\xc5\x99" => "r", "\xc5\x95" => "r",
            "\xc5\xa1" => "s", "\xc5\xa5" => "t", "\xc3\xba" => "u", "\xc5\xaf" => "u",
            "\xc3\xbc" => "u", "\xc5\xb1" => "u", "\xc3\xbd" => "y", "\xc5\xbe" => "z",
            // uppercase
            "\xc3\x81" => "A", "\xc3\x84" => "A", "\xc4\x8c" => "C", "\xc4\x8e" => "D",
            "\xc3\x89" => "E", "\xc4\x9a" => "E", "\xc3\x8d" => "I", "\xc4\xbd" => "L",
            "\xc4\xb9" => "L", "\xc5\x87" => "N", "\xc3\x93" => "O", "\xc3\x96" => "O",
            "\xc5\x90" => "O", "\xc3\x94" => "O", "\xc5\x98" => "R", "\xc5\x94" => "R",
            "\xc5\xa0" => "S", "\xc5\xa4" => "T", "\xc3\x9a" => "U", "\xc5\xae" => "U",
            "\xc3\x9c" => "U", "\xc5\xb0" => "U", "\xc3\x9d" => "Y", "\xc5\xbd" => "Z"
        );

        return strtr((string) $s, $tbl);
    }
}
?>