/**
 * Runs a string through a series of optional clean-up passes.
 *
 * Each pass is switched on by its own flag and the passes always run in the
 * order listed below, so a later pass sees the output of the earlier ones
 * (for example, pass 5 removes control characters before pass 6 can turn
 * them into spaces).
 *
 * @param mixed $text         Text to clean.
 * @param bool  $convert      1) Fold accented Latin letters, ordinal indicators,
 *                            typographic quotes, the en dash and the no-break
 *                            space to plain ASCII equivalents.
 * @param bool  $cp1252       2) Replace every value of the translation table
 *                            (PHP's HTML entity table plus the extra entries
 *                            below) by its key.
 * @param bool  $strip_tags   3) Remove HTML/PHP tags via strip_tags().
 * @param bool  $html_entity  4) Decode HTML entities via html_entity_decode().
 * @param bool  $windows_1252 5) Drop every byte outside the range 0x20-0x7F.
 * @param bool  $new_line     6) Replace CR LF, LF and CR with a space and
 *                            remove tabs.
 * @param bool  $xml          7) Replace "&" with "and", "<" and ">" with "."
 *                            and both slash kinds with "-".
 *
 * @return mixed The cleaned text.
 */
function cleanString($text, $convert = false, $cp1252 = false, $strip_tags = false, $html_entity = false, $windows_1252 = false, $new_line = false, $xml = false)
{
    // 1) Fold accented letters and typographic punctuation to ASCII.
    //    Plain byte-wise replacement is used instead of /u regexes: the result
    //    is the same for valid UTF-8, but a /u regex returns null (losing the
    //    whole text) as soon as the input contains an invalid byte sequence.
    if ($convert) {
        $folds = array(
            'a' => array('á', 'à', 'â', 'ã', 'ª', 'ä'),
            'A' => array('Á', 'À', 'Â', 'Ã', 'Ä'),
            'I' => array('Í', 'Ì', 'Î', 'Ï'),
            'i' => array('í', 'ì', 'î', 'ï'),
            'e' => array('é', 'è', 'ê', 'ë'),
            'E' => array('É', 'È', 'Ê', 'Ë'),
            'o' => array('ó', 'ò', 'ô', 'õ', 'º', 'ö'),
            'O' => array('Ó', 'Ò', 'Ô', 'Õ', 'Ö'),
            'u' => array('ú', 'ù', 'û', 'ü'),
            'U' => array('Ú', 'Ù', 'Û', 'Ü'),
            "'" => array('’', '‘', '‹', '›', '‚'),
            '"' => array('“', '”', '«', '»', '„'),
            '-' => array('–'),
            // NOTE(review): the original search literal rendered as a blank;
            // assumed to be a UTF-8 no-break space (U+00A0) - confirm.
            ' ' => array("\xC2\xA0"),
            'n' => array('ñ'),
            'N' => array('Ñ'),
        );

        foreach ($folds as $ascii => $variants) {
            $text = str_replace($variants, $ascii, $text);
        }
    }

    // 2) Translation-table pass: every table VALUE found in the text is
    //    replaced by its KEY.
    //    NOTE(review): the extra keys are single bytes (130-159), so the output
    //    may not be valid UTF-8 afterwards - confirm the intended target encoding.
    if ($cp1252) {
        $trans = get_html_translation_table(HTML_ENTITIES);
        $trans[chr(130)] = '‚'; // Single Low-9 Quotation Mark
        $trans[chr(131)] = 'ƒ'; // Latin Small Letter F With Hook
        $trans[chr(132)] = '„'; // Double Low-9 Quotation Mark
        $trans[chr(133)] = '…'; // Horizontal Ellipsis
        $trans[chr(134)] = '†'; // Dagger
        $trans[chr(135)] = '‡'; // Double Dagger
        $trans[chr(136)] = 'ˆ'; // Modifier Letter Circumflex Accent
        $trans[chr(137)] = '‰'; // Per Mille Sign
        $trans[chr(138)] = 'Š'; // Latin Capital Letter S With Caron
        $trans[chr(139)] = '‹'; // Single Left-Pointing Angle Quotation Mark
        $trans[chr(140)] = 'Œ'; // Latin Capital Ligature OE
        $trans[chr(145)] = '‘'; // Left Single Quotation Mark
        $trans[chr(146)] = '’'; // Right Single Quotation Mark
        $trans[chr(147)] = '“'; // Left Double Quotation Mark
        $trans[chr(148)] = '”'; // Right Double Quotation Mark
        $trans[chr(149)] = '•'; // Bullet
        $trans[chr(150)] = '–'; // En Dash
        $trans[chr(151)] = '—'; // Em Dash
        $trans[chr(152)] = '˜'; // Small Tilde
        $trans[chr(153)] = '™'; // Trade Mark Sign
        $trans[chr(154)] = 'š'; // Latin Small Letter S With Caron
        $trans[chr(155)] = '›'; // Single Right-Pointing Angle Quotation Mark
        $trans[chr(156)] = 'œ'; // Latin Small Ligature OE
        $trans[chr(159)] = 'Ÿ'; // Latin Capital Letter Y With Diaeresis
        $trans['euro'] = '€'; // euro currency symbol
        ksort($trans);

        // Array-form str_replace applies the pairs one after another in array
        // order, exactly like looping over the sorted table entry by entry.
        $text = str_replace(array_values($trans), array_keys($trans), $text);
    }

    // 3) Remove markup such as <p> or <br/>.
    if ($strip_tags) {
        $text = strip_tags($text);
    }

    // 4) Decode HTML entities back into characters.
    if ($html_entity) {
        $text = html_entity_decode($text);
    }

    // 5) Remove every byte outside 0x20-0x7F. This drops all non-ASCII
    //    symbols (trade mark, euro, ...) and also all control characters,
    //    including line breaks and tabs.
    if ($windows_1252) {
        $text = preg_replace('/[^\x20-\x7F]+/', '', $text);
    }

    // 6) Flatten line breaks to spaces and drop tabs.
    if ($new_line) {
        $replacements = array(' ', ' ', ' ', '');

        // Real control characters. Double quotes are required here: inside
        // single quotes '\n' is a backslash followed by "n", not a line feed.
        $text = str_replace(array("\r\n", "\n", "\r", "\t"), $replacements, $text);

        // Literal backslash sequences, kept for backward compatibility because
        // this is what the single-quoted targets used to match.
        $text = str_replace(array('\r\n', '\n', '\r', '\t'), $replacements, $text);
    }

    // 7) Replace characters that are awkward inside XML content.
    if ($xml) {
        $text = str_replace(
            array('&', '<', '>', '\\', '/'),
            array('and', '.', '.', '-', '-'),
            $text
        );
    }

    return $text;
}