1: <?php
2: /**
3: * Provides static methods for charset and locale safe string manipulation.
4: *
5: * Copyright 2003-2012 Horde LLC (http://www.horde.org/)
6: *
7: * See the enclosed file COPYING for license information (LGPL). If you
8: * did not receive this file, see http://www.horde.org/licenses/lgpl21.
9: *
10: * @author Jan Schneider <jan@horde.org>
11: * @category Horde
12: * @license http://www.horde.org/licenses/lgpl21 LGPL 2.1
13: * @package Util
14: */
15: class Horde_String
16: {
/**
 * Memoized results of lower() for its locale-independent code path, keyed
 * by the original (unconverted) input string.
 *
 * @var array
 */
static protected $_lowers = array();

/**
 * Memoized results of upper() for its locale-independent code path, keyed
 * by the original (unconverted) input string.
 *
 * @var array
 */
static protected $_uppers = array();
30:
/**
 * Converts data from one charset to another.
 *
 * Strings are handed to _convertCharset(), which tries the available
 * conversion extensions. The original string is returned if conversion
 * failed or none of the extensions were available.
 *
 * @param mixed $input    The data to be converted. If $input is an array,
 *                        its keys and values get converted recursively. If
 *                        it is an object, a clone is returned whose
 *                        properties (as reported by get_object_vars()) have
 *                        been converted recursively. Numeric and other
 *                        non-string scalars are returned untouched.
 * @param string $from    The string's current charset.
 * @param string $to      The charset to convert the string to.
 * @param boolean $force  Force conversion even if $from and $to are equal?
 *
 * @return mixed  The converted input data. Exception and PEAR_Error objects
 *                are never traversed; an empty string is returned for them.
 */
static public function convertCharset($input, $from, $to, $force = false)
{
    /* Don't bother converting numbers. */
    if (is_numeric($input)) {
        return $input;
    }

    /* If the from and to character sets are identical, return now. The
     * check is repeated after lowercasing so that e.g. 'UTF-8' and 'utf-8'
     * are recognized as the same charset. */
    if (!$force && $from == $to) {
        return $input;
    }
    $from = self::lower($from);
    $to = self::lower($to);
    if (!$force && $from == $to) {
        return $input;
    }

    if (is_array($input)) {
        /* foreach instead of each(): each() was deprecated in PHP 7.2 and
         * removed in PHP 8.0. */
        $tmp = array();
        foreach ($input as $key => $val) {
            $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
        }
        return $tmp;
    }

    if (is_object($input)) {
        // PEAR_Error/Exception objects are almost guaranteed to contain
        // recursion, which will cause a segfault in PHP. We should never
        // reach this line, but add a check.
        if (($input instanceof Exception) ||
            ($input instanceof PEAR_Error)) {
            return '';
        }

        /* Work on a clone so the caller's object is left unmodified. */
        $input = Horde_Util::cloneObject($input);
        foreach (get_object_vars($input) as $key => $val) {
            $input->$key = self::convertCharset($val, $from, $to, $force);
        }
        return $input;
    }

    if (!is_string($input)) {
        return $input;
    }

    return self::_convertCharset($input, $from, $to);
}
96:
/**
 * Internal function used to do charset conversion of a single string.
 *
 * Strategies are tried in this order: utf8_encode()/utf8_decode() for the
 * ISO-8859-1/US-ASCII <-> UTF-8 pairs, the UTF7-IMAP converter, iconv with
 * transliteration, and finally mbstring. If none of them succeeds the
 * input is returned unchanged.
 *
 * @param string $input  See self::convertCharset().
 * @param string $from   See self::convertCharset(). Expected in lowercase.
 * @param string $to     See self::convertCharset(). Expected in lowercase.
 *
 * @return string  The converted string.
 */
static protected function _convertCharset($input, $from, $to)
{
    /* Use utf8_[en|de]code() if possible and if the string isn't too
     * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
     * functions use more memory.
     * 'utf-8' is deliberately not accepted as the "other" charset here: a
     * forced utf-8 -> utf-8 conversion must not be run through
     * utf8_encode(), which would double-encode every non-ASCII character.
     * That case falls through to iconv/mbstring below instead. */
    if (Horde_Util::extensionExists('xml') &&
        ((strlen($input) < 16777216) ||
         !Horde_Util::extensionExists('iconv') ||
         !Horde_Util::extensionExists('mbstring'))) {
        if (($to == 'utf-8') &&
            in_array($from, array('iso-8859-1', 'us-ascii'))) {
            return utf8_encode($input);
        }

        if (($from == 'utf-8') &&
            in_array($to, array('iso-8859-1', 'us-ascii'))) {
            return utf8_decode($input);
        }
    }

    /* Try UTF7-IMAP conversions. UTF-8 is used as the pivot charset. */
    if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
        try {
            if ($from == 'utf7-imap') {
                return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
            } else {
                if ($from == 'utf-8') {
                    $conv = $input;
                } else {
                    $conv = self::convertCharset($input, $from, 'UTF-8');
                }
                return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
            }
        } catch (Horde_Imap_Client_Exception $e) {
            return $input;
        }
    }

    /* Try iconv with transliteration. Failure is detected through the
     * return value: iconv() returns false on error. The former check via
     * $php_errormsg/track_errors no longer works, as both were deprecated
     * in PHP 7.2 and removed in PHP 8.0. */
    if (Horde_Util::extensionExists('iconv')) {
        $out = @iconv($from, $to . '//TRANSLIT', $input);
        if ($out !== false) {
            return $out;
        }
    }

    /* Try mbstring. PHP 8 throws a ValueError for unknown encodings
     * instead of emitting a warning; treat that like any other failure. */
    if (Horde_Util::extensionExists('mbstring')) {
        try {
            $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
        } catch (ValueError $e) {
            $out = false;
        }
        if (!empty($out)) {
            return $out;
        }
    }

    return $input;
}
166:
/**
 * Converts a string to lowercase.
 *
 * @param string $string   The string to be converted.
 * @param boolean $locale  If true, the conversion is charset aware (using
 *                         mbstring when available); if false, the string is
 *                         lowercased under the "C" locale and the result is
 *                         cached.
 * @param string $charset  The charset of $string. Only used if $locale is
 *                         true.
 *
 * @return string  The string with lowercase characters.
 * @throws InvalidArgumentException  If $locale is true, mbstring is
 *                                   available and $charset is null.
 */
static public function lower($string, $locale = false, $charset = null)
{
    if (!$locale) {
        /* Locale-independent conversion: temporarily switch LC_CTYPE to
         * "C" and remember the result for subsequent calls. */
        if (isset(self::$_lowers[$string])) {
            return self::$_lowers[$string];
        }

        $saved = setlocale(LC_CTYPE, 0);
        setlocale(LC_CTYPE, 'C');
        self::$_lowers[$string] = strtolower($string);
        setlocale(LC_CTYPE, $saved);

        return self::$_lowers[$string];
    }

    if (Horde_Util::extensionExists('mbstring')) {
        if ($charset === null) {
            throw new InvalidArgumentException('$charset argument must not be null');
        }

        $converted = @mb_strtolower($string, self::_mbstringCharset($charset));
        if (!empty($converted)) {
            return $converted;
        }
    }

    /* No mbstring, or mbstring produced nothing usable. */
    return strtolower($string);
}
202:
/**
 * Converts a string to uppercase.
 *
 * @param string $string   The string to be converted.
 * @param boolean $locale  If true, the conversion is charset aware (using
 *                         mbstring when available); if false, the string is
 *                         uppercased under the "C" locale and the result is
 *                         cached.
 * @param string $charset  The charset of $string. Only used if $locale is
 *                         true.
 *
 * @return string  The string with uppercase characters.
 * @throws InvalidArgumentException  If $locale is true, mbstring is
 *                                   available and $charset is null.
 */
static public function upper($string, $locale = false, $charset = null)
{
    if (!$locale) {
        /* Locale-independent conversion: temporarily switch LC_CTYPE to
         * "C" and remember the result for subsequent calls. */
        if (isset(self::$_uppers[$string])) {
            return self::$_uppers[$string];
        }

        $saved = setlocale(LC_CTYPE, 0);
        setlocale(LC_CTYPE, 'C');
        self::$_uppers[$string] = strtoupper($string);
        setlocale(LC_CTYPE, $saved);

        return self::$_uppers[$string];
    }

    if (Horde_Util::extensionExists('mbstring')) {
        if ($charset === null) {
            throw new InvalidArgumentException('$charset argument must not be null');
        }

        $converted = @mb_strtoupper($string, self::_mbstringCharset($charset));
        if (!empty($converted)) {
            return $converted;
        }
    }

    /* No mbstring, or mbstring produced nothing usable. */
    return strtoupper($string);
}
238:
/**
 * Capitalizes the first character of a string.
 *
 * In charset aware mode the first character is only changed if isAlpha()
 * reports it as alphabetic.
 *
 * @param string $string   The string to be capitalized.
 * @param boolean $locale  If true, work on characters of the given charset;
 *                         if false, work on the first byte, locale
 *                         independent.
 * @param string $charset  The charset of $string. Required if $locale is
 *                         true.
 *
 * @return string  The capitalized string.
 * @throws InvalidArgumentException  If $locale is true and $charset is null.
 */
static public function ucfirst($string, $locale = false, $charset = null)
{
    if (!$locale) {
        return self::upper(substr($string, 0, 1), false) . substr($string, 1);
    }

    if (is_null($charset)) {
        throw new InvalidArgumentException('$charset argument must not be null');
    }

    $initial = self::substr($string, 0, 1, $charset);
    if (!self::isAlpha($initial, $charset)) {
        return $string;
    }

    return self::upper($initial, true, $charset) . self::substr($string, 1, null, $charset);
}
266:
/**
 * Capitalizes the first character of every word of a string.
 *
 * The string is split into words at runs of whitespace; the whitespace
 * itself is preserved unchanged. Each word is passed through ucfirst().
 *
 * @param string $string   The string to be capitalized.
 * @param boolean $locale  Passed on to ucfirst().
 * @param string $charset  Passed on to ucfirst().
 *
 * @return string  The capitalized string.
 */
static public function ucwords($string, $locale = false, $charset = null)
{
    /* With PREG_SPLIT_DELIM_CAPTURE the result alternates between words
     * (even indexes) and the whitespace separating them (odd indexes). */
    $parts = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
    $total = count($parts);

    $idx = 0;
    while ($idx < $total) {
        $parts[$idx] = self::ucfirst($parts[$idx], $locale, $charset);
        $idx += 2;
    }

    return implode('', $parts);
}
288:
/**
 * Returns part of a string, counting in characters of the given charset.
 *
 * mbstring is tried first, then iconv; if neither yields a result the
 * byte-based substr() is used.
 *
 * @param string $string   The string to take the part from.
 * @param integer $start   The part's start position, zero based.
 * @param integer $length  The part's length. If null, everything from
 *                         $start to the end of the string is returned.
 * @param string $charset  The charset to use when calculating the part's
 *                         position and length.
 *
 * @return string  The string's part.
 */
static public function substr($string, $start, $length = null,
                              $charset = 'UTF-8')
{
    if ($length === null) {
        $length = self::length($string, $charset) - $start;
    }

    if ($length == 0) {
        return '';
    }

    /* Try mbstring. mb_substr() returns an empty string on failure, so an
     * empty result is not trusted and the next strategy is tried. */
    if (Horde_Util::extensionExists('mbstring')) {
        $part = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));
        if (strlen($part)) {
            return $part;
        }
    }

    /* Try iconv. iconv_substr() returns false on failure. */
    if (Horde_Util::extensionExists('iconv')) {
        $part = @iconv_substr($string, $start, $length, $charset);
        if ($part !== false) {
            return $part;
        }
    }

    /* Last resort: byte-based. */
    return substr($string, $start, $length);
}
334:
/**
 * Returns the length of a string in characters (not bytes).
 *
 * @param string $string   The string to return the length of.
 * @param string $charset  The charset to use when calculating the string's
 *                         length.
 *
 * @return integer  The string's length.
 */
static public function length($string, $charset = 'UTF-8')
{
    $charset = self::lower($charset);

    /* UTF-8: utf8_decode() turns every character into exactly one byte,
     * so the byte length of its result is the character count. */
    if (in_array($charset, array('utf-8', 'utf8'))) {
        return strlen(utf8_decode($string));
    }

    if (Horde_Util::extensionExists('mbstring')) {
        $chars = @mb_strlen($string, self::_mbstringCharset($charset));
        if (!empty($chars)) {
            return $chars;
        }
    }

    /* Fall back to the byte length. */
    return strlen($string);
}
361:
/**
 * Returns the numeric position of the first occurrence of $needle
 * in the $haystack string.
 *
 * @param string $haystack  The string to search through.
 * @param string $needle    The string to search for.
 * @param integer $offset   Allows to specify which character in haystack
 *                          to start searching.
 * @param string $charset   The charset to use when searching for the
 *                          $needle string.
 *
 * @return integer  The position of first occurrence, or false if $needle
 *                  was not found.
 */
static public function pos($haystack, $needle, $offset = 0,
                           $charset = 'UTF-8')
{
    return self::_pos($haystack, $needle, $offset, $charset, 'strpos');
}

/**
 * Returns the numeric position of the last occurrence of $needle
 * in the $haystack string.
 *
 * @param string $haystack  The string to search through.
 * @param string $needle    The string to search for.
 * @param integer $offset   Allows to specify which character in haystack
 *                          to start searching.
 * @param string $charset   The charset to use when searching for the
 *                          $needle string.
 *
 * @return integer  The position of last occurrence, or false if $needle
 *                  was not found.
 */
static public function rpos($haystack, $needle, $offset = 0,
                            $charset = 'UTF-8')
{
    return self::_pos($haystack, $needle, $offset, $charset, 'strrpos');
}

/**
 * Shared implementation of pos() and rpos(): runs the mbstring variant of
 * $func and falls back to the byte-based function if mbstring is missing
 * or reported an error.
 *
 * Errors are detected with a temporary error handler rather than with
 * $php_errormsg/track_errors, which were deprecated in PHP 7.2 and removed
 * in PHP 8.0. A ValueError thrown by PHP 8 is treated as a failure too.
 *
 * @param string $haystack  See pos().
 * @param string $needle    See pos().
 * @param integer $offset   See pos().
 * @param string $charset   See pos().
 * @param string $func      Byte-based function name: 'strpos' or 'strrpos'.
 *
 * @return integer  The position found, or false.
 */
static protected function _pos($haystack, $needle, $offset, $charset, $func)
{
    if (Horde_Util::extensionExists('mbstring')) {
        /* Resolve the charset before installing the handler so that only
         * errors of the mbstring call itself are recorded. */
        $mb_charset = self::_mbstringCharset($charset);

        $failed = false;
        set_error_handler(function () use (&$failed) {
            $failed = true;
            return true;
        });

        $ret = false;
        $thrown = null;
        try {
            $ret = call_user_func('mb_' . $func, $haystack, $needle, $offset, $mb_charset);
        } catch (Exception $e) {
            $thrown = $e;
        } catch (Error $e) {
            $thrown = $e;
        }
        restore_error_handler();

        if ($thrown !== null) {
            /* Only PHP 8's argument errors count as an mbstring failure;
             * anything else is not ours to swallow. */
            if (!($thrown instanceof ValueError)) {
                throw $thrown;
            }
            $failed = true;
        }

        if (!$failed) {
            return $ret;
        }
    }

    return call_user_func($func, $haystack, $needle, $offset);
}
417:
/**
 * Returns a string padded to a certain length with another string.
 * This method behaves like str_pad() but counts characters of the given
 * charset instead of bytes.
 *
 * @param string $input    The string to be padded.
 * @param integer $length  The length of the resulting string.
 * @param string $pad      The string to pad the input string with. Must
 *                         be in the same charset like the input string.
 * @param const $type      The padding type. One of STR_PAD_LEFT,
 *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
 * @param string $charset  The charset of the input and the padding
 *                         strings.
 *
 * @return string  The padded string.
 */
static public function pad($input, $length, $pad = ' ',
                           $type = STR_PAD_RIGHT, $charset = 'UTF-8')
{
    $mb_length = self::length($input, $charset);
    $sb_length = strlen($input);
    $pad_length = self::length($pad, $charset);

    /* Return if we already have the length. */
    if ($mb_length >= $length) {
        return $input;
    }

    /* Shortcut for single byte strings. Invalid arguments (an empty
     * padding string or an unknown padding type) are handed to str_pad()
     * as well, so that they fail the native way instead of causing a
     * division by zero or an undefined result below. */
    if (($mb_length == $sb_length && $pad_length == strlen($pad)) ||
        $pad_length == 0 ||
        !in_array($type, array(STR_PAD_LEFT, STR_PAD_RIGHT, STR_PAD_BOTH))) {
        return str_pad($input, $length, $pad, $type);
    }

    /* Number of characters to add on each side. */
    $missing = $length - $mb_length;
    switch ($type) {
    case STR_PAD_LEFT:
        $left = $missing;
        $right = 0;
        break;

    case STR_PAD_BOTH:
        $left = (int)floor($missing / 2);
        $right = (int)ceil($missing / 2);
        break;

    default:
        $left = 0;
        $right = $missing;
        break;
    }

    /* Repeat the padding often enough to cover the gap, then cut it down
     * to the exact number of characters. */
    $output = $input;
    if ($left > 0) {
        $output = self::substr(str_repeat($pad, (int)ceil($left / $pad_length)), 0, $left, $charset) . $output;
    }
    if ($right > 0) {
        $output .= self::substr(str_repeat($pad, (int)ceil($right / $pad_length)), 0, $right, $charset);
    }

    return $output;
}
472:
/**
 * Wraps the text of a message.
 *
 * The text is consumed in slices of $width characters. All lengths and
 * positions are calculated in UTF-8 characters; the charset is hard-coded
 * in this method.
 *
 * @param string $string         String containing the text to wrap.
 * @param integer $width         Wrap the string at this number of
 *                               characters.
 * @param string $break          Character(s) to use when breaking lines.
 * @param boolean $cut           Whether to cut inside words if a line
 *                               can't be wrapped.
 * @param boolean $line_folding  Whether to apply line folding rules per
 *                               RFC 822 or similar. The correct break
 *                               characters including leading whitespace
 *                               have to be specified too.
 *
 * @return string  String containing the wrapped text.
 */
static public function wordwrap($string, $width = 75, $break = "\n",
                                $cut = false, $line_folding = false)
{
    $wrapped = '';

    while (self::length($string, 'UTF-8') > $width) {
        /* Split off the next $width characters; $string keeps the rest. */
        $line = self::substr($string, 0, $width, 'UTF-8');
        $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');

        // Make sure we didn't cut a word, unless we want hard breaks
        // anyway. The remainder of the word (everything up to the next
        // whitespace) is moved from $string back onto $line.
        if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
            $line .= $match[1];
            $string = $match[2];
        }

        // Wrap at existing line breaks. The original line break is kept
        // as is ($break is not inserted) and whatever follows it is pushed
        // back to the front of $string.
        if (preg_match('/^(.*?)(\r?\n)(.*)$/su', $line, $match)) {
            $wrapped .= $match[1] . $match[2];
            $string = $match[3] . $string;
            continue;
        }

        // Wrap at the last colon or semicolon followed by a whitespace if
        // doing line folding.
        // NOTE(review): the leading group is lazy ('.*?'), so this appears
        // to split at the FIRST such colon/semicolon rather than the last
        // - confirm which one is intended.
        if ($line_folding &&
            preg_match('/^(.*?)(;|:)(\s+.*)$/u', $line, $match)) {
            $wrapped .= $match[1] . $match[2] . $break;
            $string = $match[3] . $string;
            continue;
        }

        // Wrap at the last whitespace of $line. When folding, the part
        // before the whitespace must end in a non-whitespace character.
        $sub = $line_folding
            ? '(.+[^\s])'
            : '(.*)';

        if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
            $wrapped .= $match[1] . $break;
            // The whitespace run itself is dropped, except when folding,
            // where it is carried over to the start of the remaining text.
            $string = ($line_folding ? $match[2] : '') . $match[3] . $string;
            continue;
        }

        // Hard wrap if necessary.
        if ($cut) {
            $wrapped .= $line . $break;
            continue;
        }

        // No place to wrap and no hard cut requested: append the line
        // without inserting a break.
        $wrapped .= $line;
    }

    return $wrapped . $string;
}
543:
/**
 * Wraps the text of a message, line by line.
 *
 * The text is split at its existing line breaks; every line is wrapped
 * with wordwrap() and the results are joined with $break_char.
 *
 * @param string $text        String containing the text to wrap.
 * @param integer $length     Wrap $text at this number of characters.
 * @param string $break_char  Character(s) to use when breaking lines.
 * @param boolean $quote      Ignore lines that are wrapped with the '>'
 *                            character (RFC 2646)? If true, such lines are
 *                            passed through untouched, including any
 *                            padding whitespace at their end.
 *
 * @return string  String containing the wrapped text.
 */
static public function wrap($text, $length = 80, $break_char = "\n",
                            $quote = false)
{
    $wrapped = array();

    foreach (preg_split('/\r?\n/', $text) as $row) {
        /* Quoted lines are kept verbatim. */
        if ($quote && (strpos($row, '>') === 0)) {
            $wrapped[] = $row;
            continue;
        }

        /* The Usenet-style signature separator needs its trailing space,
         * which is REQUIRED, so it is the one line that is not trimmed. */
        if ($row != '-- ') {
            $row = rtrim($row);
        }

        $wrapped[] = self::wordwrap($row, $length, $break_char);
    }

    return implode($break_char, $wrapped);
}
580:
/**
 * Return a truncated string, suitable for notifications.
 *
 * Strings longer than $length are cut and terminated with '...', so that
 * the result is at most $length characters long. If $length is too small
 * to hold the ellipsis (less than 3), the string is simply cut to $length
 * characters.
 *
 * @param string $text     The original string.
 * @param integer $length  The maximum length.
 *
 * @return string  The truncated string, if longer than $length.
 */
static public function truncate($text, $length = 100)
{
    if (self::length($text) <= $length) {
        return $text;
    }

    /* $length - 3 would be negative here, and a negative substring length
     * counts from the end of the string rather than limiting the result. */
    if ($length < 3) {
        return self::substr($text, 0, max(0, $length));
    }

    return rtrim(self::substr($text, 0, $length - 3)) . '...';
}
595:
/**
 * Return an abbreviated string, with characters in the middle of the
 * excessively long string replaced by '...'.
 *
 * The characters available besides the ellipsis ($length - 3) are split
 * between the start and the end of the string; if their number is odd, the
 * start gets the extra character.
 *
 * @param string $text     The original string.
 * @param integer $length  The length at which to abbreviate.
 *
 * @return string  The abbreviated string, if longer than $length.
 */
static public function abbreviate($text, $length = 20)
{
    if (self::length($text) <= $length) {
        return $text;
    }

    /* Work with explicit integers; never go below zero for tiny $length
     * values. */
    $keep = max(0, $length - 3);
    $head = (int)ceil($keep / 2);
    $tail = $keep - $head;

    $start = rtrim(self::substr($text, 0, $head));

    /* A tail of zero characters must be handled explicitly: a start offset
     * of -0 is 0 and would return the whole string instead of nothing. */
    $end = ($tail > 0)
        ? ltrim(self::substr($text, -$tail))
        : '';

    return $start . '...' . $end;
}
611:
/**
 * Returns the common leading part of two strings.
 *
 * The comparison is done position by position using string offsets and
 * stops at the first difference or at the end of the shorter string.
 *
 * @param string $str1  A string.
 * @param string $str2  Another string.
 *
 * @return string  The start of $str1 and $str2 that is identical in both.
 */
static public function common($str1, $str2)
{
    $prefix = '';
    $pos = 0;

    while (isset($str1[$pos]) &&
           isset($str2[$pos]) &&
           $str1[$pos] == $str2[$pos]) {
        $prefix .= $str1[$pos];
        ++$pos;
    }

    return $prefix;
}
629:
/**
 * Returns true if every character in the parameter is an alphabetic
 * character.
 *
 * Without the mbstring extension the check is delegated to ctype_alpha().
 * With mbstring, an empty string is reported as alphabetic.
 *
 * @param string $string   The string to test.
 * @param string $charset  The charset to use when testing the string.
 *
 * @return boolean  True if the parameter was alphabetic only.
 */
static public function isAlpha($string, $charset)
{
    if (!Horde_Util::extensionExists('mbstring')) {
        return ctype_alpha($string);
    }

    $charset = self::_mbstringCharset($charset);
    $old_charset = mb_regex_encoding();

    /* Temporarily switch the mbstring regex encoding to the charset of
     * the string; it is restored below. */
    if ($charset != $old_charset) {
        @mb_regex_encoding($charset);
    }

    /* mb_ereg_match() only matches at the beginning of the string, so a
     * negated "contains a non-alphabetic character" test would only ever
     * look at the first character. Match the whole string positively
     * instead. */
    $alpha = (bool)@mb_ereg_match('\A[[:alpha:]]*\z', $string);

    if ($charset != $old_charset) {
        @mb_regex_encoding($old_charset);
    }

    return $alpha;
}
658:
/**
 * Returns true if every character in the parameter is a lowercase letter.
 *
 * A string qualifies if lowercasing it (charset aware) leaves it unchanged
 * and isAlpha() accepts it.
 *
 * @param string $string   The string to test.
 * @param string $charset  The charset to use when testing the string.
 *
 * @return boolean  True if the parameter was lowercase.
 */
static public function isLower($string, $charset)
{
    if (self::lower($string, true, $charset) !== $string) {
        return false;
    }

    return (bool)self::isAlpha($string, $charset);
}
673:
/**
 * Returns true if every character in the parameter is an uppercase letter.
 *
 * A string qualifies if uppercasing it (charset aware) leaves it unchanged
 * and isAlpha() accepts it.
 *
 * @param string $string   The string to test.
 * @param string $charset  The charset to use when testing the string.
 *
 * @return boolean  True if the parameter was uppercase.
 */
static public function isUpper($string, $charset)
{
    if (self::upper($string, true, $charset) !== $string) {
        return false;
    }

    return (bool)self::isAlpha($string, $charset);
}
688:
/**
 * Performs a multibyte safe regex match search on the text provided.
 *
 * If a charset is given, both the text and the expressions are converted
 * to UTF-8 before matching, and the matches are converted back afterwards.
 * The expressions are tried in order; the search stops at the first one
 * that matches.
 *
 * @param string $text     The text to search.
 * @param array $regex     The regular expressions to use, without perl
 *                         regex delimiters (e.g. '/' or '|').
 * @param string $charset  The character set of the text.
 *
 * @return array  The matches array from the first regex that matches. An
 *                empty array if none of them matched.
 */
static public function regexMatch($text, $regex, $charset = null)
{
    $convert = !empty($charset);

    if ($convert) {
        $regex = self::convertCharset($regex, $charset, 'utf-8');
        $text = self::convertCharset($text, $charset, 'utf-8');
    }

    $matches = array();
    foreach ($regex as $pattern) {
        if (preg_match('/' . $pattern . '/u', $text, $matches)) {
            break;
        }
    }

    return $convert
        ? self::convertCharset($matches, 'utf-8', $charset)
        : $matches;
}
719:
/**
 * Check to see if a string is valid UTF-8.
 *
 * Besides the multibyte sequences, only the ASCII characters tab, line
 * feed, carriage return and 0x20-0x7E are accepted by the pattern used.
 *
 * @since 1.1.0
 *
 * @param string $text  The text to check.
 *
 * @return boolean  True if valid UTF-8.
 */
static public function validUtf8($text)
{
    /* Regex from:
     * http://stackoverflow.com/questions/1523460/ensuring-valid-utf-8-in-php
     */
    $pattern = '/^(?:
          [\x09\x0A\x0D\x20-\x7E]            # ASCII
        | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
        | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
        | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
        | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
        | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
        | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
    )*$/xs';

    /* There is bug in PHP/PCRE with larger strings; stack overflow causes
     * PHP segfaults. See:
     * https://bugs.php.net/bug.php?id=37793
     *
     * Thus, check the string in smaller chunks instead.
     */
    $chunk_size = 4000;
    $length = strlen($text);
    $offset = 0;

    while (($length - $offset) > $chunk_size) {
        /* A chunk must not end in the middle of a multibyte character,
         * or valid input would be rejected. A character has at most three
         * continuation bytes (10xxxxxx), so move the boundary back by up
         * to three bytes until it sits in front of a non-continuation
         * byte. If it still doesn't, the input is invalid, which the
         * check of the next chunk will detect. */
        $end = $offset + $chunk_size;
        $steps = 0;
        while ($steps < 3 &&
               (ord(substr($text, $end, 1)) & 0xC0) == 0x80) {
            --$end;
            ++$steps;
        }

        /* Can't use self::substr() here since the input may not be
         * proper UTF-8, which is sort of the whole point of this
         * method. */
        if (!preg_match($pattern, substr($text, $offset, $end - $offset))) {
            return false;
        }

        $offset = $end;
    }

    return (bool)preg_match($pattern, ($offset > 0) ? substr($text, $offset) : $text);
}
766:
/**
 * Maps charsets that the mbstring functions can't handle to a charset
 * they can.
 *
 * @param string $charset  The original charset.
 *
 * @return string  The charset to use with mbstring functions.
 */
static protected function _mbstringCharset($charset)
{
    /* mbstring functions do not handle the 'ks_c_5601-1987' &
     * 'ks_c_5601-1989' charsets. However, these charsets are used, for
     * example, by various versions of Outlook to send Korean characters.
     * Use UHC (CP949) encoding instead. See, e.g.,
     * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
    $unsupported = array('ks_c_5601-1987', 'ks_c_5601-1989');

    if (in_array(self::lower($charset), $unsupported)) {
        return 'UHC';
    }

    return $charset;
}
785:
/**
 * Removes a leading UTF-8 byte order mark (the bytes EF BB BF) from
 * string data.
 *
 * @since 1.4.0
 *
 * @param string $str  Input string (UTF-8).
 *
 * @return string  The string without the BOM; the unchanged input if it
 *                 doesn't start with one.
 */
static public function trimUtf8Bom($str)
{
    $bom = "\xEF\xBB\xBF";

    if (substr($str, 0, 3) == $bom) {
        return substr($str, 3);
    }

    return $str;
}
801:
802: }
803: