1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17:
18:
19: 20: 21: 22: 23:
24: class Multibyte {
25:
26: 27: 28: 29: 30:
/**
 * Case-folding tables loaded so far, keyed by code range name
 * (filled in by _find() from the value read via Configure).
 *
 * @var array
 */
protected static $_caseFold = array();
32:
33: 34: 35: 36: 37:
/**
 * Cache written by _codepoint(): decimal code point => code range name,
 * or false when the code point belongs to no known range.
 *
 * @var array
 */
protected static $_codeRange = array();
39:
40: 41: 42: 43: 44:
/**
 * Code range name selected by the most recent successful _find() lookup;
 * null until _find() has selected one.
 *
 * @var string
 */
protected static $_table = null;
46:
47: 48: 49: 50: 51: 52: 53:
/**
 * Converts a UTF-8 encoded string into an array of decimal code points.
 *
 * One- to four-byte sequences are decoded. Bytes >= 128 are accumulated until
 * the number of bytes announced by the lead byte has been collected; no
 * validation of continuation bytes is performed, and a truncated trailing
 * sequence is dropped.
 *
 * @param string $string UTF-8 encoded string to decode.
 * @return array List of decimal code points, one per decoded character.
 */
public static function utf8($string) {
	$map = array();

	$values = array();
	$find = 1;
	$length = strlen($string);

	for ($i = 0; $i < $length; $i++) {
		$value = ord($string[$i]);

		if ($value < 128) {
			$map[] = $value;
			continue;
		}

		if (empty($values)) {
			// The lead byte tells how many bytes make up this character.
			if ($value < 224) {
				$find = 2;
			} elseif ($value < 240) {
				$find = 3;
			} else {
				$find = 4;
			}
		}
		$values[] = $value;

		if (count($values) !== $find) {
			continue;
		}

		if ($find === 4) {
			$map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096) +
				(($values[2] % 64) * 64) + ($values[3] % 64);
		} elseif ($find === 3) {
			$map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
		} else {
			$map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
		}
		$values = array();
		$find = 1;
	}
	return $map;
}
85:
86: 87: 88: 89: 90: 91: 92:
/**
 * Converts an array of decimal code points into a UTF-8 encoded string.
 *
 * Code points below 128 become one byte, below 2048 two bytes, below 65536
 * three bytes, and everything above four bytes.
 *
 * @param array $array List of decimal code points.
 * @return string UTF-8 encoded string built from the code points.
 */
public static function ascii($array) {
	$ascii = '';

	foreach ($array as $utf8) {
		$code = (int)$utf8;

		if ($code < 128) {
			$ascii .= chr($code);
		} elseif ($code < 2048) {
			$ascii .= chr(192 + ($code >> 6));
			$ascii .= chr(128 + ($code & 63));
		} elseif ($code < 65536) {
			$ascii .= chr(224 + ($code >> 12));
			$ascii .= chr(128 + (($code >> 6) & 63));
			$ascii .= chr(128 + ($code & 63));
		} else {
			// Supplementary planes need a four-byte sequence.
			$ascii .= chr(240 + ($code >> 18));
			$ascii .= chr(128 + (($code >> 12) & 63));
			$ascii .= chr(128 + (($code >> 6) & 63));
			$ascii .= chr(128 + ($code & 63));
		}
	}
	return $ascii;
}
110:
111: 112: 113: 114: 115: 116: 117: 118: 119:
/**
 * Case-insensitive search for the first position of $needle in $haystack.
 *
 * When $haystack contains no multibyte bytes the native stripos() is used;
 * otherwise both strings are upper-cased and handed to Multibyte::strpos().
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param int $offset Position at which the search starts.
 * @return int|bool Position of the first match, or false when there is none.
 */
public static function stripos($haystack, $needle, $offset = 0) {
	if (!Multibyte::checkMultibyte($haystack)) {
		return stripos($haystack, $needle, $offset);
	}

	return Multibyte::strpos(
		Multibyte::strtoupper($haystack),
		Multibyte::strtoupper($needle),
		$offset
	);
}
128:
129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139:
/**
 * Case-insensitive search for the first occurrence of $needle in $haystack,
 * returning a portion of $haystack.
 *
 * The character-wise implementation is used when $haystack contains multibyte
 * bytes, or when $part is requested on PHP older than 5.3 (whose native
 * stristr() has no third argument). Otherwise the native stristr() is called.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param bool $part When true, return the part of $haystack before the match;
 *   when false, return $haystack from the match to its end.
 * @return string|bool The requested portion ('' when $part is true and the match
 *   is at the very start), or false when $needle is empty or not found.
 */
public static function stristr($haystack, $needle, $part = false) {
	$legacy = version_compare(PHP_VERSION, '5.3.0', '<');

	if (!($legacy && $part) && !Multibyte::checkMultibyte($haystack)) {
		if ($legacy) {
			return stristr($haystack, $needle);
		}
		return stristr($haystack, $needle, $part);
	}

	// Matching is done on the upper-cased copy, slicing on the original.
	$check = Multibyte::utf8(Multibyte::strtoupper($haystack));
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8(Multibyte::strtoupper($needle));
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Never start a comparison that would read past the end of either array.
	$lastStart = min(count($check), count($haystack)) - $needleCount;

	for ($position = 0; $position <= $lastStart; $position++) {
		$i = 0;
		while ($i < $needleCount && $check[$position + $i] === $needle[$i]) {
			$i++;
		}
		if ($i !== $needleCount) {
			continue;
		}
		if ($part) {
			return Multibyte::ascii(array_slice($haystack, 0, $position));
		}
		return Multibyte::ascii(array_slice($haystack, $position));
	}
	return false;
}
189:
190: 191: 192: 193: 194: 195:
/**
 * Returns the length of a string in characters.
 *
 * Strings flagged by checkMultibyte() are decoded and their code points
 * counted; all other strings are measured with the native strlen().
 *
 * @param string $string String to measure.
 * @return int Number of characters.
 */
public static function strlen($string) {
	return Multibyte::checkMultibyte($string)
		? count(Multibyte::utf8($string))
		: strlen($string);
}
203:
204: 205: 206: 207: 208: 209: 210: 211: 212:
/**
 * Finds the character position of the first occurrence of $needle in $haystack.
 *
 * When $haystack contains no multibyte bytes the native strpos() is used.
 * Otherwise both strings are decoded to code points and compared
 * character by character.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param int $offset Character position at which the search starts. In the
 *   multibyte path a negative offset is treated as 0.
 * @return int|bool Character position of the first match, or false when
 *   $needle is empty (multibyte path) or not found.
 */
public static function strpos($haystack, $needle, $offset = 0) {
	if (!Multibyte::checkMultibyte($haystack)) {
		return strpos($haystack, $needle, $offset);
	}

	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Last start position at which the whole needle still fits.
	$lastStart = count($haystack) - $needleCount;

	for ($position = max(0, (int)$offset); $position <= $lastStart; $position++) {
		$i = 0;
		while ($i < $needleCount && $haystack[$position + $i] === $needle[$i]) {
			$i++;
		}
		if ($i === $needleCount) {
			return $position;
		}
	}
	return false;
}
246:
247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257:
/**
 * Searches $haystack for $needle, skipping earlier occurrences of the needle's
 * first character, and returns a portion of $haystack.
 *
 * Both strings are decoded to code points with Multibyte::utf8() and the result
 * is re-encoded with Multibyte::ascii().
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param bool $part When true (and at least one character precedes the match),
 *   the characters collected before the match are returned; otherwise the
 *   remaining characters of $haystack are returned.
 * @return string|bool The selected portion, or false when nothing was found.
 */
public static function strrchr($haystack, $needle, $part = false) {
	// $check is the read-only copy used for comparisons; $haystack is consumed below.
	$check = Multibyte::utf8($haystack);
	$found = false;

	$haystack = Multibyte::utf8($haystack);
	$haystackCount = count($haystack);

	// Occurrence count per code point; used to skip all but the last
	// occurrence of the needle's first character.
	$matches = array_count_values($check);

	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	$parts = array();
	$position = 0;

	while (($found === false) && ($position < $haystackCount)) {
		if (isset($needle[0]) && $needle[0] === $check[$position]) {
			for ($i = 1; $i < $needleCount; $i++) {
				// NOTE(review): $position + $i is not bounds-checked and can
				// index past the end of $check.
				if ($needle[$i] !== $check[$position + $i]) {
					if ($needle[$i] === $check[($position + $i) - 1]) {
						$found = true;
					}
					// Drops the previously collected character and re-inserts
					// the current one at the front of the remaining haystack.
					unset($parts[$position - 1]);
					$haystack = array_merge(array($haystack[$position]), $haystack);
					break;
				}
			}
			// While more occurrences of the first needle character remain,
			// only decrement the counter; accept the match on the last one.
			if (isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
				$matches[$needle[0]] = $matches[$needle[0]] - 1;
			} elseif ($i === $needleCount) {
				$found = true;
			}
		}

		// Move the current character from the remaining haystack to $parts.
		if (!$found && isset($haystack[$position])) {
			$parts[] = $haystack[$position];
			unset($haystack[$position]);
		}
		$position++;
	}

	if ($found && $part && !empty($parts)) {
		return Multibyte::ascii($parts);
	} elseif ($found && !empty($haystack)) {
		return Multibyte::ascii($haystack);
	}
	return false;
}
306:
307: 308: 309: 310: 311: 312: 313: 314: 315: 316: 317:
/**
 * Case-insensitive variant of strrchr(): searches $haystack for $needle,
 * skipping earlier occurrences of the needle's first character, and returns
 * a portion of the original (non-upper-cased) $haystack.
 *
 * Comparisons are made on upper-cased copies produced by
 * Multibyte::strtoupper(); the returned text is taken from the original string.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param bool $part When true (and at least one character precedes the match),
 *   the characters collected before the match are returned; otherwise the
 *   remaining characters of $haystack are returned.
 * @return string|bool The selected portion, or false when nothing was found.
 */
public static function strrichr($haystack, $needle, $part = false) {
	// Upper-cased code points used only for comparison.
	$check = Multibyte::strtoupper($haystack);
	$check = Multibyte::utf8($check);
	$found = false;

	// Original code points; this array is consumed while scanning.
	$haystack = Multibyte::utf8($haystack);
	$haystackCount = count($haystack);

	// Occurrence count per (upper-cased) code point.
	$matches = array_count_values($check);

	$needle = Multibyte::strtoupper($needle);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	$parts = array();
	$position = 0;

	while (($found === false) && ($position < $haystackCount)) {
		if (isset($needle[0]) && $needle[0] === $check[$position]) {
			for ($i = 1; $i < $needleCount; $i++) {
				// NOTE(review): $position + $i is not bounds-checked and can
				// index past the end of $check.
				if ($needle[$i] !== $check[$position + $i]) {
					if ($needle[$i] === $check[($position + $i) - 1]) {
						$found = true;
					}
					// Drops the previously collected character and re-inserts
					// the current one at the front of the remaining haystack.
					unset($parts[$position - 1]);
					$haystack = array_merge(array($haystack[$position]), $haystack);
					break;
				}
			}
			// While more occurrences of the first needle character remain,
			// only decrement the counter; accept the match on the last one.
			if (isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
				$matches[$needle[0]] = $matches[$needle[0]] - 1;
			} elseif ($i === $needleCount) {
				$found = true;
			}
		}

		// Move the current character from the remaining haystack to $parts.
		if (!$found && isset($haystack[$position])) {
			$parts[] = $haystack[$position];
			unset($haystack[$position]);
		}
		$position++;
	}

	if ($found && $part && !empty($parts)) {
		return Multibyte::ascii($parts);
	} elseif ($found && !empty($haystack)) {
		return Multibyte::ascii($haystack);
	}
	return false;
}
368:
369: 370: 371: 372: 373: 374: 375: 376: 377:
/**
 * Case-insensitive search for a late occurrence of $needle in $haystack.
 *
 * When $haystack contains no multibyte bytes the native strripos() is used.
 * Otherwise both strings are upper-cased, decoded to code points and scanned
 * forward from $offset.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param int $offset Character position at which the scan starts. When it is
 *   non-zero the occurrence-skipping step below is disabled.
 * @return int|bool Character position reported for the match, or false when
 *   nothing was found.
 */
public static function strripos($haystack, $needle, $offset = 0) {
	if (Multibyte::checkMultibyte($haystack)) {
		$found = false;
		$haystack = Multibyte::strtoupper($haystack);
		$haystack = Multibyte::utf8($haystack);
		$haystackCount = count($haystack);

		// Occurrence count per code point; used to skip all but the last
		// occurrence of the needle's first character.
		$matches = array_count_values($haystack);

		$needle = Multibyte::strtoupper($needle);
		$needle = Multibyte::utf8($needle);
		$needleCount = count($needle);

		$position = $offset;

		while (($found === false) && ($position < $haystackCount)) {
			if (isset($needle[0]) && $needle[0] === $haystack[$position]) {
				for ($i = 1; $i < $needleCount; $i++) {
					// NOTE(review): $position + $i is not bounds-checked.
					if ($needle[$i] !== $haystack[$position + $i]) {
						if ($needle[$i] === $haystack[($position + $i) - 1]) {
							$position--;
							$found = true;
							// Continues the inner for loop, not the while loop.
							continue;
						}
					}
				}

				if (!$offset && isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
					$matches[$needle[0]] = $matches[$needle[0]] - 1;
				} elseif ($i === $needleCount) {
					$found = true;
					// Cancels the unconditional increment below so that
					// $position stays on the match.
					$position--;
				}
			}
			$position++;
		}
		return ($found) ? $position : false;
	}
	return strripos($haystack, $needle, $offset);
}
418:
419: 420: 421: 422: 423: 424: 425: 426: 427: 428:
/**
 * Searches for a late occurrence of $needle in $haystack.
 *
 * When $haystack contains no multibyte bytes the native strrpos() is used.
 * Otherwise both strings are decoded to code points and scanned forward
 * from $offset.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param int $offset Character position at which the scan starts. When it is
 *   non-zero the occurrence-skipping step below is disabled.
 * @return int|bool Character position reported for the match, or false when
 *   nothing was found.
 */
public static function strrpos($haystack, $needle, $offset = 0) {
	if (Multibyte::checkMultibyte($haystack)) {
		$found = false;

		$haystack = Multibyte::utf8($haystack);
		$haystackCount = count($haystack);

		// Occurrence count per code point; used to skip all but the last
		// occurrence of the needle's first character.
		$matches = array_count_values($haystack);

		$needle = Multibyte::utf8($needle);
		$needleCount = count($needle);

		$position = $offset;

		while (($found === false) && ($position < $haystackCount)) {
			if (isset($needle[0]) && $needle[0] === $haystack[$position]) {
				for ($i = 1; $i < $needleCount; $i++) {
					// NOTE(review): $position + $i is not bounds-checked.
					if ($needle[$i] !== $haystack[$position + $i]) {
						if ($needle[$i] === $haystack[($position + $i) - 1]) {
							$position--;
							$found = true;
							// Continues the inner for loop, not the while loop.
							continue;
						}
					}
				}

				if (!$offset && isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
					$matches[$needle[0]] = $matches[$needle[0]] - 1;
				} elseif ($i === $needleCount) {
					$found = true;
					// Cancels the unconditional increment below so that
					// $position stays on the match.
					$position--;
				}
			}
			$position++;
		}
		return ($found) ? $position : false;
	}
	return strrpos($haystack, $needle, $offset);
}
468:
469: 470: 471: 472: 473: 474: 475: 476: 477: 478: 479:
/**
 * Finds the first occurrence of $needle in $haystack and returns a portion
 * of $haystack.
 *
 * The character-wise implementation is used when $haystack contains multibyte
 * bytes, or when $part is requested on PHP older than 5.3 (whose native
 * strstr() has no third argument). Otherwise the native strstr() is called.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param bool $part When true, return the part of $haystack before the match;
 *   when false, return $haystack from the match to its end.
 * @return string|bool The requested portion ('' when $part is true and the match
 *   is at the very start), or false when $needle is empty or not found.
 */
public static function strstr($haystack, $needle, $part = false) {
	$legacy = version_compare(PHP_VERSION, '5.3.0', '<');

	if (!($legacy && $part) && !Multibyte::checkMultibyte($haystack)) {
		if ($legacy) {
			return strstr($haystack, $needle);
		}
		return strstr($haystack, $needle, $part);
	}

	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Last start position at which the whole needle still fits.
	$lastStart = count($haystack) - $needleCount;

	for ($position = 0; $position <= $lastStart; $position++) {
		$i = 0;
		while ($i < $needleCount && $haystack[$position + $i] === $needle[$i]) {
			$i++;
		}
		if ($i !== $needleCount) {
			continue;
		}
		if ($part) {
			return Multibyte::ascii(array_slice($haystack, 0, $position));
		}
		return Multibyte::ascii(array_slice($haystack, $position));
	}
	return false;
}
527:
528: 529: 530: 531: 532: 533:
/**
 * Converts a UTF-8 string to lower case.
 *
 * ASCII characters go through the native strtolower(); every other character
 * is looked up by its upper-case form in the case-folding table returned by
 * _find() and replaced by the first code point of the matching 'lower' entry.
 * Characters without a mapping are kept unchanged.
 *
 * @param string $string String to convert.
 * @return string Lower-cased string.
 */
public static function strtolower($string) {
	$lowerCase = array();

	foreach (Multibyte::utf8($string) as $char) {
		if ($char < 128) {
			$lowerCase[] = ord(strtolower(chr($char)));
			continue;
		}

		$mapped = $char;
		$keys = self::_find($char, 'upper');

		if (!empty($keys)) {
			foreach ($keys as $entry) {
				// isset() replaces the former count() on an integer code point,
				// which warns on PHP 7.2+ and throws a TypeError on PHP 8.
				if ($entry['upper'] == $char && isset($entry['lower'][0])) {
					$mapped = $entry['lower'][0];
					break;
				}
			}
		}
		$lowerCase[] = $mapped;
	}
	return Multibyte::ascii($lowerCase);
}
571:
572: 573: 574: 575: 576: 577:
/**
 * Converts a UTF-8 string to upper case.
 *
 * ASCII characters go through the native strtoupper(). Every other character
 * is looked up by its lower-case form via _find(); a table entry whose 'lower'
 * sequence spans several code points is matched against the following
 * characters and replaced as a whole by the entry's 'upper' code point.
 *
 * @param string $string String to convert.
 * @return string Upper-cased string.
 */
public static function strtoupper($string) {
	$utf8Map = Multibyte::utf8($string);

	$length = count($utf8Map);
	// Code points consumed as part of a multi-character 'lower' sequence;
	// they are not copied to the output on their own.
	$replaced = array();
	$upperCase = array();

	for ($i = 0; $i < $length; $i++) {
		$char = $utf8Map[$i];

		if ($char < 128) {
			$str = strtoupper(chr($char));
			$strlen = strlen($str);
			// Keeps the ordinal of the last byte of $str.
			for ($ii = 0; $ii < $strlen; $ii++) {
				$upper = ord(substr($str, $ii, 1));
			}
			$upperCase[] = $upper;
			$matched = true;

		} else {
			$matched = false;
			$keys = self::_find($char);
			// NOTE(review): count() runs before the empty() guard below;
			// confirm _find() always returns a countable value.
			$keyCount = count($keys);

			if (!empty($keys)) {
				foreach ($keys as $key => $value) {
					$matched = false;
					$replace = 0;
					if ($length > 1 && count($keys[$key]['lower']) > 1) {
						// This entry maps a sequence of lower-case code points:
						// compare it with the characters starting at $i.
						$j = 0;

						for ($ii = 0, $count = count($keys[$key]['lower']); $ii < $count; $ii++) {
							// NOTE(review): $i + $ii may be past the end of $utf8Map;
							// the isset() below tests the copied value.
							$nextChar = $utf8Map[$i + $ii];

							if (isset($nextChar) && ($nextChar == $keys[$key]['lower'][$j + $ii])) {
								$replace++;
							}
						}
						if ($replace == $count) {
							$upperCase[] = $keys[$key]['upper'];
							$replaced = array_merge($replaced, array_values($keys[$key]['lower']));
							$matched = true;
							break 1;
						}
					} elseif ($length > 1 && $keyCount > 1) {
						// Several candidate entries: try the later ones, which may
						// describe a multi-character sequence starting at $i.
						$j = 0;
						for ($ii = 1; $ii < $keyCount; $ii++) {
							$nextChar = $utf8Map[$i + $ii - 1];

							if (in_array($nextChar, $keys[$ii]['lower'])) {

								for ($jj = 0, $count = count($keys[$ii]['lower']); $jj < $count; $jj++) {
									$nextChar = $utf8Map[$i + $jj];

									if (isset($nextChar) && ($nextChar == $keys[$ii]['lower'][$j + $jj])) {
										$replace++;
									}
								}
								if ($replace == $count) {
									$upperCase[] = $keys[$ii]['upper'];
									$replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
									$matched = true;
									// Leaves both the candidate loop and the foreach.
									break 2;
								}
							}
						}
					}
					// Plain one-to-one mapping on the first 'lower' code point.
					if ($keys[$key]['lower'][0] == $char) {
						$upperCase[] = $keys[$key]['upper'];
						$matched = true;
						break 1;
					}
				}
			}
		}
		// Unmapped characters are copied through unless they were already
		// consumed by a multi-character replacement.
		if ($matched === false && !in_array($char, $replaced, true)) {
			$upperCase[] = $char;
		}
	}
	return Multibyte::ascii($upperCase);
}
659:
660: 661: 662: 663: 664: 665: 666:
/**
 * Counts the non-overlapping occurrences of $needle in $haystack.
 *
 * Both strings are decoded to code points, so the comparison is made
 * character by character rather than byte by byte. After a match the scan
 * resumes behind the matched characters.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @return int Number of occurrences; 0 when $needle is empty.
 */
public static function substrCount($haystack, $needle) {
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return 0;
	}

	// Last start position at which the whole needle still fits.
	$lastStart = count($haystack) - $needleCount;
	$count = 0;
	$position = 0;

	while ($position <= $lastStart) {
		$i = 0;
		while ($i < $needleCount && $haystack[$position + $i] === $needle[$i]) {
			$i++;
		}
		if ($i === $needleCount) {
			$count++;
			$position += $needleCount;
		} else {
			$position++;
		}
	}
	return $count;
}
694:
695: 696: 697: 698: 699: 700: 701: 702:
/**
 * Returns part of a string, measured in characters.
 *
 * The string is decoded to code points, sliced with array_slice() and
 * re-encoded, so negative $start and $length follow array_slice() rules
 * (counted from the end of the string).
 *
 * @param string $string String to extract from.
 * @param int $start Position of the first character to return.
 * @param int|null $length Maximum number of characters to return; null means
 *   up to the end of the string.
 * @return string The extracted part; '' when the range is empty.
 */
public static function substr($string, $start, $length = null) {
	if ($start === 0 && $length === null) {
		return $string;
	}

	$codes = Multibyte::utf8($string);

	if ($length === null) {
		$slice = array_slice($codes, (int)$start);
	} else {
		$slice = array_slice($codes, (int)$start, (int)$length);
	}
	return Multibyte::ascii($slice);
}
725:
726: 727: 728: 729: 730: 731: 732: 733:
/**
 * Encodes a string as one or more base64 MIME encoded-words
 * ("=?CHARSET?B?...?=") so that no encoded-word exceeds 75 bytes.
 *
 * Strings that contain no multibyte bytes and are shorter than 75 bytes are
 * returned unchanged. For UTF-8 the text is split on character boundaries
 * before encoding; for any other charset the base64 output is split into
 * fixed-size chunks.
 *
 * @param string $string String to encode.
 * @param string|null $charset Charset name; read from the 'App.encoding'
 *   configuration value when empty.
 * @param string $newline Line separator placed between encoded-words.
 * @return string The encoded header value.
 */
public static function mimeEncode($string, $charset = null, $newline = "\r\n") {
	if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
		return $string;
	}

	if (empty($charset)) {
		$charset = Configure::read('App.encoding');
	}
	$charset = strtoupper($charset);

	$start = '=?' . $charset . '?B?';
	$end = '?=';
	$spacer = $end . $newline . ' ' . $start;

	// Room left for base64 data in a 75-byte encoded-word, rounded down to
	// whole 4-byte base64 groups.
	$length = 75 - strlen($start) - strlen($end);
	$length = $length - ($length % 4);
	if ($length < 4) {
		// An overlong charset name must not yield an empty or negative chunk size.
		$length = 4;
	}

	if ($charset === 'UTF-8') {
		$parts = array();
		$maxchars = (int)floor(($length * 3) / 4);

		while (strlen($string) > $maxchars) {
			$i = $maxchars;
			// Step back over continuation bytes (10xxxxxx) so that the cut
			// falls on a character boundary.
			while ($i > 0 && (ord($string[$i]) & 0xC0) === 0x80) {
				$i--;
			}
			if ($i === 0) {
				// Malformed input without a lead byte: cut at the byte limit
				// instead of looping forever.
				$i = $maxchars;
			}
			$parts[] = base64_encode(substr($string, 0, $i));
			$string = substr($string, $i);
		}
		$parts[] = base64_encode($string);
		$string = implode($spacer, $parts);
	} else {
		$string = implode($spacer, str_split(base64_encode($string), $length));
	}
	return $start . $string . $end;
}
773:
774: 775: 776: 777: 778: 779:
/**
 * Returns the name of the case-folding code range a code point belongs to
 * and caches the answer in self::$_codeRange.
 *
 * @param int $decimal Decimal code point.
 * @return string|bool Range name (e.g. '0080_00ff'), or false when the code
 *   point lies outside every known range.
 */
protected static function _codepoint($decimal) {
	// array(first code point, first code point after the range, range name)
	$ranges = array(
		array(128, 256, '0080_00ff'),
		array(256, 384, '0100_017f'),
		array(384, 592, '0180_024F'),
		array(592, 688, '0250_02af'),
		array(880, 1024, '0370_03ff'),
		array(1024, 1280, '0400_04ff'),
		array(1280, 1328, '0500_052f'),
		array(1328, 1424, '0530_058f'),
		array(7680, 7936, '1e00_1eff'),
		array(7936, 8192, '1f00_1fff'),
		array(8448, 8528, '2100_214f'),
		array(8528, 8592, '2150_218f'),
		array(9312, 9472, '2460_24ff'),
		array(11264, 11360, '2c00_2c5f'),
		array(11360, 11392, '2c60_2c7f'),
		array(11392, 11520, '2c80_2cff'),
		array(65280, 65520, 'ff00_ffef'),
	);

	$return = false;
	foreach ($ranges as $range) {
		if ($decimal >= $range[0] && $decimal < $range[1]) {
			$return = $range[2];
			break;
		}
	}
	self::$_codeRange[$decimal] = $return;
	return $return;
}

/**
 * Finds the case-folding table entries that mention a code point.
 *
 * The table for the code point's range is loaded through Configure on first
 * use and kept in self::$_caseFold.
 *
 * @param int $char Decimal code point to look up.
 * @param string $type 'lower' to match the first code point of an entry's
 *   'lower' sequence, 'upper' to match an entry's 'upper' code point.
 * @return array Matching table entries; empty when the code point has no
 *   range or no entry matches.
 */
protected static function _find($char, $type = 'lower') {
	if (!isset(self::$_codeRange[$char])) {
		$range = self::_codepoint($char);

		// Load each range file once instead of once per distinct character.
		if ($range !== false && !isset(self::$_caseFold[$range])) {
			if (!Configure::configured('_cake_core_')) {
				App::uses('PhpReader', 'Configure');
				Configure::config('_cake_core_', new PhpReader(CAKE . 'Config' . DS));
			}
			Configure::load('unicode' . DS . 'casefolding' . DS . $range, '_cake_core_');
			self::$_caseFold[$range] = Configure::read($range);
			Configure::delete($range);
		}
	}

	$range = self::$_codeRange[$char];
	if (!$range) {
		return array();
	}
	self::$_table = $range;

	if (empty(self::$_caseFold[$range])) {
		return array();
	}

	$found = array();
	foreach (self::$_caseFold[$range] as $entry) {
		if ($type === 'lower' && $entry[$type][0] === $char) {
			$found[] = $entry;
		} elseif ($type === 'upper' && $entry[$type] === $char) {
			$found[] = $entry;
		}
	}
	return $found;
}
860:
861: 862: 863: 864: 865:
/**
 * Tells whether a string contains at least one byte whose value is
 * greater than 128.
 *
 * @param string $string String to inspect.
 * @return bool True when such a byte is present, false otherwise.
 */
public static function checkMultibyte($string) {
	// Byte-wise match (no "u" modifier): any byte in 0x81-0xFF.
	return preg_match('/[\x81-\xFF]/', $string) === 1;
}
877:
878: }
879: