1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20:
21:
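/**
 * Static helpers that treat strings as UTF-8 and operate on them one code point
 * at a time (length, searching, slicing, case conversion, MIME header encoding).
 */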
27: class Multibyte {
28:
/**
 * Case-folding tables loaded so far, indexed by code range name.
 */
34: protected static $_caseFold = array();
35:
/**
 * Cache of the code range name (or false) resolved for each code point.
 */
41: protected static $_codeRange = array();
42:
/**
 * Name of the code range table used by the most recent case-folding lookup.
 */
48: protected static $_table = null;
49:
50: 51: 52: 53: 54: 55: 56:
/**
 * Converts a UTF-8 encoded string into a list of Unicode code points.
 *
 * Bytes below 128 are copied through unchanged. Any other byte is collected
 * into a pending sequence whose expected length is chosen from its first
 * byte: below 224 two bytes, below 240 three bytes, otherwise four bytes.
 * No validation is performed, and an incomplete sequence at the end of the
 * string is dropped.
 *
 * @param string $string UTF-8 encoded string to decode.
 * @return array List of integer code points, in string order.
 */
public static function utf8($string) {
    $map = array();

    $values = array();
    $find = 1;
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        $value = ord($string[$i]);

        if ($value < 128) {
            $map[] = $value;
            continue;
        }

        if (empty($values)) {
            // First byte of a sequence decides how many bytes belong to it.
            if ($value < 224) {
                $find = 2;
            } elseif ($value < 240) {
                $find = 3;
            } else {
                $find = 4;
            }
        }
        $values[] = $value;

        if (count($values) !== $find) {
            continue;
        }

        if ($find === 4) {
            // 3 payload bits in the lead byte, 6 in each continuation byte.
            $map[] = (($values[0] % 8) * 262144)
                + (($values[1] % 64) * 4096)
                + (($values[2] % 64) * 64)
                + ($values[3] % 64);
        } elseif ($find === 3) {
            $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
        } else {
            $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
        }
        $values = array();
        $find = 1;
    }
    return $map;
}
88:
89: 90: 91: 92: 93: 94: 95:
/**
 * Converts a list of Unicode code points into a UTF-8 encoded string.
 *
 * Code points below 128 become one byte, below 2048 two bytes, below 65536
 * three bytes, and anything larger four bytes.
 *
 * @param array $array List of integer code points.
 * @return string UTF-8 encoded string.
 */
public static function ascii($array) {
    $ascii = '';

    foreach ($array as $utf8) {
        if ($utf8 < 128) {
            $ascii .= chr($utf8);
        } elseif ($utf8 < 2048) {
            $ascii .= chr(192 + ($utf8 >> 6));
            $ascii .= chr(128 + ($utf8 & 63));
        } elseif ($utf8 < 65536) {
            $ascii .= chr(224 + ($utf8 >> 12));
            $ascii .= chr(128 + (($utf8 >> 6) & 63));
            $ascii .= chr(128 + ($utf8 & 63));
        } else {
            // Code points above U+FFFF need a four byte sequence.
            $ascii .= chr(240 + ($utf8 >> 18));
            $ascii .= chr(128 + (($utf8 >> 12) & 63));
            $ascii .= chr(128 + (($utf8 >> 6) & 63));
            $ascii .= chr(128 + ($utf8 & 63));
        }
    }
    return $ascii;
}
113:
114: 115: 116: 117: 118: 119: 120: 121: 122:
/**
 * Finds the position of the first occurrence of $needle in $haystack,
 * ignoring case.
 *
 * When $haystack contains no multibyte characters the native stripos() is
 * used. Otherwise both strings are upper-cased with Multibyte::strtoupper()
 * and the search is delegated to Multibyte::strpos().
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param int $offset Position at which the search starts.
 * @return int|bool Position of the match, or false when there is none.
 */
public static function stripos($haystack, $needle, $offset = 0) {
    if (!self::checkMultibyte($haystack)) {
        return stripos($haystack, $needle, $offset);
    }

    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strpos($upperHaystack, $upperNeedle, $offset);
}
131:
132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142:
/**
 * Finds the first occurrence of $needle in $haystack, ignoring case, and
 * returns part of $haystack.
 *
 * The code point based search is used when $haystack contains multibyte
 * characters, or when $part is requested on PHP older than 5.3. In every
 * other case the native stristr() is called.
 *
 * In the code point based search, a match at position 0 with $part enabled
 * returns the haystack from the match onwards, because the "before" portion
 * is empty.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param bool $part When true, return the portion before the match;
 *   otherwise return the portion from the match to the end.
 * @return string|bool Requested portion of $haystack, or false when $needle is not found.
 */
public static function stristr($haystack, $needle, $part = false) {
    // Compare versions properly; a string/float comparison misorders versions such as "10.0.0".
    $php = version_compare(PHP_VERSION, '5.3.0', '<');

    if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
        // The search runs on upper-cased copies; the result is cut from the original.
        $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
        $checkCount = count($check);

        $haystack = Multibyte::utf8($haystack);

        $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
        $needleCount = count($needle);

        $index = -1;
        if ($needleCount > 0) {
            // Stopping at $last keeps every comparison inside $check.
            $last = $checkCount - $needleCount;
            for ($position = 0; $position <= $last; $position++) {
                for ($i = 0; $i < $needleCount; $i++) {
                    if ($needle[$i] !== $check[$position + $i]) {
                        break;
                    }
                }
                if ($i === $needleCount) {
                    $index = $position;
                    break;
                }
            }
        }

        if ($index < 0) {
            return false;
        }
        if ($part && $index > 0) {
            return Multibyte::ascii(array_slice($haystack, 0, $index));
        }
        $rest = array_slice($haystack, $index);
        if (!empty($rest)) {
            return Multibyte::ascii($rest);
        }
        return false;
    }

    if (!$php) {
        return stristr($haystack, $needle, $part);
    }
    return stristr($haystack, $needle);
}
192:
193: 194: 195: 196: 197: 198:
/**
 * Returns the length of a string.
 *
 * Strings containing multibyte characters are measured in code points;
 * all other strings are measured with the native strlen().
 *
 * @param string $string String to measure.
 * @return int Length of the string.
 */
public static function strlen($string) {
    if (!self::checkMultibyte($string)) {
        return strlen($string);
    }

    $codePoints = self::utf8($string);

    return count($codePoints);
}
206:
207: 208: 209: 210: 211: 212: 213: 214: 215:
/**
 * Finds the position of the first occurrence of $needle in $haystack.
 *
 * When $haystack contains no multibyte characters the native strpos() is
 * used. Otherwise both strings are decoded to code points and the returned
 * position is counted in code points.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param int $offset Code point position at which the search starts. In the
 *   multibyte branch a negative value is treated as 0.
 * @return int|bool Position of the match, or false when there is none.
 */
public static function strpos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return strpos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8($haystack);
    $haystackCount = count($haystack);

    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    $start = max(0, (int)$offset);
    // A match cannot start after $last, so no comparison reads past the haystack.
    $last = $haystackCount - $needleCount;

    for ($position = $start; $position <= $last; $position++) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($needle[$i] !== $haystack[$position + $i]) {
                break;
            }
        }
        if ($i === $needleCount) {
            return $position;
        }
    }
    return false;
}
249:
250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260:
/**
 * Finds the last occurrence of $needle in $haystack and returns part of
 * $haystack.
 *
 * Both strings are decoded to code points and the whole needle is compared.
 * The haystack is scanned from the end, so the first full match found is the
 * last occurrence. A match at position 0 with $part enabled returns the
 * haystack from the match onwards, because the "before" portion is empty.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param bool $part When true, return the portion before the last match;
 *   otherwise return the portion from the last match to the end.
 * @return string|bool Requested portion of $haystack, or false when $needle is not found.
 */
public static function strrchr($haystack, $needle, $part = false) {
    $haystack = Multibyte::utf8($haystack);
    $haystackCount = count($haystack);

    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    $index = -1;
    if ($needleCount > 0) {
        for ($position = $haystackCount - $needleCount; $position >= 0; $position--) {
            for ($i = 0; $i < $needleCount; $i++) {
                if ($needle[$i] !== $haystack[$position + $i]) {
                    break;
                }
            }
            if ($i === $needleCount) {
                $index = $position;
                break;
            }
        }
    }

    if ($index < 0) {
        return false;
    }
    if ($part && $index > 0) {
        return Multibyte::ascii(array_slice($haystack, 0, $index));
    }
    return Multibyte::ascii(array_slice($haystack, $index));
}
309:
310: 311: 312: 313: 314: 315: 316: 317: 318: 319: 320:
/**
 * Finds the last occurrence of $needle in $haystack, ignoring case, and
 * returns part of $haystack.
 *
 * The search runs on upper-cased copies of both strings (via
 * Multibyte::strtoupper()), scanning from the end so the first full match
 * found is the last occurrence. The returned portion is cut from the
 * original haystack. A match at position 0 with $part enabled returns the
 * haystack from the match onwards, because the "before" portion is empty.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param bool $part When true, return the portion before the last match;
 *   otherwise return the portion from the last match to the end.
 * @return string|bool Requested portion of $haystack, or false when $needle is not found.
 */
public static function strrichr($haystack, $needle, $part = false) {
    $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
    $checkCount = count($check);

    $haystack = Multibyte::utf8($haystack);

    $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
    $needleCount = count($needle);

    $index = -1;
    if ($needleCount > 0) {
        for ($position = $checkCount - $needleCount; $position >= 0; $position--) {
            for ($i = 0; $i < $needleCount; $i++) {
                if ($needle[$i] !== $check[$position + $i]) {
                    break;
                }
            }
            if ($i === $needleCount) {
                $index = $position;
                break;
            }
        }
    }

    if ($index < 0) {
        return false;
    }
    if ($part && $index > 0) {
        return Multibyte::ascii(array_slice($haystack, 0, $index));
    }
    $rest = array_slice($haystack, $index);
    if (!empty($rest)) {
        return Multibyte::ascii($rest);
    }
    return false;
}
371:
372: 373: 374: 375: 376: 377: 378: 379: 380:
/**
 * Finds the position of the last occurrence of $needle in $haystack,
 * ignoring case.
 *
 * When $haystack contains no multibyte characters the native strripos() is
 * used. Otherwise both strings are upper-cased with Multibyte::strtoupper(),
 * decoded to code points and scanned from the end; the returned position is
 * counted in code points of the upper-cased haystack.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param int $offset When positive, matches starting before this position
 *   are ignored. When negative, matches starting after that many code points
 *   from the end are ignored.
 * @return int|bool Position of the last match, or false when there is none.
 */
public static function strripos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return strripos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8(Multibyte::strtoupper($haystack));
    $haystackCount = count($haystack);

    $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    $first = 0;
    $last = $haystackCount - $needleCount;
    if ($offset > 0) {
        $first = (int)$offset;
    } elseif ($offset < 0) {
        $last = min($last, $haystackCount + (int)$offset);
    }

    // Scanning backwards means the first full match is the last occurrence.
    for ($position = $last; $position >= $first; $position--) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($needle[$i] !== $haystack[$position + $i]) {
                break;
            }
        }
        if ($i === $needleCount) {
            return $position;
        }
    }
    return false;
}
421:
422: 423: 424: 425: 426: 427: 428: 429: 430: 431:
/**
 * Finds the position of the last occurrence of $needle in $haystack.
 *
 * When $haystack contains no multibyte characters the native strrpos() is
 * used. Otherwise both strings are decoded to code points and scanned from
 * the end; the returned position is counted in code points.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param int $offset When positive, matches starting before this position
 *   are ignored. When negative, matches starting after that many code points
 *   from the end are ignored.
 * @return int|bool Position of the last match, or false when there is none.
 */
public static function strrpos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return strrpos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8($haystack);
    $haystackCount = count($haystack);

    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    $first = 0;
    $last = $haystackCount - $needleCount;
    if ($offset > 0) {
        $first = (int)$offset;
    } elseif ($offset < 0) {
        $last = min($last, $haystackCount + (int)$offset);
    }

    // Scanning backwards means the first full match is the last occurrence.
    for ($position = $last; $position >= $first; $position--) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($needle[$i] !== $haystack[$position + $i]) {
                break;
            }
        }
        if ($i === $needleCount) {
            return $position;
        }
    }
    return false;
}
471:
472: 473: 474: 475: 476: 477: 478: 479: 480: 481: 482:
/**
 * Finds the first occurrence of $needle in $haystack and returns part of
 * $haystack.
 *
 * The code point based search is used when $haystack contains multibyte
 * characters, or when $part is requested on PHP older than 5.3. In every
 * other case the native strstr() is called.
 *
 * In the code point based search, a match at position 0 with $part enabled
 * returns the haystack from the match onwards, because the "before" portion
 * is empty.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param bool $part When true, return the portion before the match;
 *   otherwise return the portion from the match to the end.
 * @return string|bool Requested portion of $haystack, or false when $needle is not found.
 */
public static function strstr($haystack, $needle, $part = false) {
    // Compare versions properly; a string/float comparison misorders versions such as "10.0.0".
    $php = version_compare(PHP_VERSION, '5.3.0', '<');

    if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
        $haystack = Multibyte::utf8($haystack);
        $haystackCount = count($haystack);

        $needle = Multibyte::utf8($needle);
        $needleCount = count($needle);

        $index = -1;
        if ($needleCount > 0) {
            // Stopping at $last keeps every comparison inside the haystack.
            $last = $haystackCount - $needleCount;
            for ($position = 0; $position <= $last; $position++) {
                for ($i = 0; $i < $needleCount; $i++) {
                    if ($needle[$i] !== $haystack[$position + $i]) {
                        break;
                    }
                }
                if ($i === $needleCount) {
                    $index = $position;
                    break;
                }
            }
        }

        if ($index < 0) {
            return false;
        }
        if ($part && $index > 0) {
            return Multibyte::ascii(array_slice($haystack, 0, $index));
        }
        return Multibyte::ascii(array_slice($haystack, $index));
    }

    if (!$php) {
        return strstr($haystack, $needle, $part);
    }
    return strstr($haystack, $needle);
}
530:
531: 532: 533: 534: 535: 536:
/**
 * Converts a UTF-8 string to lower case.
 *
 * Code points below 128 go through the native strtolower(). Other code
 * points are looked up with self::_find() by their upper-case value and
 * replaced by the first lower-case code point of the first matching entry.
 * Code points without a matching entry are kept unchanged.
 *
 * @param string $string String to convert.
 * @return string Lower-cased string.
 */
public static function strtolower($string) {
    $utf8Map = Multibyte::utf8($string);
    $lowerCase = array();

    foreach ($utf8Map as $char) {
        if ($char < 128) {
            $lowerCase[] = ord(strtolower(chr($char)));
            continue;
        }

        $matched = false;
        $keys = self::_find($char, 'upper');

        if (!empty($keys)) {
            foreach ($keys as $entry) {
                // isset() replaces count() on an integer, which throws a TypeError on PHP 8.
                if ($entry['upper'] == $char && isset($entry['lower'][0])) {
                    $lowerCase[] = $entry['lower'][0];
                    $matched = true;
                    break;
                }
            }
        }

        if ($matched === false) {
            $lowerCase[] = $char;
        }
    }
    return Multibyte::ascii($lowerCase);
}
574:
575: 576: 577: 578: 579: 580:
/**
 * Converts a UTF-8 string to upper case.
 *
 * Code points below 128 go through the native strtoupper(). Other code
 * points are looked up with self::_find() by their lower-case value. An
 * entry whose 'lower' list holds several code points is applied only when
 * all of them follow each other at the current position; the consumed code
 * points are remembered in $replaced so they are not emitted again.
 * Code points without a matching entry are kept unchanged.
 *
 * @param string $string String to convert.
 * @return string Upper-cased string.
 */
public static function strtoupper($string) {
    $utf8Map = Multibyte::utf8($string);

    $length = count($utf8Map);
    $replaced = array();
    $upperCase = array();

    for ($i = 0; $i < $length; $i++) {
        $char = $utf8Map[$i];

        if ($char < 128) {
            $upperCase[] = ord(strtoupper(chr($char)));
            $matched = true;
        } else {
            $matched = false;
            $keys = self::_find($char);

            if (!empty($keys)) {
                // Counted only here: _find() may return null, which count() rejects on PHP 8.
                $keyCount = count($keys);

                foreach ($keys as $key => $entry) {
                    $matched = false;
                    $replace = 0;
                    if ($length > 1 && count($entry['lower']) > 1) {
                        for ($ii = 0, $count = count($entry['lower']); $ii < $count; $ii++) {
                            // Look ahead without reading past the end of the string.
                            $nextChar = isset($utf8Map[$i + $ii]) ? $utf8Map[$i + $ii] : null;

                            if ($nextChar !== null && ($nextChar == $entry['lower'][$ii])) {
                                $replace++;
                            }
                        }
                        if ($replace == $count) {
                            $upperCase[] = $entry['upper'];
                            $replaced = array_merge($replaced, array_values($entry['lower']));
                            $matched = true;
                            break 1;
                        }
                    } elseif ($length > 1 && $keyCount > 1) {
                        for ($ii = 1; $ii < $keyCount; $ii++) {
                            $nextChar = isset($utf8Map[$i + $ii - 1]) ? $utf8Map[$i + $ii - 1] : null;

                            if (in_array($nextChar, $keys[$ii]['lower'])) {

                                for ($jj = 0, $count = count($keys[$ii]['lower']); $jj < $count; $jj++) {
                                    $nextChar = isset($utf8Map[$i + $jj]) ? $utf8Map[$i + $jj] : null;

                                    if ($nextChar !== null && ($nextChar == $keys[$ii]['lower'][$jj])) {
                                        $replace++;
                                    }
                                }
                                if ($replace == $count) {
                                    $upperCase[] = $keys[$ii]['upper'];
                                    $replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
                                    $matched = true;
                                    break 2;
                                }
                            }
                        }
                    }
                    if ($entry['lower'][0] == $char) {
                        $upperCase[] = $entry['upper'];
                        $matched = true;
                        break 1;
                    }
                }
            }
        }
        if ($matched === false && !in_array($char, $replaced, true)) {
            $upperCase[] = $char;
        }
    }
    return Multibyte::ascii($upperCase);
}
662:
663: 664: 665: 666: 667: 668: 669:
/**
 * Counts the occurrences of $needle in $haystack.
 *
 * Both strings are decoded to code points and the whole needle is compared
 * at each position. After a match the scan resumes behind it, so
 * overlapping occurrences are not counted twice (as with the native
 * substr_count()).
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @return int Number of occurrences; 0 when $needle is empty or not found.
 */
public static function substrCount($haystack, $needle) {
    $haystack = Multibyte::utf8($haystack);
    $haystackCount = count($haystack);

    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return 0;
    }

    $count = 0;
    // A match cannot start after $last, so no comparison reads past the haystack.
    $last = $haystackCount - $needleCount;
    $i = 0;

    while ($i <= $last) {
        for ($ii = 0; $ii < $needleCount; $ii++) {
            if ($needle[$ii] !== $haystack[$i + $ii]) {
                break;
            }
        }
        if ($ii === $needleCount) {
            $count++;
            $i += $needleCount;
        } else {
            $i++;
        }
    }
    return $count;
}
697:
698: 699: 700: 701: 702: 703: 704: 705:
/**
 * Returns part of a string, counted in code points.
 *
 * The string is decoded to code points, sliced with array_slice() and
 * encoded again. Negative values follow array_slice(): a negative $start
 * counts from the end of the string, and a negative $length stops that many
 * code points before the end.
 *
 * @param string $string String to cut from.
 * @param int $start Position of the first code point to return.
 * @param int|null $length Maximum number of code points to return; null
 *   means everything up to the end of the string.
 * @return string Requested part of $string.
 */
public static function substr($string, $start, $length = null) {
    if ($start === 0 && $length === null) {
        return $string;
    }

    $chars = Multibyte::utf8($string);

    if ($length === null) {
        $chars = array_slice($chars, (int)$start);
    } else {
        $chars = array_slice($chars, (int)$start, (int)$length);
    }
    return Multibyte::ascii($chars);
}
728:
729: 730: 731: 732: 733: 734: 735: 736:
/**
 * Encodes a string as one or more base64 ("B") encoded-words of the form
 * =?CHARSET?B?...?=, each at most 75 characters long.
 *
 * Strings shorter than 75 bytes without multibyte characters are returned
 * unchanged. For UTF-8 the input is split only in front of a non-continuation
 * byte so that no character is cut in half; for other charsets the base64
 * output is chunked directly.
 *
 * @param string $string String to encode.
 * @param string|null $charset Charset name; when empty, Configure's
 *   'App.encoding' value is used.
 * @param string $newline Line separator placed between encoded-words.
 * @return string Encoded string, or the input when no encoding is needed.
 */
public static function mimeEncode($string, $charset = null, $newline = "\r\n") {
    if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
        return $string;
    }

    if (empty($charset)) {
        $charset = Configure::read('App.encoding');
    }
    $charset = strtoupper($charset);

    $start = '=?' . $charset . '?B?';
    $end = '?=';
    $spacer = $end . $newline . ' ' . $start;

    // Room left for base64 data in a 75 character word, rounded down to whole 4 character groups.
    $length = 75 - strlen($start) - strlen($end);
    $length = $length - ($length % 4);
    if ($charset === 'UTF-8') {
        $parts = array();
        $maxchars = (int)floor(($length * 3) / 4);
        $stringLength = strlen($string);
        while ($stringLength > $maxchars) {
            $i = $maxchars;
            $test = ord($string[$i]);
            // Step back over continuation bytes (128-191) to the start of a character.
            while ($i > 0 && $test >= 128 && $test <= 191) {
                $i--;
                $test = ord($string[$i]);
            }
            if ($i <= 0) {
                // Malformed input with no character boundary: split at the byte limit so the loop advances.
                $i = $maxchars;
            }
            $parts[] = base64_encode(substr($string, 0, $i));
            $string = substr($string, $i);
            $stringLength = strlen($string);
        }
        $parts[] = base64_encode($string);
        $string = implode($spacer, $parts);
    } else {
        $string = chunk_split(base64_encode($string), $length, $spacer);
        // chunk_split() appends the separator after the last chunk as well; drop it.
        $string = preg_replace('/' . preg_quote($spacer, '/') . '$/', '', $string);
    }
    return $start . $string . $end;
}
776:
777: 778: 779: 780: 781: 782:
/**
 * Resolves the name of the case-folding range for a code point and caches
 * the answer in self::$_codeRange.
 *
 * The bounds are tested in order and the first one that fits wins. Entries
 * without a lower bound also accept every smaller value that no earlier
 * entry claimed, so code points lying between two named ranges resolve to
 * the next open-ended entry.
 *
 * @param int $decimal Code point to classify.
 * @return string|bool Range name, or false when no entry fits.
 */
protected static function _codepoint($decimal) {
    $return = false;

    if ($decimal > 128 && $decimal < 256) {
        $return = '0080_00ff';
    } else {
        // array(inclusive lower bound or null for none, exclusive upper bound, range name)
        $bounds = array(
            array(null, 384, '0100_017f'),
            array(null, 592, '0180_024F'),
            array(null, 688, '0250_02af'),
            array(880, 1024, '0370_03ff'),
            array(null, 1280, '0400_04ff'),
            array(null, 1328, '0500_052f'),
            array(null, 1424, '0530_058f'),
            array(7680, 7936, '1e00_1eff'),
            array(null, 8192, '1f00_1fff'),
            array(8448, 8528, '2100_214f'),
            array(null, 8592, '2150_218f'),
            array(9312, 9472, '2460_24ff'),
            array(11264, 11360, '2c00_2c5f'),
            array(null, 11392, '2c60_2c7f'),
            array(null, 11520, '2c80_2cff'),
            array(65280, 65520, 'ff00_ffef'),
        );

        foreach ($bounds as $bound) {
            $aboveLower = ($bound[0] === null) || ($decimal >= $bound[0]);
            if ($aboveLower && $decimal < $bound[1]) {
                $return = $bound[2];
                break;
            }
        }
    }

    self::$_codeRange[$decimal] = $return;
    return $return;
}
824:
825: 826: 827: 828: 829: 830: 831:
/**
 * Finds the case-folding entries that match a code point.
 *
 * On first sight of a code point its range is resolved with
 * self::_codepoint(). The range's table is loaded through Configure only
 * when it is not already held in self::$_caseFold.
 *
 * @param int $char Code point to look up.
 * @param string $type 'lower' to match the first code point of an entry's
 *   'lower' list, 'upper' to match an entry's 'upper' value.
 * @return array|null Matching entries (possibly empty), or null when the
 *   code point belongs to no known range.
 */
protected static function _find($char, $type = 'lower') {
    $found = array();
    if (!isset(self::$_codeRange[$char])) {
        $range = self::_codepoint($char);
        if ($range === false) {
            return null;
        }
        // Load each table once instead of once per distinct code point.
        if (!isset(self::$_caseFold[$range])) {
            if (!Configure::configured('_cake_core_')) {
                App::uses('PhpReader', 'Configure');
                Configure::config('_cake_core_', new PhpReader(CAKE . 'Config' . DS));
            }
            Configure::load('unicode' . DS . 'casefolding' . DS . $range, '_cake_core_');
            self::$_caseFold[$range] = Configure::read($range);
            Configure::delete($range);
        }
    }

    if (!self::$_codeRange[$char]) {
        return null;
    }
    self::$_table = self::$_codeRange[$char];

    // Nothing to scan when the table could not be read.
    if (!isset(self::$_caseFold[self::$_table]) || !is_array(self::$_caseFold[self::$_table])) {
        return $found;
    }

    foreach (self::$_caseFold[self::$_table] as $entry) {
        if ($type === 'lower' && $entry[$type][0] === $char) {
            $found[] = $entry;
        } elseif ($type === 'upper' && $entry[$type] === $char) {
            $found[] = $entry;
        }
    }
    return $found;
}
863:
864: 865: 866: 867: 868:
/**
 * Tells whether a string contains at least one byte with a value above 128.
 *
 * @param string $string String to inspect.
 * @return bool True as soon as such a byte is found, false otherwise.
 */
public static function checkMultibyte($string) {
    $index = 0;
    $total = strlen($string);

    while ($index < $total) {
        if (ord($string[$index]) > 128) {
            return true;
        }
        $index++;
    }
    return false;
}
880:
881: }
882: