1: <?php
2: /**
3: * Multibyte handling methods.
4: *
5: *
6: * PHP 5
7: *
8: * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
9: * Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
10: *
11: * Licensed under The MIT License
12: * Redistributions of files must retain the above copyright notice.
13: *
14: * @copyright Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
15: * @link http://cakephp.org CakePHP(tm) Project
16: * @package Cake.I18n
17: * @since CakePHP(tm) v 1.2.0.6833
18: * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
19: */
20:
if (function_exists('mb_stripos') === false) {

    /**
     * Fallback mb_stripos(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::stripos(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $haystack String to search in.
     * @param string $needle String to look for, ignoring case.
     * @param integer $offset Character position in $haystack at which the search starts.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return integer|boolean Position of the first match, or false when $needle is not found.
     */
    function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
        return Multibyte::stripos($haystack, $needle, $offset);
    }

}
38:
if (function_exists('mb_stristr') === false) {

    /**
     * Fallback mb_stristr(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::stristr(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $haystack String to search in.
     * @param string $needle String to look for, ignoring case.
     * @param boolean $part When true, the portion of $haystack before the first match is
     *    requested; when false (default), the portion from the first match to the end.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
     */
    function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
        return Multibyte::stristr($haystack, $needle, $part);
    }

}
58:
if (function_exists('mb_strlen') === false) {

    /**
     * Fallback mb_strlen(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::strlen(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $string String to measure.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return integer Number of characters in $string; a multibyte character counts as one.
     */
    function mb_strlen($string, $encoding = null) {
        return Multibyte::strlen($string);
    }

}
74:
if (function_exists('mb_strpos') === false) {

    /**
     * Fallback mb_strpos(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::strpos(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $haystack String to search in.
     * @param string $needle String to look for.
     * @param integer $offset Character position at which the search starts; defaults to 0.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return integer|boolean Position of the first match, or false when $needle is not found.
     */
    function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
        return Multibyte::strpos($haystack, $needle, $offset);
    }

}
92:
if (function_exists('mb_strrchr') === false) {

    /**
     * Fallback mb_strrchr(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::strrchr(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $haystack String to search in.
     * @param string $needle String whose last occurrence is looked for.
     * @param boolean $part When true, the portion of $haystack before the last match is
     *    requested; when false (default), the portion from the last match to the end.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
     */
    function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
        return Multibyte::strrchr($haystack, $needle, $part);
    }

}
112:
if (function_exists('mb_strrichr') === false) {

    /**
     * Fallback mb_strrichr(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::strrichr(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $haystack String to search in.
     * @param string $needle String whose last occurrence is looked for, ignoring case.
     * @param boolean $part When true, the portion of $haystack before the last match is
     *    requested; when false (default), the portion from the last match to the end.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
     */
    function mb_strrichr($haystack, $needle, $part = false, $encoding = null) {
        return Multibyte::strrichr($haystack, $needle, $part);
    }

}
132:
if (function_exists('mb_strripos') === false) {

    /**
     * Fallback mb_strripos(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::strripos(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $haystack String to search in.
     * @param string $needle String whose last occurrence is looked for, ignoring case.
     * @param integer $offset Character position in $haystack at which the search starts.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return integer|boolean Position of the last match, or false when $needle is not found.
     */
    function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) {
        return Multibyte::strripos($haystack, $needle, $offset);
    }

}
150:
if (function_exists('mb_strrpos') === false) {

    /**
     * Fallback mb_strrpos(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::strrpos(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $haystack String to search in.
     * @param string $needle String whose last occurrence is looked for.
     * @param integer $offset Search offset, passed through unchanged to Multibyte::strrpos().
     * @param string $encoding Unused; kept for signature compatibility.
     * @return integer|boolean Position of the last match, or false when $needle is not found.
     */
    function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
        return Multibyte::strrpos($haystack, $needle, $offset);
    }

}
169:
if (function_exists('mb_strstr') === false) {

    /**
     * Fallback mb_strstr(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::strstr(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $haystack String to search in.
     * @param string $needle String to look for.
     * @param boolean $part When true, the portion of $haystack before the first match is
     *    requested; when false (default), the portion from the first match to the end.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
     */
    function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
        return Multibyte::strstr($haystack, $needle, $part);
    }

}
189:
if (function_exists('mb_strtolower') === false) {

    /**
     * Fallback mb_strtolower(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::strtolower(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $string String to convert.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return string $string with its alphabetic characters converted to lowercase.
     */
    function mb_strtolower($string, $encoding = null) {
        return Multibyte::strtolower($string);
    }

}
204:
if (function_exists('mb_strtoupper') === false) {

    /**
     * Fallback mb_strtoupper(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::strtoupper(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $string String to convert.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return string $string with its alphabetic characters converted to uppercase.
     */
    function mb_strtoupper($string, $encoding = null) {
        return Multibyte::strtoupper($string);
    }

}
219:
if (function_exists('mb_substr_count') === false) {

    /**
     * Fallback mb_substr_count(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::substrCount(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $haystack String to search in.
     * @param string $needle Substring to count.
     * @param string $encoding Unused; kept for signature compatibility.
     * @return integer Number of times $needle occurs in $haystack.
     */
    function mb_substr_count($haystack, $needle, $encoding = null) {
        return Multibyte::substrCount($haystack, $needle);
    }

}
235:
if (function_exists('mb_substr') === false) {

    /**
     * Fallback mb_substr(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::substr(). The $encoding argument is accepted so the
     * signature matches the native function, but it is not forwarded.
     *
     * @param string $string String to extract from.
     * @param integer $start Character position of the first character to return.
     * @param integer $length Maximum number of characters to return; null means "to the end".
     * @param string $encoding Unused; kept for signature compatibility.
     * @return string The portion of $string selected by $start and $length.
     */
    function mb_substr($string, $start, $length = null, $encoding = null) {
        return Multibyte::substr($string, $start, $length);
    }

}
252:
if (function_exists('mb_encode_mimeheader') === false) {

    /**
     * Fallback mb_encode_mimeheader(), declared only when no function of that name exists yet.
     *
     * Delegates to Multibyte::mimeEncode() with $str, $charset and $linefeed only.
     * $transferEncoding and $indent are accepted so the signature matches the native
     * function, but they are not forwarded.
     *
     * @param string $str The string to encode.
     * @param string $charset Name of the character set $str is represented in; defaults to 'UTF-8'.
     * @param string $transferEncoding Unused; kept for signature compatibility
     *    (natively "B" for Base64 or "Q" for Quoted-Printable).
     * @param string $linefeed End-of-line marker used when folding long lines; defaults to "\r\n" (CRLF).
     * @param integer $indent Unused; kept for signature compatibility.
     * @return string The encoded value returned by Multibyte::mimeEncode().
     */
    function mb_encode_mimeheader($str, $charset = 'UTF-8', $transferEncoding = 'B', $linefeed = "\r\n", $indent = 1) {
        return Multibyte::mimeEncode($str, $charset, $linefeed);
    }

}
275:
276: /**
277: * Multibyte handling methods.
278: *
279: * @package Cake.I18n
280: */
281: class Multibyte {
282:
/**
 * Holds the case folding values
 *
 * NOTE(review): no method visible in this part of the class reads or writes this
 * property directly; presumably it is filled by the lookup behind self::_find(),
 * which is defined outside this section — confirm.
 *
 * @var array
 */
protected static $_caseFold = array();

/**
 * Holds an array of Unicode code point ranges
 *
 * NOTE(review): not referenced by any method visible in this part of the class;
 * presumably maintained together with $_caseFold — confirm.
 *
 * @var array
 */
protected static $_codeRange = array();

/**
 * Holds the current code point range
 *
 * Starts out as null, i.e. no range selected yet.
 *
 * @var string|null
 */
protected static $_table = null;
303:
/**
 * Decodes a UTF-8 byte string into a list of Unicode code points.
 *
 * Bytes below 128 map to themselves. A lead byte below 224 opens a 2-byte
 * sequence, below 240 a 3-byte sequence, and anything higher a 4-byte sequence
 * (previously 4-byte sequences were decoded as 3 bytes plus a stray byte, which
 * yielded wrong code points for characters outside the Basic Multilingual Plane).
 *
 * No validation is performed: an incomplete trailing sequence is silently
 * dropped, and continuation bytes are taken at face value.
 *
 * @param string $string UTF-8 encoded input.
 * @return array List of integer code points, in input order.
 */
public static function utf8($string) {
    $map = array();

    // Bytes collected so far for the multibyte sequence being decoded.
    $values = array();
    // Number of bytes the current sequence needs before it can be decoded.
    $find = 1;
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        $value = ord($string[$i]);

        if ($value < 128) {
            $map[] = $value;
            continue;
        }

        if (empty($values)) {
            // A fresh sequence: derive its length from the lead byte.
            if ($value < 224) {
                $find = 2;
            } elseif ($value < 240) {
                $find = 3;
            } else {
                $find = 4;
            }
        }
        $values[] = $value;

        if (count($values) !== $find) {
            continue;
        }

        // The modulo strips the marker bits from the lead and continuation bytes.
        if ($find === 4) {
            $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096) + (($values[2] % 64) * 64) + ($values[3] % 64);
        } elseif ($find === 3) {
            $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
        } else {
            $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
        }
        $values = array();
        $find = 1;
    }
    return $map;
}
342:
/**
 * Encodes a list of Unicode code points as a UTF-8 byte string.
 *
 * Code points below 128 become one byte, below 2048 two bytes, below 65536
 * three bytes, and everything above four bytes (previously code points of
 * 65536 and above were pushed through the 3-byte formula and produced
 * invalid output).
 *
 * @param array $array List of integer code points.
 * @return string The UTF-8 encoded string; '' for an empty list.
 */
public static function ascii($array) {
    $ascii = '';

    foreach ($array as $utf8) {
        if ($utf8 < 128) {
            $ascii .= chr($utf8);
        } elseif ($utf8 < 2048) {
            // 110xxxxx 10xxxxxx
            $ascii .= chr(192 + ($utf8 >> 6));
            $ascii .= chr(128 + ($utf8 & 63));
        } elseif ($utf8 < 65536) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            $ascii .= chr(224 + ($utf8 >> 12));
            $ascii .= chr(128 + (($utf8 >> 6) & 63));
            $ascii .= chr(128 + ($utf8 & 63));
        } else {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $ascii .= chr(240 + ($utf8 >> 18));
            $ascii .= chr(128 + (($utf8 >> 12) & 63));
            $ascii .= chr(128 + (($utf8 >> 6) & 63));
            $ascii .= chr(128 + ($utf8 & 63));
        }
    }
    return $ascii;
}
367:
/**
 * Case-insensitive search for the first occurrence of a string.
 *
 * When Multibyte::checkMultibyte() reports $haystack as multibyte, both strings
 * are uppercased with Multibyte::strtoupper() and handed to Multibyte::strpos();
 * otherwise PHP's native stripos() does the work.
 *
 * @param string $haystack String to search in.
 * @param string $needle String to look for.
 * @param integer $offset Position in $haystack at which the search starts.
 * @return integer|boolean Position of the first match, or false when $needle is not found.
 */
public static function stripos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return stripos($haystack, $needle, $offset);
    }

    $upperHaystack = Multibyte::strtoupper($haystack);
    $upperNeedle = Multibyte::strtoupper($needle);

    return Multibyte::strpos($upperHaystack, $upperNeedle, $offset);
}
385:
/**
 * Finds the first occurrence of a string within another, case insensitive.
 *
 * The code-point based search is used when $haystack is multibyte, or when
 * running on PHP older than 5.3 with $part set (native stristr() has no third
 * argument there). Otherwise native stristr() is called.
 *
 * Changes from the previous implementation:
 * - the PHP version test uses version_compare() instead of comparing the
 *   version string with a float;
 * - the scan stops where $needle no longer fits, so no undefined offsets are read;
 * - with $part = true and a match at position 0 the result is '' (as native
 *   mb_stristr() does) rather than the whole haystack.
 *
 * @param string $haystack The string from which to get the first occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param boolean $part If true, returns the part of $haystack before the first occurrence
 *    of $needle; if false (default), returns from the first occurrence to the end.
 * @return string|boolean The portion of $haystack, or false if $needle is not found.
 */
public static function stristr($haystack, $needle, $part = false) {
    $legacy = version_compare(PHP_VERSION, '5.3.0', '<');

    if (!($legacy && $part) && !Multibyte::checkMultibyte($haystack)) {
        if ($legacy) {
            return stristr($haystack, $needle);
        }
        return stristr($haystack, $needle, $part);
    }

    // Matching is done on the uppercased copy; slices are cut from the original.
    // NOTE(review): this assumes uppercasing keeps the code point count — confirm
    // against the case-folding tables, which are not visible here.
    $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    $last = count($check) - $needleCount;
    for ($position = 0; $position <= $last; $position++) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($needle[$i] !== $check[$position + $i]) {
                continue 2;
            }
        }
        if ($part) {
            return Multibyte::ascii(array_slice($haystack, 0, $position));
        }
        return Multibyte::ascii(array_slice($haystack, $position));
    }
    return false;
}
446:
/**
 * Returns the length of a string in characters.
 *
 * A string that Multibyte::checkMultibyte() reports as multibyte is decoded with
 * Multibyte::utf8() and its code points are counted; any other string is
 * measured with native strlen().
 *
 * @param string $string The string to measure.
 * @return integer Number of characters in $string.
 */
public static function strlen($string) {
    if (!Multibyte::checkMultibyte($string)) {
        return strlen($string);
    }

    $codePoints = Multibyte::utf8($string);
    return count($codePoints);
}
460:
/**
 * Finds the position of the first occurrence of a string.
 *
 * Non-multibyte haystacks are passed to native strpos(). Multibyte haystacks
 * are decoded to code points and scanned from $offset. The scan stops at the
 * last position where $needle still fits, and a negative $offset is treated as
 * 0; both yield the same results as before but no longer read undefined array
 * offsets.
 *
 * @param string $haystack The string being searched.
 * @param string $needle The string to find in $haystack.
 * @param integer $offset Character position at which the search starts. Defaults to 0.
 * @return integer|boolean Position of the first occurrence of $needle, or false if it
 *    is not found (an empty $needle is never found on the multibyte path).
 */
public static function strpos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return strpos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    $offset = (int)$offset;
    $last = count($haystack) - $needleCount;

    for ($position = ($offset > 0) ? $offset : 0; $position <= $last; $position++) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($needle[$i] !== $haystack[$position + $i]) {
                continue 2;
            }
        }
        return $position;
    }
    return false;
}
503:
/**
 * Finds the last occurrence of a string within another.
 *
 * Both strings are decoded to code points and $haystack is scanned backwards
 * for the complete $needle sequence. The previous implementation guessed the
 * last occurrence from how often the needle's first code point appears in
 * $haystack, which missed valid matches (for example 'ab' in 'abab ax') and
 * read undefined offsets.
 *
 * With $part = true and a match at position 0 the result is '' (as native
 * mb_strrchr() does) rather than the whole haystack.
 *
 * @param string $haystack The string from which to get the last occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param boolean $part If true, returns the part of $haystack before the last occurrence
 *    of $needle; if false (default), returns from the last occurrence to the end.
 * @return string|boolean The portion of $haystack, or false if $needle is not found.
 */
public static function strrchr($haystack, $needle, $part = false) {
    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    // Walk backwards so the first hit is the last occurrence.
    for ($position = count($haystack) - $needleCount; $position >= 0; $position--) {
        if (array_slice($haystack, $position, $needleCount) !== $needle) {
            continue;
        }
        if ($part) {
            return Multibyte::ascii(array_slice($haystack, 0, $position));
        }
        return Multibyte::ascii(array_slice($haystack, $position));
    }
    return false;
}
563:
/**
 * Finds the last occurrence of a string within another, case insensitive.
 *
 * Matching is done on uppercased copies of both strings (Multibyte::strtoupper()),
 * scanning backwards for the complete $needle sequence; the returned portion is
 * cut from the original $haystack. The previous implementation guessed the last
 * occurrence from how often the needle's first code point appears, which missed
 * valid matches and read undefined offsets.
 *
 * With $part = true and a match at position 0 the result is '' (as native
 * mb_strrichr() does) rather than the whole haystack.
 *
 * @param string $haystack The string from which to get the last occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param boolean $part If true, returns the part of $haystack before the last occurrence
 *    of $needle; if false (default), returns from the last occurrence to the end.
 * @return string|boolean The portion of $haystack, or false if $needle is not found.
 */
public static function strrichr($haystack, $needle, $part = false) {
    // NOTE(review): positions found in the uppercased copy are applied to the
    // original; this assumes uppercasing keeps the code point count — confirm
    // against the case-folding tables, which are not visible here.
    $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    // Walk backwards so the first hit is the last occurrence.
    for ($position = count($check) - $needleCount; $position >= 0; $position--) {
        if (array_slice($check, $position, $needleCount) !== $needle) {
            continue;
        }
        if ($part) {
            return Multibyte::ascii(array_slice($haystack, 0, $position));
        }
        return Multibyte::ascii(array_slice($haystack, $position));
    }
    return false;
}
625:
/**
 * Finds the position of the last occurrence of a string within another, case insensitive.
 *
 * Non-multibyte haystacks are passed to native strripos(). Multibyte haystacks
 * and the needle are uppercased, decoded to code points and scanned backwards
 * for the complete $needle sequence.
 *
 * The previous implementation effectively matched on the needle's first code
 * point only (its inner comparison loop never aborted), returned the first
 * rather than the last match whenever $offset was non-zero, and did not
 * support negative offsets.
 *
 * @param string $haystack The string from which to get the position of the last occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param integer $offset If positive, matches must start at or after this position.
 *    If negative, matches must start at or before that many characters from the end.
 * @return integer|boolean Position of the last occurrence of $needle (counted in the
 *    uppercased haystack), or false if $needle is not found.
 */
public static function strripos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return strripos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8(Multibyte::strtoupper($haystack));
    $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
    $haystackCount = count($haystack);
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    // [$first, $last] is the range of start positions a match may have.
    $offset = (int)$offset;
    $first = 0;
    $last = $haystackCount - $needleCount;
    if ($offset > 0) {
        $first = $offset;
    } elseif ($offset < 0 && ($haystackCount + $offset) < $last) {
        $last = $haystackCount + $offset;
    }

    for ($position = $last; $position >= $first; $position--) {
        if (array_slice($haystack, $position, $needleCount) === $needle) {
            return $position;
        }
    }
    return false;
}
675:
/**
 * Finds the position of the last occurrence of a string in a string.
 *
 * Non-multibyte haystacks are passed to native strrpos(). Multibyte haystacks
 * are decoded to code points and scanned backwards for the complete $needle
 * sequence.
 *
 * The previous implementation effectively matched on the needle's first code
 * point only (its inner comparison loop never aborted), returned the first
 * rather than the last match whenever $offset was non-zero, and did not
 * support negative offsets.
 *
 * @param string $haystack The string being checked for the last occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param integer $offset If positive, matches must start at or after this position.
 *    If negative, matches must start at or before that many characters from the end.
 * @return integer|boolean Position of the last occurrence of $needle, or false if it is not found.
 */
public static function strrpos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return strrpos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8($needle);
    $haystackCount = count($haystack);
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    // [$first, $last] is the range of start positions a match may have.
    $offset = (int)$offset;
    $first = 0;
    $last = $haystackCount - $needleCount;
    if ($offset > 0) {
        $first = $offset;
    } elseif ($offset < 0 && ($haystackCount + $offset) < $last) {
        $last = $haystackCount + $offset;
    }

    for ($position = $last; $position >= $first; $position--) {
        if (array_slice($haystack, $position, $needleCount) === $needle) {
            return $position;
        }
    }
    return false;
}
725:
/**
 * Finds the first occurrence of a string within another.
 *
 * The code-point based search is used when $haystack is multibyte, or when
 * running on PHP older than 5.3 with $part set (native strstr() has no third
 * argument there). Otherwise native strstr() is called.
 *
 * Changes from the previous implementation:
 * - the PHP version test uses version_compare() instead of comparing the
 *   version string with a float;
 * - the scan stops where $needle no longer fits, so no undefined offsets are read;
 * - with $part = true and a match at position 0 the result is '' (as native
 *   mb_strstr() does) rather than the whole haystack.
 *
 * @param string $haystack The string from which to get the first occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param boolean $part If true, returns the part of $haystack before the first occurrence
 *    of $needle; if false (default), returns from the first occurrence to the end.
 * @return string|boolean The portion of $haystack, or false if $needle is not found.
 */
public static function strstr($haystack, $needle, $part = false) {
    $legacy = version_compare(PHP_VERSION, '5.3.0', '<');

    if (!($legacy && $part) && !Multibyte::checkMultibyte($haystack)) {
        if ($legacy) {
            return strstr($haystack, $needle);
        }
        return strstr($haystack, $needle, $part);
    }

    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return false;
    }

    $last = count($haystack) - $needleCount;
    for ($position = 0; $position <= $last; $position++) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($needle[$i] !== $haystack[$position + $i]) {
                continue 2;
            }
        }
        if ($part) {
            return Multibyte::ascii(array_slice($haystack, 0, $position));
        }
        return Multibyte::ascii(array_slice($haystack, $position));
    }
    return false;
}
784:
/**
 * Makes a string lowercase.
 *
 * The string is decoded to code points. Code points below 128 go through
 * native strtolower(); every other code point is looked up with
 * self::_find($char, 'upper') and replaced by the first 'lower' code point of
 * the first entry whose 'upper' value equals it. Code points with no such
 * entry are kept unchanged.
 *
 * The previous version guarded the replacement with count() on the 'lower'
 * code point itself. That was always 1 for a scalar on PHP 5, but raises a
 * warning on PHP 7.2+ and a TypeError on PHP 8; an isset() check replaces it.
 * NOTE(review): this assumes 'lower' holds scalar code points, which is how
 * strtoupper() compares them — confirm against the tables behind _find().
 *
 * @param string $string The string being lowercased.
 * @return string $string with all alphabetic characters converted to lowercase.
 */
public static function strtolower($string) {
    $lowerCase = array();

    foreach (Multibyte::utf8($string) as $char) {
        if ($char < 128) {
            $lowerCase[] = ord(strtolower(chr($char)));
            continue;
        }

        // Falls through unchanged unless a mapping is found below.
        $mapped = $char;
        $keys = self::_find($char, 'upper');

        if (!empty($keys)) {
            foreach ($keys as $entry) {
                if ($entry['upper'] == $char && isset($entry['lower'][0])) {
                    $mapped = $entry['lower'][0];
                    break;
                }
            }
        }
        $lowerCase[] = $mapped;
    }
    return Multibyte::ascii($lowerCase);
}
828:
/**
 * Make a string uppercase
 *
 * The string is decoded to code points. Code points below 128 go through native
 * strtoupper(); every other code point is looked up with self::_find() and
 * replaced by the 'upper' value of a matching entry. An entry whose 'lower' list
 * holds several code points is matched against the run of code points starting
 * at the current position and, on success, emits a single 'upper' code point.
 *
 * @param string $string The string being uppercased.
 * @return string with all alphabetic characters converted to uppercase.
 */
public static function strtoupper($string) {
    $utf8Map = Multibyte::utf8($string);

    $length = count($utf8Map);
    // Code points that were consumed as part of a multi-code-point 'lower' sequence.
    $replaced = array();
    $upperCase = array();

    for ($i = 0; $i < $length; $i++) {
        $char = $utf8Map[$i];

        if ($char < 128) {
            // chr() yields one byte, so this loop runs once and $upper is that byte uppercased.
            $str = strtoupper(chr($char));
            $strlen = strlen($str);
            for ($ii = 0; $ii < $strlen; $ii++) {
                $upper = ord(substr($str, $ii, 1));
            }
            $upperCase[] = $upper;
            $matched = true;

        } else {
            $matched = false;
            // Candidate case-mapping entries for this code point (_find() is defined outside this section).
            $keys = self::_find($char);
            $keyCount = count($keys);

            if (!empty($keys)) {
                foreach ($keys as $key => $value) {
                    $matched = false;
                    $replace = 0;
                    if ($length > 1 && count($keys[$key]['lower']) > 1) {
                        // This entry maps a sequence of lower code points to one upper code point:
                        // count how many positions of the sequence match the input from $i onwards.
                        $j = 0;

                        for ($ii = 0, $count = count($keys[$key]['lower']); $ii < $count; $ii++) {
                            // NOTE(review): $i + $ii can run past the end of $utf8Map, which reads an undefined offset.
                            $nextChar = $utf8Map[$i + $ii];

                            if (isset($nextChar) && ($nextChar == $keys[$key]['lower'][$j + $ii])) {
                                $replace++;
                            }
                        }
                        if ($replace == $count) {
                            // Whole sequence matched: emit the upper code point and remember the consumed ones.
                            $upperCase[] = $keys[$key]['upper'];
                            $replaced = array_merge($replaced, array_values($keys[$key]['lower']));
                            $matched = true;
                            break 1;
                        }
                    } elseif ($length > 1 && $keyCount > 1) {
                        // Single-code-point entry, but other candidates exist: try the candidates
                        // from index 1 upwards as sequences before falling back to this entry.
                        $j = 0;
                        for ($ii = 1; $ii < $keyCount; $ii++) {
                            $nextChar = $utf8Map[$i + $ii - 1];

                            if (in_array($nextChar, $keys[$ii]['lower'])) {

                                for ($jj = 0, $count = count($keys[$ii]['lower']); $jj < $count; $jj++) {
                                    $nextChar = $utf8Map[$i + $jj];

                                    if (isset($nextChar) && ($nextChar == $keys[$ii]['lower'][$j + $jj])) {
                                        $replace++;
                                    }
                                }
                                if ($replace == $count) {
                                    $upperCase[] = $keys[$ii]['upper'];
                                    $replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
                                    $matched = true;
                                    // Leaves both the candidate loop and the enclosing foreach.
                                    break 2;
                                }
                            }
                        }
                    }
                    // Plain one-to-one mapping: the entry's first lower code point is the current one.
                    if ($keys[$key]['lower'][0] == $char) {
                        $upperCase[] = $keys[$key]['upper'];
                        $matched = true;
                        break 1;
                    }
                }
            }
        }
        // Unmapped code points are copied through, except those already consumed by a
        // multi-code-point replacement, which are dropped.
        if ($matched === false && !in_array($char, $replaced, true)) {
            $upperCase[] = $char;
        }
    }
    return Multibyte::ascii($upperCase);
}
916:
/**
 * Counts the number of substring occurrences.
 *
 * Both strings are decoded to code points and $haystack is scanned left to
 * right for the complete $needle sequence. Matches do not overlap: after a hit
 * the scan resumes behind it, as native mb_substr_count() does.
 *
 * The previous implementation compared every needle code point against
 * $haystack[$i + 1], could decrement the counter below zero on partial
 * matches, counted overlapping matches and read undefined offsets.
 *
 * @param string $haystack The string being checked.
 * @param string $needle The string being found.
 * @return integer The number of times $needle occurs in $haystack; 0 for an empty $needle.
 */
public static function substrCount($haystack, $needle) {
    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0) {
        return 0;
    }

    $count = 0;
    $position = 0;
    $last = count($haystack) - $needleCount;

    while ($position <= $last) {
        if (array_slice($haystack, $position, $needleCount) === $needle) {
            $count++;
            // Skip the matched run so occurrences never overlap.
            $position += $needleCount;
        } else {
            $position++;
        }
    }
    return $count;
}
951:
/**
 * Get part of string
 *
 * The string is converted with Multibyte::utf8(), sliced, and converted back
 * with Multibyte::ascii() (both defined elsewhere in this class). Negative
 * values follow the array_slice() rules, which match mb_substr(): a negative
 * $start counts from the end of the string, a negative $length stops that many
 * characters before the end.
 *
 * @param string $string The string being checked.
 * @param integer $start The first position used in $string.
 * @param integer $length The maximum length of the returned string. Null means "up to the end".
 * @return string The portion of $string specified by the $start and $length parameters.
 */
public static function substr($string, $start, $length = null) {
    // Fast path: the whole string was requested, no conversion needed.
    if ($start === 0 && $length === null) {
        return $string;
    }

    $string = Multibyte::utf8($string);

    if ($length === null) {
        return Multibyte::ascii(array_slice($string, (int)$start));
    }
    // array_slice() clamps out-of-range offsets and lengths, so a $length larger
    // than what is left simply returns the remainder.
    return Multibyte::ascii(array_slice($string, (int)$start, (int)$length));
}
982:
/**
 * Prepare a string for mail transport, using the provided encoding
 *
 * The string is base64 encoded and wrapped as `=?CHARSET?B?...?=` words. Long
 * input is split into several such words joined by `$newline` plus a space;
 * the payload size is chosen so that one word does not exceed 75 characters.
 * Strings shorter than 75 bytes for which Multibyte::checkMultibyte() returns
 * false are returned unchanged.
 *
 * @param string $string value to encode
 * @param string $charset charset to use for encoding. When empty, the `App.encoding`
 *   configuration value is used.
 * @param string $newline line separator placed between encoded words
 * @return string
 */
public static function mimeEncode($string, $charset = null, $newline = "\r\n") {
    if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
        return $string;
    }

    if (empty($charset)) {
        $charset = Configure::read('App.encoding');
    }
    $charset = strtoupper($charset);

    $start = '=?' . $charset . '?B?';
    $end = '?=';
    $spacer = $end . $newline . ' ' . $start;

    // Room left for base64 data in a 75 character word, rounded down to a
    // multiple of 4 because base64 output comes in groups of 4 characters.
    $length = 75 - strlen($start) - strlen($end);
    $length = $length - ($length % 4);
    if ($charset == 'UTF-8') {
        $parts = array();
        // Number of raw bytes that encode to at most $length base64 characters.
        $maxchars = (int)floor(($length * 3) / 4);
        while (strlen($string) > $maxchars) {
            $cut = $maxchars;
            // Step back over UTF-8 continuation bytes (10xxxxxx, 128..191) so a
            // multi-byte character is never split between two encoded words.
            while ($cut > 0 && (ord($string[$cut]) & 0xC0) === 0x80) {
                $cut--;
            }
            if ($cut === 0) {
                // Malformed input (a run of continuation bytes): cutting at 0
                // would never consume anything and loop forever, so fall back
                // to a plain byte split to guarantee progress.
                $cut = $maxchars;
            }
            $parts[] = base64_encode(substr($string, 0, $cut));
            $string = substr($string, $cut);
        }
        $parts[] = base64_encode($string);
        $string = implode($spacer, $parts);
    } else {
        $string = chunk_split(base64_encode($string), $length, $spacer);
        // chunk_split() appends the separator after the last chunk as well;
        // remove it without going through a regular expression.
        $spacerLength = strlen($spacer);
        if (substr($string, -$spacerLength) === $spacer) {
            $string = substr($string, 0, -$spacerLength);
        }
    }
    return $start . $string . $end;
}
1030:
/**
 * Return the Code points range for Unicode characters
 *
 * The result is the name of a casefolding table such as `0080_00ff`, or false
 * when no range applies. It is also stored in self::$_codeRange[$decimal].
 *
 * The bounds are tested in a fixed order and the first hit wins. Several rules
 * have no lower bound, so a value that falls into a gap between two blocks (or
 * a value of 128 and below) is assigned to the next rule whose upper bound it
 * stays under, e.g. anything up to 128 yields `0100_017f`.
 *
 * @param integer $decimal decimal value of the character
 * @return string|boolean Range name, or false if $decimal matches no rule.
 */
protected static function _codepoint($decimal) {
    // array(inclusive lower bound or null for none, exclusive upper bound, table name)
    $rules = array(
        array(null, 384, '0100_017f'), // Latin Extended-A
        array(null, 592, '0180_024F'), // Latin Extended-B
        array(null, 688, '0250_02af'), // IPA Extensions
        array(880, 1024, '0370_03ff'), // Greek and Coptic
        array(null, 1280, '0400_04ff'), // Cyrillic
        array(null, 1328, '0500_052f'), // Cyrillic Supplement
        array(null, 1424, '0530_058f'), // Armenian
        array(7680, 7936, '1e00_1eff'), // Latin Extended Additional
        array(null, 8192, '1f00_1fff'), // Greek Extended
        array(8448, 8528, '2100_214f'), // Letterlike Symbols
        array(null, 8592, '2150_218f'), // Number Forms
        array(9312, 9472, '2460_24ff'), // Enclosed Alphanumerics
        array(11264, 11360, '2c00_2c5f'), // Glagolitic
        array(null, 11392, '2c60_2c7f'), // Latin Extended-C
        array(null, 11520, '2c80_2cff'), // Coptic
        array(65280, 65520, 'ff00_ffef'), // Halfwidth and Fullwidth Forms
    );

    $table = false;
    if ($decimal > 128 && $decimal < 256) {
        // Latin-1 Supplement is the only rule with an exclusive lower bound.
        $table = '0080_00ff';
    } else {
        foreach ($rules as $rule) {
            list($min, $max, $name) = $rule;
            if (($min === null || $decimal >= $min) && $decimal < $max) {
                $table = $name;
                break;
            }
        }
    }

    self::$_codeRange[$decimal] = $table;
    return $table;
}
1078:
/**
 * Find the related code folding values for $char
 *
 * The range of $char is resolved through self::_codepoint() the first time the
 * character is seen. The casefolding table of that range is read from
 * `unicode/casefolding/<range>` via the `_cake_core_` Configure reader and kept
 * in self::$_caseFold, so each table is loaded only once. self::$_table is set
 * to the range that was searched.
 *
 * @param integer $char decimal value of character
 * @param string $type `lower` compares $char with the first element of each entry's
 *   `lower` list, `upper` compares it with the entry's `upper` value. Any other
 *   value matches nothing.
 * @return array|null Matching table entries (possibly empty), or null when no
 *   range is known for $char.
 */
protected static function _find($char, $type = 'lower') {
    $found = array();
    if (!isset(self::$_codeRange[$char])) {
        $range = self::_codepoint($char);
        if ($range === false) {
            return null;
        }
        // Only hit the config files when this range has not been loaded yet;
        // other characters of the same range reuse the cached table.
        if (!isset(self::$_caseFold[$range])) {
            if (!Configure::configured('_cake_core_')) {
                App::uses('PhpReader', 'Configure');
                Configure::config('_cake_core_', new PhpReader(CAKE . 'Config' . DS));
            }
            Configure::load('unicode' . DS . 'casefolding' . DS . $range, '_cake_core_');
            self::$_caseFold[$range] = Configure::read($range);
            // The table lives in self::$_caseFold now; drop the Configure copy.
            Configure::delete($range);
        }
    }

    // A stored false means _codepoint() found no range for this character.
    if (!self::$_codeRange[$char]) {
        return null;
    }
    self::$_table = self::$_codeRange[$char];

    foreach (self::$_caseFold[self::$_table] as $entry) {
        if ($type === 'lower') {
            if ($entry['lower'][0] === $char) {
                $found[] = $entry;
            }
        } elseif ($type === 'upper') {
            if ($entry['upper'] === $char) {
                $found[] = $entry;
            }
        }
    }
    return $found;
}
1117:
/**
 * Check the $string for multibyte characters
 *
 * The string is scanned byte by byte; any byte outside the 7-bit ASCII range
 * (value 128 / 0x80 and above) makes the check succeed.
 *
 * @param string $string value to test
 * @return boolean True if at least one byte is greater than 127, false otherwise
 *   (including for an empty string).
 */
public static function checkMultibyte($string) {
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        // 127 is the highest ASCII value; 128 (0x80) is already non-ASCII.
        if (ord($string[$i]) > 127) {
            return true;
        }
    }
    return false;
}
1134:
1135: }
1136: