1: <?php
2: /**
3: * Multibyte handling methods.
4: *
5: *
6: * PHP versions 4 and 5
7: *
8: * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
9: * Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
10: *
11: * Licensed under The MIT License
12: * Redistributions of files must retain the above copyright notice.
13: *
14: * @copyright Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
15: * @link http://cakephp.org CakePHP(tm) Project
16: * @package cake
17: * @subpackage cake.cake.libs
18: * @since CakePHP(tm) v 1.2.0.6833
19: * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
20: */
// When the mbstring extension is available, switch its internal encoding to
// the value configured under App.encoding, provided a non-empty one is set.
if (function_exists('mb_internal_encoding')) {
	$encoding = Configure::read('App.encoding');
	if ($encoding) {
		mb_internal_encoding($encoding);
	}
}
27:
28: /**
29: * Find position of first occurrence of a case-insensitive string.
30: *
31: * @param string $haystack The string from which to get the position of the first occurrence of $needle.
32: * @param string $needle The string to find in $haystack.
33: * @param integer $offset The position in $haystack to start searching.
34: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
35: * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string, or false
36: * if $needle is not found.
37: */
if (!function_exists('mb_stripos')) {
	// Fallback defined only when no mb_stripos() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::stripos().
	function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::stripos($haystack, $needle, $offset);
		return $position;
	}
}
43:
44: /**
45: * Finds first occurrence of a string within another, case insensitive.
46: *
47: * @param string $haystack The string from which to get the first occurrence of $needle.
48: * @param string $needle The string to find in $haystack.
49: * @param boolean $part Determines which portion of $haystack this function returns.
50: * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
51: * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
52: * Default value is false.
53: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
54: * @return string|boolean The portion of $haystack, or false if $needle is not found.
55: */
if (!function_exists('mb_stristr')) {
	// Fallback defined only when no mb_stristr() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::stristr().
	function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::stristr($haystack, $needle, $part);
		return $portion;
	}
}
61:
62: /**
63: * Get string length.
64: *
65: * @param string $string The string being checked for length.
66: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
67: * @return integer The number of characters in string $string having character encoding encoding.
68: * A multi-byte character is counted as 1.
69: */
if (!function_exists('mb_strlen')) {
	// Fallback defined only when no mb_strlen() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::strlen().
	function mb_strlen($string, $encoding = null) {
		$characters = Multibyte::strlen($string);
		return $characters;
	}
}
75:
76: /**
77: * Find position of first occurrence of a string.
78: *
79: * @param string $haystack The string being checked.
* @param string $needle The string to find in $haystack.
81: * @param integer $offset The search offset. If it is not specified, 0 is used.
82: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
83: * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string.
84: * If $needle is not found, it returns false.
85: */
if (!function_exists('mb_strpos')) {
	// Fallback defined only when no mb_strpos() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::strpos().
	function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::strpos($haystack, $needle, $offset);
		return $position;
	}
}
91:
92: /**
93: * Finds the last occurrence of a character in a string within another.
94: *
95: * @param string $haystack The string from which to get the last occurrence of $needle.
96: * @param string $needle The string to find in $haystack.
97: * @param boolean $part Determines which portion of $haystack this function returns.
98: * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
99: * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
100: * Default value is false.
101: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
102: * @return string|boolean The portion of $haystack. or false if $needle is not found.
103: */
if (!function_exists('mb_strrchr')) {
	// Fallback defined only when no mb_strrchr() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::strrchr().
	function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::strrchr($haystack, $needle, $part);
		return $portion;
	}
}
109:
110: /**
111: * Finds the last occurrence of a character in a string within another, case insensitive.
112: *
113: * @param string $haystack The string from which to get the last occurrence of $needle.
114: * @param string $needle The string to find in $haystack.
115: * @param boolean $part Determines which portion of $haystack this function returns.
116: * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
117: * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
118: * Default value is false.
119: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
120: * @return string|boolean The portion of $haystack. or false if $needle is not found.
121: */
if (!function_exists('mb_strrichr')) {
	// Fallback defined only when no mb_strrichr() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::strrichr().
	function mb_strrichr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::strrichr($haystack, $needle, $part);
		return $portion;
	}
}
127:
128: /**
129: * Finds position of last occurrence of a string within another, case insensitive
130: *
131: * @param string $haystack The string from which to get the position of the last occurrence of $needle.
132: * @param string $needle The string to find in $haystack.
133: * @param integer $offset The position in $haystack to start searching.
134: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
135: * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string,
136: * or false if $needle is not found.
137: */
if (!function_exists('mb_strripos')) {
	// Fallback defined only when no mb_strripos() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::strripos().
	function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::strripos($haystack, $needle, $offset);
		return $position;
	}
}
143:
144: /**
145: * Find position of last occurrence of a string in a string.
146: *
147: * @param string $haystack The string being checked, for the last occurrence of $needle.
148: * @param string $needle The string to find in $haystack.
149: * @param integer $offset May be specified to begin searching an arbitrary number of characters into the string.
150: * Negative values will stop searching at an arbitrary point prior to the end of the string.
151: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
152: * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string.
153: * If $needle is not found, it returns false.
154: */
if (!function_exists('mb_strrpos')) {
	// Fallback defined only when no mb_strrpos() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::strrpos().
	function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::strrpos($haystack, $needle, $offset);
		return $position;
	}
}
160:
161: /**
162: * Finds first occurrence of a string within another
163: *
164: * @param string $haystack The string from which to get the first occurrence of $needle.
165: * @param string $needle The string to find in $haystack
166: * @param boolean $part Determines which portion of $haystack this function returns.
167: * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
168: * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
169: * Default value is FALSE.
170: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
* @return string|boolean The portion of $haystack, or false if $needle is not found.
172: */
if (!function_exists('mb_strstr')) {
	// Fallback defined only when no mb_strstr() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::strstr().
	function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::strstr($haystack, $needle, $part);
		return $portion;
	}
}
178:
179: /**
180: * Make a string lowercase
181: *
182: * @param string $string The string being lowercased.
183: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
184: * @return string with all alphabetic characters converted to lowercase.
185: */
if (!function_exists('mb_strtolower')) {
	// Fallback defined only when no mb_strtolower() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::strtolower().
	function mb_strtolower($string, $encoding = null) {
		$lowered = Multibyte::strtolower($string);
		return $lowered;
	}
}
191:
192: /**
193: * Make a string uppercase
194: *
195: * @param string $string The string being uppercased.
196: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
197: * @return string with all alphabetic characters converted to uppercase.
198: */
if (!function_exists('mb_strtoupper')) {
	// Fallback defined only when no mb_strtoupper() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::strtoupper().
	function mb_strtoupper($string, $encoding = null) {
		$raised = Multibyte::strtoupper($string);
		return $raised;
	}
}
204:
205: /**
206: * Count the number of substring occurrences
207: *
208: * @param string $haystack The string being checked.
209: * @param string $needle The string being found.
210: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
211: * @return integer The number of times the $needle substring occurs in the $haystack string.
212: */
if (!function_exists('mb_substr_count')) {
	// Fallback defined only when no mb_substr_count() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::substrCount().
	function mb_substr_count($haystack, $needle, $encoding = null) {
		$occurrences = Multibyte::substrCount($haystack, $needle);
		return $occurrences;
	}
}
218:
219: /**
220: * Get part of string
221: *
222: * @param string $string The string being checked.
223: * @param integer $start The first position used in $string.
224: * @param integer $length The maximum length of the returned string.
225: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
* @return string The portion of $string specified by the $start and $length parameters.
227: */
if (!function_exists('mb_substr')) {
	// Fallback defined only when no mb_substr() exists yet.
	// $encoding is accepted but not forwarded to Multibyte::substr().
	function mb_substr($string, $start, $length = null, $encoding = null) {
		$portion = Multibyte::substr($string, $start, $length);
		return $portion;
	}
}
233:
234: /**
235: * Encode string for MIME header
236: *
237: * @param string $str The string being encoded
238: * @param string $charset specifies the name of the character set in which str is represented in.
239: * The default value is determined by the current NLS setting (mbstring.language).
240: * @param string $transfer_encoding specifies the scheme of MIME encoding.
241: * It should be either "B" (Base64) or "Q" (Quoted-Printable). Falls back to "B" if not given.
242: * @param string $linefeed specifies the EOL (end-of-line) marker with which
243: * mb_encode_mimeheader() performs line-folding
* (an RFC term: the act of breaking a line longer than a certain length into multiple lines.
* The length is currently hard-coded to 74 characters). Falls back to "\r\n" (CRLF) if not given.
* @param integer $indent [definition unknown and appears to have no effect]
247: * @return string A converted version of the string represented in ASCII.
248: */
if (!function_exists('mb_encode_mimeheader')) {
	// Fallback defined only when no mb_encode_mimeheader() exists yet.
	// Only $str, $charset and $linefeed are forwarded to Multibyte::mimeEncode();
	// $transfer_encoding and $indent are accepted but not used.
	function mb_encode_mimeheader($str, $charset = 'UTF-8', $transfer_encoding = 'B', $linefeed = "\r\n", $indent = 1) {
		$encoded = Multibyte::mimeEncode($str, $charset, $linefeed);
		return $encoded;
	}
}
254:
255: /**
256: * Multibyte handling methods.
257: *
258: *
259: * @package cake
260: * @subpackage cake.cake.libs
261: */
262: class Multibyte extends Object {
263:
264: /**
265: * Holds the case folding values
266: *
267: * @var array
268: * @access private
269: */
270: var $__caseFold = array();
271:
272: /**
273: * Holds an array of Unicode code point ranges
274: *
275: * @var array
276: * @access private
277: */
278: var $__codeRange = array();
279:
280: /**
281: * Holds the current code point range
282: *
283: * @var string
284: * @access private
285: */
286: var $__table = null;
287:
288: /**
289: * Gets a reference to the Multibyte object instance
290: *
291: * @return object Multibyte instance
292: * @access public
293: * @static
294: */
function &getInstance() {
	// The single shared instance lives in slot 0 of a function-static array and
	// is created on first use.
	static $instance = array();

	if (!$instance) {
		// Plain assignment instead of "=& new": assigning the result of new by
		// reference is deprecated as of PHP 5.3 and a parse error in PHP 7+.
		// The class keeps no reference to itself, so the copy PHP 4 makes here
		// is harmless.
		$instance[0] = new Multibyte();
	}
	return $instance[0];
}
303:
304: /**
305: * Converts a multibyte character string
306: * to the decimal value of the character
307: *
308: * @param multibyte string $string
309: * @return array
310: * @access public
311: * @static
312: */
function utf8($string) {
	// Decodes a UTF-8 byte string into a list of integer code points.
	// Handles 1-, 2-, 3- and 4-byte sequences. Input is not validated:
	// malformed or truncated sequences produce unspecified values.
	$map = array();

	// Bytes collected so far for the multibyte sequence being decoded.
	$values = array();
	// Total number of bytes the current sequence is expected to have.
	$find = 1;
	$length = strlen($string);

	for ($i = 0; $i < $length; $i++) {
		$value = ord($string[$i]);

		if ($value < 128) {
			// ASCII byte: the byte value is the code point.
			$map[] = $value;
			continue;
		}

		if (empty($values)) {
			// Lead byte: its range determines the sequence length.
			if ($value < 224) {
				$find = 2;
			} elseif ($value < 240) {
				$find = 3;
			} else {
				$find = 4;
			}
		}
		$values[] = $value;

		if (count($values) === $find) {
			// Strip the marker bits of each byte (via modulo) and combine the payloads.
			if ($find === 4) {
				$map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
					+ (($values[2] % 64) * 64) + ($values[3] % 64);
			} elseif ($find === 3) {
				$map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
			} else {
				$map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
			}
			$values = array();
			$find = 1;
		}
	}
	return $map;
}
344:
345: /**
346: * Converts the decimal value of a multibyte character string
347: * to a string
348: *
349: * @param array $array
350: * @return string
351: * @access public
352: * @static
353: */
function ascii($array) {
	// Encodes a list of integer code points as a UTF-8 byte string.
	// Code points below 128 become one byte, below 2048 two bytes,
	// below 65536 three bytes, and everything above four bytes.
	$ascii = '';

	foreach ($array as $utf8) {
		if ($utf8 < 128) {
			$ascii .= chr($utf8);
		} elseif ($utf8 < 2048) {
			$ascii .= chr(0xC0 | ($utf8 >> 6));
			$ascii .= chr(0x80 | ($utf8 & 0x3F));
		} elseif ($utf8 < 65536) {
			$ascii .= chr(0xE0 | ($utf8 >> 12));
			$ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
			$ascii .= chr(0x80 | ($utf8 & 0x3F));
		} else {
			// Supplementary planes need a 4-byte sequence; the 3-byte form
			// cannot represent them.
			$ascii .= chr(0xF0 | ($utf8 >> 18));
			$ascii .= chr(0x80 | (($utf8 >> 12) & 0x3F));
			$ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
			$ascii .= chr(0x80 | ($utf8 & 0x3F));
		}
	}
	return $ascii;
}
371:
372: /**
373: * Find position of first occurrence of a case-insensitive string.
374: *
375: * @param multi-byte string $haystack The string from which to get the position of the first occurrence of $needle.
376: * @param multi-byte string $needle The string to find in $haystack.
377: * @param integer $offset The position in $haystack to start searching.
378: * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string,
379: * or false if $needle is not found.
380: * @access public
381: * @static
382: */
function stripos($haystack, $needle, $offset = 0) {
	// The native stripos() is used only when the PHP5 constant is truthy and
	// the haystack does not test as multibyte.
	$useNative = PHP5 && !Multibyte::checkMultibyte($haystack);
	if ($useNative) {
		return stripos($haystack, $needle, $offset);
	}

	// Otherwise fold both strings to upper case and do a case-sensitive search.
	$upperHaystack = Multibyte::strtoupper($haystack);
	$upperNeedle = Multibyte::strtoupper($needle);
	return Multibyte::strpos($upperHaystack, $upperNeedle, $offset);
}
391:
392: /**
393: * Finds first occurrence of a string within another, case insensitive.
394: *
395: * @param string $haystack The string from which to get the first occurrence of $needle.
396: * @param string $needle The string to find in $haystack.
397: * @param boolean $part Determines which portion of $haystack this function returns.
398: * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
399: * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
400: * Default value is false.
* @return string|boolean The portion of $haystack, or false if $needle is not found.
402: * @access public
403: * @static
404: */
function stristr($haystack, $needle, $part = false) {
	// Native stristr() accepts its third argument only from PHP 5.3 on.
	$legacy = version_compare(PHP_VERSION, '5.3.0', '<');

	if (($legacy && $part) || Multibyte::checkMultibyte($haystack)) {
		// Matching runs on upper-cased code points; the returned portion is
		// cut from the unmodified haystack at the same index.
		$check = Multibyte::utf8(Multibyte::strtoupper($haystack));
		$haystack = Multibyte::utf8($haystack);
		$needle = Multibyte::utf8(Multibyte::strtoupper($needle));
		$needleCount = count($needle);

		if ($needleCount === 0) {
			return false;
		}

		// Last index at which a complete needle still fits; bounding the loop
		// here avoids reading past the end of $check.
		$last = count($check) - $needleCount;

		for ($position = 0; $position <= $last; $position++) {
			if ($check[$position] !== $needle[0]) {
				continue;
			}
			if (array_slice($check, $position, $needleCount) !== $needle) {
				continue;
			}
			if ($part) {
				// Everything before the match; an empty string when the match
				// is at the very start.
				return Multibyte::ascii(array_slice($haystack, 0, $position));
			}
			return Multibyte::ascii(array_slice($haystack, $position));
		}
		return false;
	}

	if (!$legacy) {
		return stristr($haystack, $needle, $part);
	}
	return stristr($haystack, $needle);
}
454:
455: /**
456: * Get string length.
457: *
458: * @param string $string The string being checked for length.
459: * @return integer The number of characters in string $string
460: * @access public
461: * @static
462: */
function strlen($string) {
	// Strings that do not test as multibyte are measured with native strlen().
	if (!Multibyte::checkMultibyte($string)) {
		return strlen($string);
	}

	// Otherwise the length is the number of decoded code points.
	$codePoints = Multibyte::utf8($string);
	return count($codePoints);
}
470:
471: /**
472: * Find position of first occurrence of a string.
473: *
474: * @param string $haystack The string being checked.
* @param string $needle The string to find in $haystack.
476: * @param integer $offset The search offset. If it is not specified, 0 is used.
477: * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string.
478: * If $needle is not found, it returns false.
479: * @access public
480: * @static
481: */
function strpos($haystack, $needle, $offset = 0) {
	// Haystacks that do not test as multibyte go straight to native strpos().
	if (!Multibyte::checkMultibyte($haystack)) {
		return strpos($haystack, $needle, $offset);
	}

	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Last index at which a complete needle still fits; bounding the loop
	// here avoids reading past the end of $haystack on a partial match.
	$last = count($haystack) - $needleCount;

	// A negative offset is treated as 0. This keeps the results the old loop
	// produced for negative offsets without indexing before the array start.
	$start = max(0, (int)$offset);

	for ($position = $start; $position <= $last; $position++) {
		if ($haystack[$position] !== $needle[0]) {
			continue;
		}
		if (array_slice($haystack, $position, $needleCount) === $needle) {
			return $position;
		}
	}
	return false;
}
515:
516: /**
517: * Finds the last occurrence of a character in a string within another.
518: *
519: * @param string $haystack The string from which to get the last occurrence of $needle.
520: * @param string $needle The string to find in $haystack.
521: * @param boolean $part Determines which portion of $haystack this function returns.
522: * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
523: * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
524: * Default value is false.
525: * @return string|boolean The portion of $haystack. or false if $needle is not found.
526: * @access public
527: * @static
528: */
function strrchr($haystack, $needle, $part = false) {
	// Works on code points so multibyte characters are never split.
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Scan backwards from the last index where the needle still fits, so the
	// first hit is the last occurrence of the complete needle.
	for ($position = count($haystack) - $needleCount; $position >= 0; $position--) {
		if ($haystack[$position] !== $needle[0]) {
			continue;
		}
		if (array_slice($haystack, $position, $needleCount) !== $needle) {
			continue;
		}
		if ($part) {
			// Everything before the last occurrence (may be an empty string).
			return Multibyte::ascii(array_slice($haystack, 0, $position));
		}
		return Multibyte::ascii(array_slice($haystack, $position));
	}
	return false;
}
577:
578: /**
579: * Finds the last occurrence of a character in a string within another, case insensitive.
580: *
581: * @param string $haystack The string from which to get the last occurrence of $needle.
582: * @param string $needle The string to find in $haystack.
583: * @param boolean $part Determines which portion of $haystack this function returns.
584: * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
585: * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
586: * Default value is false.
587: * @return string|boolean The portion of $haystack. or false if $needle is not found.
588: * @access public
589: * @static
590: */
function strrichr($haystack, $needle, $part = false) {
	// Matching runs on upper-cased code points; the returned portion is cut
	// from the unmodified haystack at the same index.
	$check = Multibyte::utf8(Multibyte::strtoupper($haystack));
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8(Multibyte::strtoupper($needle));
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Scan backwards from the last index where the needle still fits, so the
	// first hit is the last occurrence of the complete needle.
	for ($position = count($check) - $needleCount; $position >= 0; $position--) {
		if ($check[$position] !== $needle[0]) {
			continue;
		}
		if (array_slice($check, $position, $needleCount) !== $needle) {
			continue;
		}
		if ($part) {
			// Everything before the last occurrence (may be an empty string).
			return Multibyte::ascii(array_slice($haystack, 0, $position));
		}
		return Multibyte::ascii(array_slice($haystack, $position));
	}
	return false;
}
641:
642: /**
643: * Finds position of last occurrence of a string within another, case insensitive
644: *
645: * @param string $haystack The string from which to get the position of the last occurrence of $needle.
646: * @param string $needle The string to find in $haystack.
647: * @param integer $offset The position in $haystack to start searching.
648: * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string,
649: * or false if $needle is not found.
650: * @access public
651: * @static
652: */
function strripos($haystack, $needle, $offset = 0) {
	// The native strripos() is used only when the PHP5 constant is truthy and
	// the haystack does not test as multibyte.
	if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
		return strripos($haystack, $needle, $offset);
	}

	// Case-insensitivity is achieved by upper-casing both sides first.
	$haystack = Multibyte::utf8(Multibyte::strtoupper($haystack));
	$needle = Multibyte::utf8(Multibyte::strtoupper($needle));
	$haystackCount = count($haystack);
	$needleCount = count($needle);

	if ($needleCount === 0 || $needleCount > $haystackCount) {
		return false;
	}

	// Window of allowed match start positions, following native strrpos():
	// a positive offset raises the lower bound; a negative offset lowers the
	// upper bound to $haystackCount + $offset once it reaches back at least
	// one needle length.
	$first = 0;
	$last = $haystackCount - $needleCount;
	$offset = (int)$offset;
	if ($offset > 0) {
		$first = $offset;
	} elseif ($offset < 0 && -$offset >= $needleCount) {
		$last = $haystackCount + $offset;
	}

	// Scan backwards so the first hit is the last occurrence in the window.
	for ($position = $last; $position >= $first; $position--) {
		if ($haystack[$position] !== $needle[0]) {
			continue;
		}
		if (array_slice($haystack, $position, $needleCount) === $needle) {
			return $position;
		}
	}
	return false;
}
693:
694: /**
695: * Find position of last occurrence of a string in a string.
696: *
697: * @param string $haystack The string being checked, for the last occurrence of $needle.
698: * @param string $needle The string to find in $haystack.
699: * @param integer $offset May be specified to begin searching an arbitrary number of characters into the string.
700: * Negative values will stop searching at an arbitrary point prior to the end of the string.
701: * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string.
702: * If $needle is not found, it returns false.
703: * @access public
704: * @static
705: */
function strrpos($haystack, $needle, $offset = 0) {
	// The native strrpos() is used only when the PHP5 constant is truthy and
	// the haystack does not test as multibyte.
	if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
		return strrpos($haystack, $needle, $offset);
	}

	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$haystackCount = count($haystack);
	$needleCount = count($needle);

	if ($needleCount === 0 || $needleCount > $haystackCount) {
		return false;
	}

	// Window of allowed match start positions, following native strrpos():
	// a positive offset raises the lower bound; a negative offset lowers the
	// upper bound to $haystackCount + $offset once it reaches back at least
	// one needle length.
	$first = 0;
	$last = $haystackCount - $needleCount;
	$offset = (int)$offset;
	if ($offset > 0) {
		$first = $offset;
	} elseif ($offset < 0 && -$offset >= $needleCount) {
		$last = $haystackCount + $offset;
	}

	// Scan backwards so the first hit is the last occurrence in the window.
	for ($position = $last; $position >= $first; $position--) {
		if ($haystack[$position] !== $needle[0]) {
			continue;
		}
		if (array_slice($haystack, $position, $needleCount) === $needle) {
			return $position;
		}
	}
	return false;
}
745:
746: /**
747: * Finds first occurrence of a string within another
748: *
749: * @param string $haystack The string from which to get the first occurrence of $needle.
750: * @param string $needle The string to find in $haystack
751: * @param boolean $part Determines which portion of $haystack this function returns.
752: * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
753: * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
754: * Default value is FALSE.
* @return string|boolean The portion of $haystack, or false if $needle is not found.
756: * @access public
757: * @static
758: */
function strstr($haystack, $needle, $part = false) {
	// Native strstr() accepts its third argument only from PHP 5.3 on.
	$legacy = version_compare(PHP_VERSION, '5.3.0', '<');

	if (($legacy && $part) || Multibyte::checkMultibyte($haystack)) {
		// Works on code points so multibyte characters are never split.
		$haystack = Multibyte::utf8($haystack);
		$needle = Multibyte::utf8($needle);
		$needleCount = count($needle);

		if ($needleCount === 0) {
			return false;
		}

		// Last index at which a complete needle still fits; bounding the loop
		// here avoids reading past the end of $haystack.
		$last = count($haystack) - $needleCount;

		for ($position = 0; $position <= $last; $position++) {
			if ($haystack[$position] !== $needle[0]) {
				continue;
			}
			if (array_slice($haystack, $position, $needleCount) !== $needle) {
				continue;
			}
			if ($part) {
				// Everything before the match; an empty string when the match
				// is at the very start.
				return Multibyte::ascii(array_slice($haystack, 0, $position));
			}
			return Multibyte::ascii(array_slice($haystack, $position));
		}
		return false;
	}

	if (!$legacy) {
		return strstr($haystack, $needle, $part);
	}
	return strstr($haystack, $needle);
}
806:
807: /**
808: * Make a string lowercase
809: *
810: * @param string $string The string being lowercased.
811: * @return string with all alphabetic characters converted to lowercase.
812: * @access public
813: * @static
814: */
function strtolower($string) {
	$_this =& Multibyte::getInstance();
	$lowerCase = array();

	foreach (Multibyte::utf8($string) as $char) {
		if ($char < 128) {
			// ASCII: let the native function lower-case the single byte.
			$lowerCase[] = ord(strtolower(chr($char)));
			continue;
		}

		// Non-ASCII: keep the code point unless a case-folding entry maps it.
		$replacement = $char;
		$keys = $_this->__find($char, 'upper');

		if (!empty($keys)) {
			foreach ($keys as $entry) {
				// The first entry whose 'upper' value equals this code point wins.
				if ($entry['upper'] == $char && count($entry['lower'][0]) === 1) {
					$replacement = $entry['lower'][0];
					break;
				}
			}
		}
		$lowerCase[] = $replacement;
	}
	return Multibyte::ascii($lowerCase);
}
854:
855: /**
856: * Make a string uppercase
857: *
858: * @param string $string The string being uppercased.
859: * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
860: * @return string with all alphabetic characters converted to uppercase.
861: * @access public
862: * @static
863: */
function strtoupper($string) {
// Decodes $string to code points with utf8(), maps each one to upper case,
// and re-encodes the result with ascii().
$_this =& Multibyte::getInstance();
$utf8Map = Multibyte::utf8($string);

$length = count($utf8Map);
$matched = false;
// Code points of multi-code-point 'lower' sequences that were already
// replaced by a single 'upper' value; consulted at the bottom of the loop.
$replaced = array();
$upperCase = array();

for ($i = 0 ; $i < $length; $i++) {
$char = $utf8Map[$i];

if ($char < 128) {
// ASCII: the native strtoupper() handles the single byte.
$str = strtoupper(chr($char));
$strlen = strlen($str);
for ($ii = 0 ; $ii < $strlen; $ii++) {
$upper = ord(substr($str, $ii, 1));
}
$upperCase[] = $upper;
$matched = true;

} else {
$matched = false;
// Candidate case-folding entries for this code point. Each entry is read
// below as an array with an 'upper' value and a 'lower' list.
// NOTE(review): the exact shape comes from __find(), which is outside this
// view - confirm against its implementation.
$keys = $_this->__find($char);
$keyCount = count($keys);

if (!empty($keys)) {
foreach ($keys as $key => $value) {
$matched = false;
$replace = 0;
if ($length > 1 && count($keys[$key]['lower']) > 1) {
// This entry's 'lower' form spans several code points: compare it against
// the input starting at the current position.
$j = 0;

for ($ii = 0, $count = count($keys[$key]['lower']); $ii < $count; $ii++) {
// NOTE(review): this read can go past the end of $utf8Map; the isset()
// below only runs after the read has happened.
$nextChar = $utf8Map[$i + $ii];

if (isset($nextChar) && ($nextChar == $keys[$key]['lower'][$j + $ii])) {
$replace++;
}
}
if ($replace == $count) {
// Whole sequence matched: emit the single 'upper' value and remember the
// consumed 'lower' code points.
$upperCase[] = $keys[$key]['upper'];
$replaced = array_merge($replaced, array_values($keys[$key]['lower']));
$matched = true;
break 1;
}
} elseif ($length > 1 && $keyCount > 1) {
// Several candidate entries: try the entries from index 1 onwards.
// NOTE(review): $keys is indexed numerically here ($keys[$ii]), which
// assumes __find() returns a 0-based list - confirm.
$j = 0;
for ($ii = 1; $ii < $keyCount; $ii++) {
$nextChar = $utf8Map[$i + $ii - 1];

if (in_array($nextChar, $keys[$ii]['lower'])) {

for ($jj = 0, $count = count($keys[$ii]['lower']); $jj < $count; $jj++) {
$nextChar = $utf8Map[$i + $jj];

if (isset($nextChar) && ($nextChar == $keys[$ii]['lower'][$j + $jj])) {
$replace++;
}
}
if ($replace == $count) {
$upperCase[] = $keys[$ii]['upper'];
$replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
$matched = true;
// Leaves both the $ii loop and the foreach over $keys.
break 2;
}
}
}
}
// Plain one-to-one mapping: the entry's first 'lower' code point equals
// the current one.
if ($keys[$key]['lower'][0] == $char) {
$upperCase[] = $keys[$key]['upper'];
$matched = true;
break 1;
}
}
}
}
// Unmatched code points are copied through unchanged, unless their value
// is recorded in $replaced.
// NOTE(review): presumably this skips the trailing code points of a sequence
// replaced above, but it matches on value rather than position - confirm.
if ($matched === false && !in_array($char, $replaced, true)) {
$upperCase[] = $char;
}
}
return Multibyte::ascii($upperCase);
}
947:
/**
 * Count the number of substring occurrences
 *
 * Both strings are converted to arrays of character codes with Multibyte::utf8()
 * and compared code by code. Occurrences are counted without overlap: after a
 * match the search resumes right behind the matched portion.
 *
 * @param string $haystack The string being checked.
 * @param string $needle The string being found.
 * @return integer The number of times the $needle substring occurs in the $haystack string.
 *    0 is returned when $needle is empty or longer than $haystack.
 * @access public
 * @static
 */
function substrCount($haystack, $needle) {
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$haystackCount = count($haystack);
	$needleCount = count($needle);

	if ($needleCount === 0 || $needleCount > $haystackCount) {
		return 0;
	}

	$count = 0;
	// Last position at which a complete needle still fits into the haystack.
	$lastStart = $haystackCount - $needleCount;
	$i = 0;

	while ($i <= $lastStart) {
		$ii = 0;
		while ($ii < $needleCount && $haystack[$i + $ii] === $needle[$ii]) {
			$ii++;
		}

		if ($ii === $needleCount) {
			$count++;
			// Skip the matched codes so that occurrences never overlap.
			$i += $needleCount;
		} else {
			$i++;
		}
	}
	return $count;
}
984:
/**
 * Get part of string
 *
 * The string is split into character codes with Multibyte::utf8(), so $start and
 * $length count characters rather than bytes. Negative values follow the
 * conventions of mb_substr(): a negative $start counts from the end of the
 * string and a negative $length stops that many characters before the end.
 *
 * @param string $string The string being checked.
 * @param integer $start The first position used in $string.
 * @param integer $length The maximum length of the returned string.
 * @return string The portion of $string specified by the $start and $length parameters.
 * @access public
 * @static
 */
function substr($string, $start, $length = null) {
	if ($start === 0 && $length === null) {
		return $string;
	}

	$codes = Multibyte::utf8($string);

	// The length argument is only passed when one was given: older PHP versions
	// do not treat an explicit null as "until the end of the array".
	if ($length === null) {
		$slice = array_slice($codes, (int)$start);
	} else {
		$slice = array_slice($codes, (int)$start, (int)$length);
	}
	return Multibyte::ascii($slice);
}
1018:
/**
 * Prepare a string for mail transport, using the provided encoding
 *
 * Strings without multibyte characters that are shorter than 75 bytes are
 * returned untouched. Everything else is base64 encoded and wrapped in
 * '=?CHARSET?B?...?=' encoded words of at most 75 characters each, joined by
 * $newline followed by a single space. For UTF-8 the string is only split on
 * character boundaries.
 *
 * @param string $string value to encode
 * @param string $charset charset to use for encoding. When empty, the 'App.encoding'
 *    configuration value is used.
 * @param string $newline line separator placed between encoded words
 * @return string
 * @access public
 * @static
 * @TODO: add support for 'Q'('Quoted Printable') encoding
 */
function mimeEncode($string, $charset = null, $newline = "\r\n") {
	if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
		return $string;
	}

	if (empty($charset)) {
		$charset = Configure::read('App.encoding');
	}
	$charset = strtoupper($charset);

	$start = '=?' . $charset . '?B?';
	$end = '?=';
	$spacer = $end . $newline . ' ' . $start;

	// Room left for base64 data inside one encoded word, rounded down to whole
	// base64 quanta (4 output characters for 3 input bytes).
	$length = 75 - strlen($start) - strlen($end);
	$length = $length - ($length % 4);

	if ($charset === 'UTF-8') {
		$parts = array();
		$maxchars = (int)floor(($length * 3) / 4);

		while (strlen($string) > $maxchars) {
			// Walk back from the limit until $i no longer points at a UTF-8
			// continuation byte (0x80-0xBF), i.e. until a character starts at $i.
			$i = $maxchars;
			$test = ord($string[$i]);
			while ($i > 0 && $test >= 128 && $test <= 191) {
				$i--;
				$test = ord($string[$i]);
			}

			if ($i === 0) {
				// Malformed input: nothing but continuation bytes up to the limit.
				// Scan forward instead so every pass consumes at least one byte
				// and the loop is guaranteed to terminate.
				$total = strlen($string);
				$i = $maxchars;
				while ($i < $total && ord($string[$i]) >= 128 && ord($string[$i]) <= 191) {
					$i++;
				}
			}

			$parts[] = base64_encode(substr($string, 0, $i));
			$string = (string)substr($string, $i);
		}

		// The remainder is only empty after the forward scan above swallowed it.
		if ($string !== '' || empty($parts)) {
			$parts[] = base64_encode($string);
		}
		$string = implode($spacer, $parts);
	} else {
		$string = chunk_split(base64_encode($string), $length, $spacer);
		// chunk_split() also appends the separator after the last chunk.
		$string = preg_replace('/' . preg_quote($spacer, '/') . '$/', '', $string);
	}
	return $start . $string . $end;
}
1067:
/**
 * Return the Code points range for Unicode characters
 *
 * Maps a code point to the name of the case-folding table covering its Unicode
 * block. The result, including false, is stored in $this->__codeRange[$decimal].
 * Code points that lie between the supported blocks yield false instead of the
 * name of a neighbouring, unrelated table.
 *
 * @param integer $decimal decimal value of the code point
 * @return mixed string name of the range (for example '0370_03ff'), or false when
 *    the code point is not covered by any of the ranges
 * @access private
 */
function __codepoint($decimal) {
	// Each entry: first code point, one past the last code point, range name.
	$ranges = array(
		array(128, 256, '0080_00ff'), // Latin-1 Supplement
		array(256, 384, '0100_017f'), // Latin Extended-A
		array(384, 592, '0180_024F'), // Latin Extended-B
		array(592, 688, '0250_02af'), // IPA Extensions
		array(880, 1024, '0370_03ff'), // Greek and Coptic
		array(1024, 1280, '0400_04ff'), // Cyrillic
		array(1280, 1328, '0500_052f'), // Cyrillic Supplement
		array(1328, 1424, '0530_058f'), // Armenian
		array(7680, 7936, '1e00_1eff'), // Latin Extended Additional
		array(7936, 8192, '1f00_1fff'), // Greek Extended
		array(8448, 8528, '2100_214f'), // Letterlike Symbols
		array(8528, 8592, '2150_218f'), // Number Forms
		array(9312, 9472, '2460_24ff'), // Enclosed Alphanumerics
		array(11264, 11360, '2c00_2c5f'), // Glagolitic
		array(11360, 11392, '2c60_2c7f'), // Latin Extended-C
		array(11392, 11520, '2c80_2cff'), // Coptic
		array(65280, 65520, 'ff00_ffef') // Halfwidth and Fullwidth Forms
	);

	$return = false;
	foreach ($ranges as $range) {
		if ($decimal >= $range[0] && $decimal < $range[1]) {
			$return = $range[2];
			break;
		}
	}

	$this->__codeRange[$decimal] = $return;
	return $return;
}
1116:
/**
 * Find the related code folding values for $char
 *
 * On the first lookup of a character its range is resolved through
 * __codepoint() and the matching table is read from the configuration file
 * 'unicode/casefolding/<range>' into $this->__caseFold. A table that is already
 * cached is not loaded a second time. $this->__table is set to the range name
 * used for the search.
 *
 * @param integer $char decimal value of character
 * @param string $type 'lower' to match the first code of an entry's 'lower' list,
 *    'upper' to match an entry's 'upper' code
 * @return array the matching table entries (possibly empty), or null when no
 *    range covers $char
 * @access private
 */
function __find($char, $type = 'lower') {
	$found = array();

	if (!isset($this->__codeRange[$char])) {
		$range = $this->__codepoint($char);
		if ($range === false) {
			return null;
		}
		// Several characters share one table; load it only once.
		if (!isset($this->__caseFold[$range])) {
			Configure::load('unicode' . DS . 'casefolding' . DS . $range);
			$this->__caseFold[$range] = Configure::read($range);
			Configure::delete($range);
		}
	}

	if (!$this->__codeRange[$char]) {
		return null;
	}
	$this->__table = $this->__codeRange[$char];

	// Nothing to search when the table could not be loaded.
	if (empty($this->__caseFold[$this->__table])) {
		return $found;
	}

	foreach ($this->__caseFold[$this->__table] as $entry) {
		if ($type === 'lower' && $entry[$type][0] === $char) {
			$found[] = $entry;
		} elseif ($type === 'upper' && $entry[$type] === $char) {
			$found[] = $entry;
		}
	}
	return $found;
}
1153:
/**
 * Check the $string for multibyte characters
 *
 * The string is scanned byte by byte; any byte outside the 7-bit ASCII range
 * (value above 127) marks the string as containing multibyte data.
 *
 * @param string $string value to test
 * @return boolean true when at least one byte is above 127, false otherwise
 * @access public
 * @static
 */
function checkMultibyte($string) {
	$length = strlen($string);

	for ($i = 0; $i < $length; $i++) {
		// 127 is the highest ASCII value, so byte 128 (0x80) already counts.
		if (ord($string[$i]) > 127) {
			return true;
		}
	}
	return false;
}
1172: }
1173: