1: <?php
2: /* SVN FILE: $Id$ */
3: /**
4: * Multibyte handling methods.
5: *
6: *
7: * PHP versions 4 and 5
8: *
9: * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
10: * Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
11: *
12: * Licensed under The MIT License
13: * Redistributions of files must retain the above copyright notice.
14: *
15: * @copyright Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
16: * @link http://cakephp.org CakePHP(tm) Project
17: * @package cake
18: * @subpackage cake.cake.libs
19: * @since CakePHP(tm) v 1.2.0.6833
20: * @version $Revision$
21: * @modifiedby $LastChangedBy$
22: * @lastmodified $Date$
23: * @license http://www.opensource.org/licenses/mit-license.php The MIT License
24: */
// When the mbstring extension is loaded, switch its internal encoding to the
// value configured under App.encoding (skipped when that setting is empty).
if (function_exists('mb_internal_encoding')) {
	$encoding = Configure::read('App.encoding');
	if ($encoding) {
		mb_internal_encoding($encoding);
	}
}
/**
 * Fallback for mb_stripos(): position of the first case-insensitive occurrence of a string.
 *
 * Declared only when no mb_stripos() function exists yet. The call is delegated to
 * Multibyte::stripos(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset The position in $haystack at which the search starts.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return integer|boolean The numeric position of the first occurrence of $needle in $haystack,
 *    or false if $needle is not found.
 */
if (!function_exists('mb_stripos')) {
	function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::stripos($haystack, $needle, $offset);
		return $position;
	}
}
/**
 * Fallback for mb_stristr(): first case-insensitive occurrence of a string within another.
 *
 * Declared only when no mb_stristr() function exists yet. The call is delegated to
 * Multibyte::stristr(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned.
 *    True: everything before the first occurrence of $needle.
 *    False (default): everything from the first occurrence of $needle to the end.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 */
if (!function_exists('mb_stristr')) {
	function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::stristr($haystack, $needle, $part);
		return $portion;
	}
}
/**
 * Fallback for mb_strlen(): length of a string in characters.
 *
 * Declared only when no mb_strlen() function exists yet. The call is delegated to
 * Multibyte::strlen(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $string The string whose length is measured.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return integer The number of characters in $string; a multi-byte character counts as 1.
 */
if (!function_exists('mb_strlen')) {
	function mb_strlen($string, $encoding = null) {
		$length = Multibyte::strlen($string);
		return $length;
	}
}
/**
 * Fallback for mb_strpos(): position of the first occurrence of a string.
 *
 * Declared only when no mb_strpos() function exists yet. The call is delegated to
 * Multibyte::strpos(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $haystack The string being searched.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset The search offset, counted from the beginning of $haystack. Defaults to 0.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return integer|boolean The numeric position of the first occurrence of $needle in $haystack,
 *    or false if $needle is not found.
 */
if (!function_exists('mb_strpos')) {
	function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::strpos($haystack, $needle, $offset);
		return $position;
	}
}
/**
 * Fallback for mb_strrchr(): last occurrence of a string within another.
 *
 * Declared only when no mb_strrchr() function exists yet. The call is delegated to
 * Multibyte::strrchr(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned.
 *    True: everything before the last occurrence of $needle.
 *    False (default): everything from the last occurrence of $needle to the end.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 */
if (!function_exists('mb_strrchr')) {
	function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::strrchr($haystack, $needle, $part);
		return $portion;
	}
}
/**
 * Fallback for mb_strrichr(): last case-insensitive occurrence of a string within another.
 *
 * Declared only when no mb_strrichr() function exists yet. The call is delegated to
 * Multibyte::strrichr(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned.
 *    True: everything before the last occurrence of $needle.
 *    False (default): everything from the last occurrence of $needle to the end.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 */
if (!function_exists('mb_strrichr')) {
	function mb_strrichr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::strrichr($haystack, $needle, $part);
		return $portion;
	}
}
/**
 * Fallback for mb_strripos(): position of the last case-insensitive occurrence of a string.
 *
 * Declared only when no mb_strripos() function exists yet. The call is delegated to
 * Multibyte::strripos(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset The position in $haystack at which the search starts.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return integer|boolean The numeric position of the last occurrence of $needle in $haystack,
 *    or false if $needle is not found.
 */
if (!function_exists('mb_strripos')) {
	function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::strripos($haystack, $needle, $offset);
		return $position;
	}
}
/**
 * Fallback for mb_strrpos(): position of the last occurrence of a string.
 *
 * Declared only when no mb_strrpos() function exists yet. The call is delegated to
 * Multibyte::strrpos(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $haystack The string being searched for the last occurrence of $needle.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset May be given to begin searching an arbitrary number of characters into
 *    the string. Negative values stop the search at an arbitrary point before the end of the string.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return integer|boolean The numeric position of the last occurrence of $needle in $haystack,
 *    or false if $needle is not found.
 */
if (!function_exists('mb_strrpos')) {
	function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::strrpos($haystack, $needle, $offset);
		return $position;
	}
}
/**
 * Fallback for mb_strstr(): first occurrence of a string within another.
 *
 * Declared only when no mb_strstr() function exists yet. The call is delegated to
 * Multibyte::strstr(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned.
 *    True: everything before the first occurrence of $needle.
 *    False (default): everything from the first occurrence of $needle to the end.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 */
if (!function_exists('mb_strstr')) {
	function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::strstr($haystack, $needle, $part);
		return $portion;
	}
}
/**
 * Fallback for mb_strtolower(): make a string lowercase.
 *
 * Declared only when no mb_strtolower() function exists yet. The call is delegated to
 * Multibyte::strtolower(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $string The string to convert.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return string $string with all alphabetic characters converted to lowercase.
 */
if (!function_exists('mb_strtolower')) {
	function mb_strtolower($string, $encoding = null) {
		$lowered = Multibyte::strtolower($string);
		return $lowered;
	}
}
/**
 * Fallback for mb_strtoupper(): make a string uppercase.
 *
 * Declared only when no mb_strtoupper() function exists yet. The call is delegated to
 * Multibyte::strtoupper(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $string The string to convert.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return string $string with all alphabetic characters converted to uppercase.
 */
if (!function_exists('mb_strtoupper')) {
	function mb_strtoupper($string, $encoding = null) {
		$raised = Multibyte::strtoupper($string);
		return $raised;
	}
}
/**
 * Fallback for mb_substr_count(): count the occurrences of a substring.
 *
 * Declared only when no mb_substr_count() function exists yet. The call is delegated to
 * Multibyte::substrCount(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $haystack The string being searched.
 * @param string $needle The substring to count.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return integer The number of times $needle occurs in $haystack.
 */
if (!function_exists('mb_substr_count')) {
	function mb_substr_count($haystack, $needle, $encoding = null) {
		$occurrences = Multibyte::substrCount($haystack, $needle);
		return $occurrences;
	}
}
/**
 * Fallback for mb_substr(): get part of a string.
 *
 * Declared only when no mb_substr() function exists yet. The call is delegated to
 * Multibyte::substr(); $encoding is accepted for signature compatibility but is not forwarded.
 *
 * @param string $string The string to extract from.
 * @param integer $start The first position used in $string.
 * @param integer $length The maximum length of the returned string.
 * @param string $encoding Character encoding name. Ignored by this fallback.
 * @return string The portion of $string specified by the $start and $length parameters,
 *    as produced by Multibyte::substr().
 */
if (!function_exists('mb_substr')) {
	function mb_substr($string, $start, $length = null, $encoding = null) {
		$portion = Multibyte::substr($string, $start, $length);
		return $portion;
	}
}
/**
 * Fallback for mb_encode_mimeheader(): encode a string for use in a MIME header.
 *
 * Declared only when no mb_encode_mimeheader() function exists yet. The call is delegated to
 * Multibyte::mimeEncode() with $str, $charset and $linefeed only; $transfer_encoding and
 * $indent are accepted for signature compatibility but are not forwarded.
 *
 * @param string $str The string being encoded.
 * @param string $charset The name of the character set in which $str is represented.
 *    Defaults to 'UTF-8' in this fallback.
 * @param string $transfer_encoding The scheme of MIME encoding, either "B" (Base64) or
 *    "Q" (Quoted-Printable). Not forwarded by this fallback.
 * @param string $linefeed The EOL (end-of-line) marker used for line-folding (an RFC term:
 *    the act of breaking a line longer than a certain length into multiple lines).
 *    Defaults to "\r\n" (CRLF).
 * @param integer $indent Not forwarded by this fallback and therefore has no effect here.
 * @return string The value returned by Multibyte::mimeEncode().
 */
if (!function_exists('mb_encode_mimeheader')) {
	function mb_encode_mimeheader($str, $charset = 'UTF-8', $transfer_encoding = 'B', $linefeed = "\r\n", $indent = 1) {
		$encoded = Multibyte::mimeEncode($str, $charset, $linefeed);
		return $encoded;
	}
}
244: /**
245: * Multibyte handling methods.
246: *
247: *
248: * @package cake
249: * @subpackage cake.cake.libs
250: */
251: class Multibyte extends Object {
252: /**
253: * Holds the case folding values
254: *
255: * @var array
256: * @access private
257: */
258: var $__caseFold = array();
259: /**
260: * Holds an array of Unicode code point ranges
261: *
262: * @var array
263: * @access private
264: */
265: var $__codeRange = array();
266: /**
267: * Holds the current code point range
268: *
269: * @var string
270: * @access private
271: */
272: var $__table = null;
/**
 * Returns the shared Multibyte instance, creating it on first use.
 *
 * The instance lives in a function-level static array, so every call hands back
 * a reference to the same object.
 *
 * @return object Multibyte instance
 * @access public
 * @static
 */
function &getInstance() {
	static $instance = array();

	if (empty($instance)) {
		// Plain assignment on purpose: "=& new" is deprecated as of PHP 5.3 and is a
		// parse error from PHP 7.0 on, which would make the whole file unloadable.
		$instance[0] = new Multibyte();
	}
	return $instance[0];
}
/**
 * Converts a UTF-8 encoded string into an array of Unicode code points.
 *
 * Bytes below 0x80 are emitted as-is. Any other byte is collected into a pending
 * sequence whose expected length (2, 3 or 4 bytes) is taken from the first byte
 * collected; once the sequence is complete it is decoded into one code point.
 * A sequence left incomplete at the end of the string is dropped.
 *
 * @param string $string UTF-8 encoded string
 * @return array List of integer code points, in string order
 * @access public
 * @static
 */
function utf8($string) {
	$map = array();
	$sequence = array();
	$expected = 1;
	$length = strlen($string);

	for ($i = 0; $i < $length; $i++) {
		$byte = ord($string[$i]);

		if ($byte < 0x80) {
			$map[] = $byte;
			continue;
		}

		if (empty($sequence)) {
			// First byte of a sequence decides how many bytes belong to it.
			// Lead bytes from 0xF0 start a 4-byte sequence (code points above U+FFFF).
			if ($byte < 0xE0) {
				$expected = 2;
			} elseif ($byte < 0xF0) {
				$expected = 3;
			} else {
				$expected = 4;
			}
		}
		$sequence[] = $byte;

		if (count($sequence) !== $expected) {
			continue;
		}

		if ($expected === 4) {
			$map[] = (($sequence[0] & 0x07) << 18)
				| (($sequence[1] & 0x3F) << 12)
				| (($sequence[2] & 0x3F) << 6)
				| ($sequence[3] & 0x3F);
		} elseif ($expected === 3) {
			$map[] = (($sequence[0] & 0x0F) << 12)
				| (($sequence[1] & 0x3F) << 6)
				| ($sequence[2] & 0x3F);
		} else {
			$map[] = (($sequence[0] & 0x1F) << 6) | ($sequence[1] & 0x3F);
		}
		$sequence = array();
		$expected = 1;
	}
	return $map;
}
/**
 * Converts an array of Unicode code points into a UTF-8 encoded string.
 *
 * Each code point is written as 1 byte (below 0x80), 2 bytes (below 0x800),
 * 3 bytes (below 0x10000) or 4 bytes (everything above).
 *
 * @param array $array List of integer code points
 * @return string UTF-8 encoded string
 * @access public
 * @static
 */
function ascii($array) {
	$ascii = '';

	foreach ($array as $utf8) {
		if ($utf8 < 0x80) {
			$ascii .= chr($utf8);
		} elseif ($utf8 < 0x800) {
			$ascii .= chr(0xC0 | ($utf8 >> 6));
			$ascii .= chr(0x80 | ($utf8 & 0x3F));
		} elseif ($utf8 < 0x10000) {
			$ascii .= chr(0xE0 | ($utf8 >> 12));
			$ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
			$ascii .= chr(0x80 | ($utf8 & 0x3F));
		} else {
			// Code points above U+FFFF need four bytes; squeezing them into the
			// 3-byte form overflows the lead byte and yields invalid UTF-8.
			$ascii .= chr(0xF0 | ($utf8 >> 18));
			$ascii .= chr(0x80 | (($utf8 >> 12) & 0x3F));
			$ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
			$ascii .= chr(0x80 | ($utf8 & 0x3F));
		}
	}
	return $ascii;
}
/**
 * Case-insensitive search for the first occurrence of $needle in $haystack.
 *
 * PHP's native stripos() is used when the PHP5 constant is truthy and
 * Multibyte::checkMultibyte() does not flag the haystack. In every other case both
 * strings are upper-cased with Multibyte::strtoupper() and the search is handed to
 * Multibyte::strpos().
 *
 * @param multi-byte string $haystack The string to search in.
 * @param multi-byte string $needle The string to look for in $haystack.
 * @param integer $offset The position in $haystack at which the search starts.
 * @return integer|boolean The numeric position of the first occurrence of $needle in $haystack,
 *    or false if $needle is not found.
 * @access public
 * @static
 */
function stripos($haystack, $needle, $offset = 0) {
	if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
		return stripos($haystack, $needle, $offset);
	}

	$upperHaystack = Multibyte::strtoupper($haystack);
	$upperNeedle = Multibyte::strtoupper($needle);
	return Multibyte::strpos($upperHaystack, $upperNeedle, $offset);
}
/**
 * Finds the first occurrence of a string within another, case insensitive.
 *
 * The native stristr() is used unless Multibyte::checkMultibyte() flags the haystack, or
 * $part is requested on a PHP version older than 5.3 (whose stristr() has no third
 * argument). Otherwise both strings are upper-cased with Multibyte::strtoupper(), turned
 * into code points and scanned from the left.
 *
 * NOTE(review): match positions are found in the upper-cased haystack but applied to the
 * original one; this assumes upper-casing does not change the number of code points.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack. An empty needle never matches.
 * @param boolean $part Selects which portion of $haystack is returned.
 *    True: everything before the first occurrence of $needle (an empty string when the
 *    match is at the very start).
 *    False (default): everything from the first occurrence of $needle to the end.
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 * @access public
 * @static
 */
function stristr($haystack, $needle, $part = false) {
	// version_compare() instead of comparing the version string against a float.
	$legacy = version_compare(PHP_VERSION, '5.3.0', '<');

	if (!($legacy && $part) && !Multibyte::checkMultibyte($haystack)) {
		if ($legacy) {
			return stristr($haystack, $needle);
		}
		return stristr($haystack, $needle, $part);
	}

	$check = Multibyte::utf8(Multibyte::strtoupper($haystack));
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8(Multibyte::strtoupper($needle));
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Stop where the needle no longer fits, so no offset past the end is ever read.
	$last = count($check) - $needleCount;

	for ($position = 0; $position <= $last; $position++) {
		for ($i = 0; $i < $needleCount; $i++) {
			if ($needle[$i] !== $check[$position + $i]) {
				break;
			}
		}
		if ($i !== $needleCount) {
			continue;
		}
		if ($part) {
			return Multibyte::ascii(array_slice($haystack, 0, $position));
		}
		return Multibyte::ascii(array_slice($haystack, $position));
	}
	return false;
}
/**
 * Returns the length of a string in characters.
 *
 * When Multibyte::checkMultibyte() flags the string, it is converted to code points
 * with Multibyte::utf8() and those are counted; otherwise the native strlen() result
 * is returned.
 *
 * @param string $string The string whose length is measured.
 * @return integer The number of characters in $string
 * @access public
 * @static
 */
function strlen($string) {
	if (!Multibyte::checkMultibyte($string)) {
		return strlen($string);
	}

	$codePoints = Multibyte::utf8($string);
	return count($codePoints);
}
/**
 * Finds the position of the first occurrence of a string.
 *
 * When Multibyte::checkMultibyte() does not flag the haystack, the native strpos() is
 * used. Otherwise both strings are converted to code points with Multibyte::utf8() and
 * the haystack is scanned from $offset; the returned position is counted in characters.
 *
 * @param string $haystack The string being searched.
 * @param string $needle The string to look for in $haystack. An empty needle never matches.
 * @param integer $offset The search offset, in characters. Defaults to 0; in the multibyte
 *    path a negative value is treated as 0.
 * @return integer|boolean The numeric position of the first occurrence of $needle in $haystack.
 *    If $needle is not found, it returns false.
 * @access public
 * @static
 */
function strpos($haystack, $needle, $offset = 0) {
	if (!Multibyte::checkMultibyte($haystack)) {
		return strpos($haystack, $needle, $offset);
	}

	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Only positions where the whole needle still fits are examined, so the scan
	// never reads an offset outside the haystack.
	$last = count($haystack) - $needleCount;

	for ($position = max(0, (int)$offset); $position <= $last; $position++) {
		for ($i = 0; $i < $needleCount; $i++) {
			if ($needle[$i] !== $haystack[$position + $i]) {
				break;
			}
		}
		if ($i === $needleCount) {
			return $position;
		}
	}
	return false;
}
/**
 * Finds the last occurrence of a string within another.
 *
 * Both strings are converted to code points with Multibyte::utf8() and the haystack is
 * scanned from the right for a position where the complete needle matches.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack. An empty needle never matches.
 * @param boolean $part Selects which portion of $haystack is returned.
 *    True: everything before the last occurrence of $needle (an empty string when the
 *    match is at the very start).
 *    False (default): everything from the last occurrence of $needle to the end.
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 * @access public
 * @static
 */
function strrchr($haystack, $needle, $part = false) {
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Walk backwards from the last position where the needle still fits; the first
	// full match met this way is the last occurrence.
	for ($position = count($haystack) - $needleCount; $position >= 0; $position--) {
		for ($i = 0; $i < $needleCount; $i++) {
			if ($needle[$i] !== $haystack[$position + $i]) {
				break;
			}
		}
		if ($i !== $needleCount) {
			continue;
		}
		if ($part) {
			return Multibyte::ascii(array_slice($haystack, 0, $position));
		}
		return Multibyte::ascii(array_slice($haystack, $position));
	}
	return false;
}
/**
 * Finds the last occurrence of a string within another, case insensitive.
 *
 * Both strings are upper-cased with Multibyte::strtoupper() and converted to code points
 * with Multibyte::utf8(); the upper-cased haystack is scanned from the right for a
 * position where the complete needle matches. The returned portion is cut from the
 * original (not upper-cased) haystack.
 *
 * NOTE(review): match positions are found in the upper-cased haystack but applied to the
 * original one; this assumes upper-casing does not change the number of code points.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack. An empty needle never matches.
 * @param boolean $part Selects which portion of $haystack is returned.
 *    True: everything before the last occurrence of $needle (an empty string when the
 *    match is at the very start).
 *    False (default): everything from the last occurrence of $needle to the end.
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 * @access public
 * @static
 */
function strrichr($haystack, $needle, $part = false) {
	$check = Multibyte::utf8(Multibyte::strtoupper($haystack));
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8(Multibyte::strtoupper($needle));
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Walk backwards from the last position where the needle still fits; the first
	// full match met this way is the last occurrence.
	for ($position = count($check) - $needleCount; $position >= 0; $position--) {
		for ($i = 0; $i < $needleCount; $i++) {
			if ($needle[$i] !== $check[$position + $i]) {
				break;
			}
		}
		if ($i !== $needleCount) {
			continue;
		}
		if ($part) {
			return Multibyte::ascii(array_slice($haystack, 0, $position));
		}
		return Multibyte::ascii(array_slice($haystack, $position));
	}
	return false;
}
/**
 * Finds the position of the last occurrence of a string within another, case insensitive.
 *
 * The native strripos() is used when the PHP5 constant is truthy and
 * Multibyte::checkMultibyte() does not flag the haystack. Otherwise both strings are
 * upper-cased with Multibyte::strtoupper(), converted to code points and scanned from
 * the right; the returned position is counted in characters of the upper-cased haystack.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack. An empty needle never matches.
 * @param integer $offset Zero or positive: matches starting before this position are ignored.
 *    Negative: matches starting after that many characters from the end are ignored.
 * @return integer|boolean The numeric position of the last occurrence of $needle in $haystack,
 *    or false if $needle is not found.
 * @access public
 * @static
 */
function strripos($haystack, $needle, $offset = 0) {
	if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
		return strripos($haystack, $needle, $offset);
	}

	$haystack = Multibyte::utf8(Multibyte::strtoupper($haystack));
	$haystackCount = count($haystack);
	$needle = Multibyte::utf8(Multibyte::strtoupper($needle));
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// $first/$last bound the positions at which a match may start.
	$offset = (int)$offset;
	$first = 0;
	$last = $haystackCount - $needleCount;
	if ($offset > 0) {
		$first = $offset;
	} elseif ($offset < 0) {
		$last = min($last, $haystackCount + $offset);
	}

	// Scanning right-to-left, the first full match is the last occurrence.
	for ($position = $last; $position >= $first; $position--) {
		for ($i = 0; $i < $needleCount; $i++) {
			if ($needle[$i] !== $haystack[$position + $i]) {
				break;
			}
		}
		if ($i === $needleCount) {
			return $position;
		}
	}
	return false;
}
669:
/**
 * Finds the position of the last occurrence of a string in a string.
 *
 * The native strrpos() is used when the PHP5 constant is truthy and
 * Multibyte::checkMultibyte() does not flag the haystack. Otherwise both strings are
 * converted to code points with Multibyte::utf8() and scanned from the right; the
 * returned position is counted in characters.
 *
 * @param string $haystack The string being searched for the last occurrence of $needle.
 * @param string $needle The string to look for in $haystack. An empty needle never matches.
 * @param integer $offset Zero or positive: matches starting before this position are ignored.
 *    Negative: matches starting after that many characters from the end are ignored.
 * @return integer|boolean The numeric position of the last occurrence of $needle in $haystack.
 *    If $needle is not found, it returns false.
 * @access public
 * @static
 */
function strrpos($haystack, $needle, $offset = 0) {
	if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
		return strrpos($haystack, $needle, $offset);
	}

	$haystack = Multibyte::utf8($haystack);
	$haystackCount = count($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// $first/$last bound the positions at which a match may start.
	$offset = (int)$offset;
	$first = 0;
	$last = $haystackCount - $needleCount;
	if ($offset > 0) {
		$first = $offset;
	} elseif ($offset < 0) {
		$last = min($last, $haystackCount + $offset);
	}

	// Scanning right-to-left, the first full match is the last occurrence.
	for ($position = $last; $position >= $first; $position--) {
		for ($i = 0; $i < $needleCount; $i++) {
			if ($needle[$i] !== $haystack[$position + $i]) {
				break;
			}
		}
		if ($i === $needleCount) {
			return $position;
		}
	}
	return false;
}
/**
 * Finds the first occurrence of a string within another.
 *
 * The native strstr() is used unless Multibyte::checkMultibyte() flags the haystack, or
 * $part is requested on a PHP version older than 5.3 (whose strstr() has no third
 * argument). Otherwise both strings are converted to code points with Multibyte::utf8()
 * and the haystack is scanned from the left.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack. An empty needle never matches.
 * @param boolean $part Selects which portion of $haystack is returned.
 *    True: everything before the first occurrence of $needle (an empty string when the
 *    match is at the very start).
 *    False (default): everything from the first occurrence of $needle to the end.
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 * @access public
 * @static
 */
function strstr($haystack, $needle, $part = false) {
	// version_compare() instead of comparing the version string against a float.
	$legacy = version_compare(PHP_VERSION, '5.3.0', '<');

	if (!($legacy && $part) && !Multibyte::checkMultibyte($haystack)) {
		if ($legacy) {
			return strstr($haystack, $needle);
		}
		return strstr($haystack, $needle, $part);
	}

	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0) {
		return false;
	}

	// Stop where the needle no longer fits, so no offset past the end is ever read.
	$last = count($haystack) - $needleCount;

	for ($position = 0; $position <= $last; $position++) {
		for ($i = 0; $i < $needleCount; $i++) {
			if ($needle[$i] !== $haystack[$position + $i]) {
				break;
			}
		}
		if ($i !== $needleCount) {
			continue;
		}
		if ($part) {
			return Multibyte::ascii(array_slice($haystack, 0, $position));
		}
		return Multibyte::ascii(array_slice($haystack, $position));
	}
	return false;
}
/**
 * Makes a string lowercase.
 *
 * The string is converted to code points with Multibyte::utf8(). Code points below 128
 * go through PHP's native strtolower(). For every other code point the entries returned
 * by $_this->__find($char, 'upper') are searched for one whose 'upper' value equals the
 * code point; the first code point of that entry's 'lower' list is then used. Code
 * points without such an entry are kept unchanged. The result is re-encoded with
 * Multibyte::ascii().
 *
 * @param string $string The string to convert.
 * @return string $string with all alphabetic characters converted to lowercase.
 * @access public
 * @static
 */
function strtolower($string) {
	$_this =& Multibyte::getInstance();
	$lowerCase = array();

	foreach (Multibyte::utf8($string) as $codePoint) {
		// Default: keep the code point as it is.
		$mapped = $codePoint;

		if ($codePoint < 128) {
			$mapped = ord(strtolower(chr($codePoint)));
		} else {
			$candidates = $_this->__find($codePoint, 'upper');

			if (!empty($candidates)) {
				foreach ($candidates as $candidate) {
					// NOTE(review): count() is applied to a single 'lower' element, not to the
					// 'lower' list itself - looks unintended; confirm before changing.
					if ($candidate['upper'] == $codePoint && count($candidate['lower'][0]) === 1) {
						$mapped = $candidate['lower'][0];
						break;
					}
				}
			}
		}
		$lowerCase[] = $mapped;
	}
	return Multibyte::ascii($lowerCase);
}
/**
 * Makes a string uppercase.
 *
 * The string is converted to code points with Multibyte::utf8(). Code points below 128
 * go through PHP's native strtoupper(). For every other code point the entries returned
 * by $_this->__find($char) are tried in order:
 *  - an entry whose 'lower' list has several code points matches when those code points
 *    appear in the string starting at the current position; its 'upper' value is emitted
 *    and the 'lower' code points are remembered in $replaced;
 *  - otherwise the remaining entries are probed the same way;
 *  - otherwise an entry whose first 'lower' code point equals the current one supplies
 *    the 'upper' value.
 * A code point with no match is kept unless it is listed in $replaced. The result is
 * re-encoded with Multibyte::ascii().
 *
 * @param string $string The string to convert.
 * @return string $string with all alphabetic characters converted to uppercase.
 * @access public
 * @static
 */
function strtoupper($string) {
	$_this =& Multibyte::getInstance();
	$utf8Map = Multibyte::utf8($string);

	$length = count($utf8Map);
	$replaced = array();
	$upperCase = array();

	for ($i = 0; $i < $length; $i++) {
		$char = $utf8Map[$i];

		if ($char < 128) {
			$upperCase[] = ord(strtoupper(chr($char)));
			continue;
		}

		$matched = false;
		$keys = $_this->__find($char);
		$keyCount = count($keys);

		if (!empty($keys)) {
			foreach ($keys as $key => $entry) {
				$matched = false;
				$replace = 0;

				if ($length > 1 && count($entry['lower']) > 1) {
					$count = count($entry['lower']);

					for ($ii = 0; $ii < $count; $ii++) {
						// isset() guards the look-ahead: near the end of the string
						// $i + $ii can point past the last code point.
						if (isset($utf8Map[$i + $ii]) && ($utf8Map[$i + $ii] == $entry['lower'][$ii])) {
							$replace++;
						}
					}
					if ($replace == $count) {
						$upperCase[] = $entry['upper'];
						$replaced = array_merge($replaced, array_values($entry['lower']));
						$matched = true;
						break;
					}
				} elseif ($length > 1 && $keyCount > 1) {
					// $replace is deliberately not reset between candidates here; it keeps
					// accumulating exactly as it did before this rewrite.
					for ($ii = 1; $ii < $keyCount; $ii++) {
						$nextChar = isset($utf8Map[$i + $ii - 1]) ? $utf8Map[$i + $ii - 1] : null;

						if (in_array($nextChar, $keys[$ii]['lower'])) {
							$count = count($keys[$ii]['lower']);

							for ($jj = 0; $jj < $count; $jj++) {
								if (isset($utf8Map[$i + $jj]) && ($utf8Map[$i + $jj] == $keys[$ii]['lower'][$jj])) {
									$replace++;
								}
							}
							if ($replace == $count) {
								$upperCase[] = $keys[$ii]['upper'];
								$replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
								$matched = true;
								break 2;
							}
						}
					}
				}

				if ($entry['lower'][0] == $char) {
					$upperCase[] = $entry['upper'];
					$matched = true;
					break;
				}
			}
		}

		if ($matched === false && !in_array($char, $replaced, true)) {
			$upperCase[] = $char;
		}
	}
	return Multibyte::ascii($upperCase);
}
920: /**
921: * Count the number of substring occurrences
922: *
923: * @param string $haystack The string being checked.
924: * @param string $needle The string being found.
925: * @return integer The number of times the $needle substring occurs in the $haystack string.
926: * @access public
927: * @static
928: */
function substrCount($haystack, $needle) {
	// Counts the non-overlapping occurrences of $needle in $haystack, comparing
	// the arrays returned by Multibyte::utf8() element by element.
	//
	// The previous loop compared every needle element against
	// $haystack[$i + 1] only, read undefined offsets near the end of the
	// haystack and could even decrement the counter below zero on a partial
	// match (needle "abc" in haystack "abd" returned -1).
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$haystackCount = count($haystack);
	$needleCount = count($needle);

	// An empty needle, or one longer than the haystack, can never match.
	if ($needleCount === 0 || $needleCount > $haystackCount) {
		return 0;
	}

	// Single-element needle: a frequency table answers this directly.
	if ($needleCount === 1) {
		$matches = array_count_values($haystack);
		return isset($matches[$needle[0]]) ? $matches[$needle[0]] : 0;
	}

	$count = 0;
	$last = $haystackCount - $needleCount;
	$i = 0;
	while ($i <= $last) {
		$ii = 0;
		while ($ii < $needleCount && $haystack[$i + $ii] === $needle[$ii]) {
			$ii++;
		}
		if ($ii === $needleCount) {
			$count++;
			// Resume after the match so occurrences do not overlap.
			$i += $needleCount;
		} else {
			$i++;
		}
	}
	return $count;
}
956: /**
957: * Get part of string
958: *
959: * @param string $string The string being checked.
960: * @param integer $start The first position used in $string.
961: * @param integer $length The maximum length of the returned string.
* @return string The portion of $string specified by the $start and $length parameters.
963: * @access public
964: * @static
965: */
function substr($string, $start, $length = null) {
	// Returns part of $string, with $start and $length applied to the array
	// returned by Multibyte::utf8() rather than to the raw bytes.
	//
	// Negative values follow array_slice(): a negative $start counts from the
	// end of the string and a negative $length stops that many elements before
	// the end. Previously a negative $start was silently treated as 0 and a
	// negative $length always produced an empty string.
	if ($start === 0 && $length === null) {
		return $string;
	}

	$codepoints = Multibyte::utf8($string);

	if ($length === null) {
		// Two-argument form on purpose: PHP versions before 5.2.4 treat an
		// explicit null length as zero.
		$slice = array_slice($codepoints, (int)$start);
	} else {
		$slice = array_slice($codepoints, (int)$start, (int)$length);
	}
	return Multibyte::ascii($slice);
}
989: /**
990: * Prepare a string for mail transport, using the provided encoding
991: *
992: * @param string $string value to encode
* @param string $charset charset to use for encoding. Defaults to the App.encoding configuration value
994: * @param string $newline
995: * @return string
996: * @access public
997: * @static
998: * @TODO: add support for 'Q'('Quoted Printable') encoding
999: */
function mimeEncode($string, $charset = null, $newline = "\r\n") {
	// Wraps $string in one or more base64 ("B") encoded-words of the form
	// =?CHARSET?B?...?=, each at most 75 characters long, joined by
	// $newline followed by a space. Strings shorter than 75 bytes for which
	// Multibyte::checkMultibyte() reports no multibyte content are returned
	// unchanged.
	if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
		return $string;
	}

	if (empty($charset)) {
		$charset = Configure::read('App.encoding');
	}
	$charset = strtoupper($charset);

	$start = '=?' . $charset . '?B?';
	$end = '?=';
	$spacer = $end . $newline . ' ' . $start;

	// Room left for the payload of one encoded-word, rounded down to a whole
	// number of 4-character base64 groups.
	$length = 75 - strlen($start) - strlen($end);
	$length = $length - ($length % 4);

	if ($charset == 'UTF-8') {
		$parts = array();
		// Raw bytes that fit into $length base64 characters. Cast to int:
		// floor() returns a float, which must not be used as a string offset.
		$maxchars = (int)floor(($length * 3) / 4);

		while (strlen($string) > $maxchars) {
			$i = $maxchars;
			// Step back over UTF-8 continuation bytes (10xxxxxx, 128-191) so
			// the cut never lands inside a multi-byte sequence.
			while ($i > 0 && (ord($string[$i]) & 0xC0) === 0x80) {
				$i--;
			}
			if ($i === 0) {
				// Malformed input made of continuation bytes only: cut at the
				// byte limit instead of looping forever on an empty chunk.
				$i = $maxchars;
			}
			$parts[] = base64_encode(substr($string, 0, $i));
			$string = substr($string, $i);
		}
		$parts[] = base64_encode($string);
		$string = implode($spacer, $parts);
	} else {
		$string = chunk_split(base64_encode($string), $length, $spacer);
		// chunk_split() also appends the separator after the last chunk.
		// Quote with the pattern delimiter so a "/" in $newline or $charset
		// cannot break the expression.
		$string = preg_replace('/' . preg_quote($spacer, '/') . '$/', '', $string);
	}
	return $start . $string . $end;
}
1037: /**
1038: * Return the Code points range for Unicode characters
1039: *
* @param integer $decimal
1041: * @return string
1042: * @access private
1043: */
function __codepoint ($decimal) {
	// Maps a decimal code point to the name of the case-folding table that
	// covers it, or false when no table does. The result is cached in
	// $this->__codeRange[$decimal] before it is returned.
	//
	// Every block is listed with explicit inclusive bounds. The former
	// if/elseif chain tested only an upper bound on most branches, so values
	// lying between two blocks (e.g. 688-879 or 1424-7679) and values below
	// 128 fell through to the next block's table, and 128 itself (U+0080)
	// was reported as Latin Extended-A.
	static $ranges = array(
		array(128, 255, '0080_00ff'), // Latin-1 Supplement
		array(256, 383, '0100_017f'), // Latin Extended-A
		array(384, 591, '0180_024F'), // Latin Extended-B
		array(592, 687, '0250_02af'), // IPA Extensions
		array(880, 1023, '0370_03ff'), // Greek and Coptic
		array(1024, 1279, '0400_04ff'), // Cyrillic
		array(1280, 1327, '0500_052f'), // Cyrillic Supplement
		array(1328, 1423, '0530_058f'), // Armenian
		array(7680, 7935, '1e00_1eff'), // Latin Extended Additional
		array(7936, 8191, '1f00_1fff'), // Greek Extended
		array(8448, 8527, '2100_214f'), // Letterlike Symbols
		array(8528, 8591, '2150_218f'), // Number Forms
		array(9312, 9471, '2460_24ff'), // Enclosed Alphanumerics
		array(11264, 11359, '2c00_2c5f'), // Glagolitic
		array(11360, 11391, '2c60_2c7f'), // Latin Extended-C
		array(11392, 11519, '2c80_2cff'), // Coptic
		array(65280, 65519, 'ff00_ffef') // Halfwidth and Fullwidth Forms
	);

	$return = false;
	foreach ($ranges as $range) {
		if ($decimal >= $range[0] && $decimal <= $range[1]) {
			$return = $range[2];
			break;
		}
	}

	$this->__codeRange[$decimal] = $return;
	return $return;
}
1085: /**
1086: * Find the related code folding values for $char
1087: *
1088: * @param integer $char decimal value of character
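* @param string $type Side of the case folding records to compare $char with: 'lower' or 'upper'
* @return array|null Matching case folding records, or null when no code range covers $char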
1091: * @access private
1092: */
function __find($char, $type = 'lower') {
	// Collects the case-folding records that match $char. With $type 'lower'
	// a record matches when the first element of its 'lower' list is $char;
	// with $type 'upper' when its 'upper' value is $char. Returns null when
	// __codepoint() reports no table for $char.
	//
	// A table is now loaded only once per range. Before, it was re-read
	// through Configure::load() for every character not seen yet, even when
	// the table for its range was already held in $this->__caseFold.
	$found = array();

	if (isset($this->__codeRange[$char])) {
		$range = $this->__codeRange[$char];
	} else {
		// __codepoint() also stores its answer in $this->__codeRange.
		$range = $this->__codepoint($char);
	}
	if (!$range) {
		return null;
	}

	if (!isset($this->__caseFold[$range])) {
		Configure::load('unicode' . DS . 'casefolding' . DS . $range);
		$this->__caseFold[$range] = Configure::read($range);
		// The table is kept on this object only, not in Configure.
		Configure::delete($range);
	}
	$this->__table = $range;

	$table = $this->__caseFold[$range];
	if (!is_array($table) || ($type !== 'lower' && $type !== 'upper')) {
		return $found;
	}

	foreach ($table as $fold) {
		$candidate = ($type === 'lower') ? $fold['lower'][0] : $fold['upper'];
		if ($candidate === $char) {
			$found[] = $fold;
		}
	}
	return $found;
}
1121: /**
1122: * Check the $string for multibyte characters
1123: * @param string $string value to test
1124: * @return boolean
1125: * @access public
1126: * @static
1127: */
function checkMultibyte($string) {
	// Reports whether $string contains at least one byte outside the 7-bit
	// ASCII range (0-127). The comparison used to be "> 128", which let the
	// byte 0x80 pass as ASCII.
	$length = strlen($string);

	for ($i = 0; $i < $length; $i++) {
		if (ord($string[$i]) >= 128) {
			return true;
		}
	}
	return false;
}
1139: }
1140: ?>