Cake/Utility/String.php

1 <?php
2 /**
3 * String handling methods.
4 *
5 * PHP 5
6 *
7 * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
8 * Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
9 *
10 * Licensed under The MIT License
11 * Redistributions of files must retain the above copyright notice.
12 *
13 * @copyright Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
14 * @link http://cakephp.org CakePHP(tm) Project
15 * @package Cake.Utility
16 * @since CakePHP(tm) v 1.2.0.5551
17 * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
18 */
19  
20 /**
21 * String handling methods.
22 *
23 *
24 * @package Cake.Utility
25 */
26 class String {
27  
/**
 * Builds an identifier in the UUID text layout (8-4-4-4-12 hexadecimal digits).
 *
 * The value is assembled from the current microtime, three mt_rand() values, the
 * thread/process id and a "node" number. The node number is derived from `SERVER_ADDR`
 * (IPv4 or IPv6); when that is empty the host name from `HOSTNAME`/`HOST` is used, and
 * when nothing usable is found a crc32 of `Security.salt` is used instead.
 *
 * @see http://www.ietf.org/rfc/rfc4122.txt
 * @return string Identifier formatted as a UUID
 */
public static function uuid() {
	$node = env('SERVER_ADDR');

	if (strpos($node, ':') !== false) {
		// IPv6 address: expand a "::" shorthand so every group is present.
		if (substr_count($node, '::')) {
			$filler = str_repeat(':0000', 8 - substr_count($node, ':')) . ':';
			$node = str_replace('::', $filler, $node);
		}

		// Concatenate each group as 16 binary digits, then read the whole thing as decimal.
		$ipSix = '';
		foreach (explode(':', $node) as $group) {
			$ipSix .= str_pad(base_convert($group, 16, 2), 16, 0, STR_PAD_LEFT);
		}
		$decimal = base_convert($ipSix, 2, 10);

		// Short results are discarded so the salt fallback below applies.
		$node = (strlen($decimal) < 38) ? null : crc32($decimal);
	} elseif (empty($node)) {
		// No server address available: try to derive the node from the host name.
		$host = env('HOSTNAME');
		if (empty($host)) {
			$host = env('HOST');
		}

		if (!empty($host)) {
			$ip = gethostbyname($host);
			// gethostbyname() hands back its input when the lookup fails.
			$node = ($ip === $host) ? crc32($host) : ip2long($ip);
		}
	} elseif ($node === '127.0.0.1') {
		// The loopback address is not used as a node value.
		$node = null;
	} else {
		$node = ip2long($node);
	}

	if (empty($node)) {
		$node = crc32(Configure::read('Security.salt'));
	}

	if (function_exists('hphp_get_thread_id')) {
		$pid = hphp_get_thread_id();
	} elseif (function_exists('zend_thread_id')) {
		$pid = zend_thread_id();
	} else {
		$pid = getmypid();
	}

	// The id is printed with %04x, so anything missing or above 16 bits is replaced.
	if (!$pid || $pid > 65535) {
		$pid = mt_rand(0, 0xfff) | 0x4000;
	}

	// microtime() yields "<fraction> <seconds>".
	$clock = explode(' ', microtime());
	$fraction = $clock[0];
	$seconds = $clock[1];

	return sprintf(
		"%08x-%04x-%04x-%02x%02x-%04x%08x",
		(int)$seconds,
		(int)substr($fraction, 2) & 0xffff,
		mt_rand(0, 0xfff) | 0x4000,
		mt_rand(0, 0x3f) | 0x80,
		mt_rand(0, 0xff),
		$pid,
		$node
	);
}
102  
/**
 * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
 * $leftBound and $rightBound.
 *
 * Boundaries may nest when $leftBound and $rightBound differ. When they are the same character
 * (for example a quote) each occurrence toggles between "inside" and "outside".
 * Every token is trimmed before being returned.
 *
 * Note: an empty value (as judged by empty()) or an array is returned unchanged, so the result
 * is only guaranteed to be an array for non-empty string input.
 *
 * @param string $data The data to tokenize
 * @param string $separator The token to split the data on.
 * @param string $leftBound The left boundary to ignore separators in.
 * @param string $rightBound The right boundary to ignore separators in.
 * @return array Array of tokens in $data.
 */
public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')') {
	if (empty($data) || is_array($data)) {
		return $data;
	}

	$depth = 0;
	$offset = 0;
	$buffer = '';
	$results = array();
	$length = strlen($data);
	$open = false;

	while ($offset <= $length) {
		// Find the nearest separator or boundary at or after the current offset.
		$tmpOffset = -1;
		$offsets = array(
			strpos($data, $separator, $offset),
			strpos($data, $leftBound, $offset),
			strpos($data, $rightBound, $offset)
		);
		for ($i = 0; $i < 3; $i++) {
			if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
				$tmpOffset = $offsets[$i];
			}
		}

		if ($tmpOffset !== -1) {
			$buffer .= substr($data, $offset, ($tmpOffset - $offset));

			// Square-bracket offsets: the curly-brace form ($data{$i}) no longer parses on PHP 8.
			$char = $data[$tmpOffset];

			if ($char == $separator && $depth == 0) {
				// A separator outside of any boundary ends the current token.
				$results[] = $buffer;
				$buffer = '';
			} else {
				$buffer .= $char;
			}

			if ($leftBound != $rightBound) {
				if ($char == $leftBound) {
					$depth++;
				}
				if ($char == $rightBound) {
					$depth--;
				}
			} else {
				// Identical boundaries cannot nest; each occurrence flips the state.
				if ($char == $leftBound) {
					if (!$open) {
						$depth++;
						$open = true;
					} else {
						$depth--;
						$open = false;
					}
				}
			}
			$offset = ++$tmpOffset;
		} else {
			// Nothing left to split on: the remainder is the final token.
			$results[] = $buffer . substr($data, $offset);
			$offset = $length + 1;
		}
	}

	if (empty($results) && !empty($buffer)) {
		$results[] = $buffer;
	}

	if (!empty($results)) {
		$data = array_map('trim', $results);
	} else {
		$data = array();
	}
	return $data;
}
180  
/**
 * Replaces variable placeholders inside $str with the matching values from $data. Each key in
 * $data is the name of a placeholder in $str.
 * Example: `String::insert(':name is :age years old.', array('name' => 'Bob', 'age' => '65'));`
 * Returns: Bob is 65 years old.
 *
 * When $str contains `?` and the first key of $data is numeric, each `?` is instead replaced
 * in order by the next value of $data.
 *
 * Available $options are:
 *
 * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
 * - after: The character or string after the name of the variable placeholder (Defaults to null)
 * - escape: The character or string used to escape the before character / string (Defaults to `\`)
 * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
 *   (Overwrites before, after, breaks escape / clean)
 * - clean: A boolean or array with instructions for String::cleanInsert
 *
 * @param string $str A string containing variable placeholders
 * @param array $data A key => val array where each key stands for a placeholder variable name
 *   to be replaced with val
 * @param array $options An array of options, see description above
 * @return string
 */
public static function insert($str, $data, $options = array()) {
	$options += array(
		'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
	);
	$pattern = $options['format'];
	$data = (array)$data;

	if (empty($data)) {
		return ($options['clean']) ? self::cleanInsert($str, $options) : $str;
	}

	if ($pattern === null) {
		// Build the placeholder regex; "%%s" survives this sprintf() as "%s" for the name.
		$pattern = sprintf(
			'/(?<!%s)%s%%s%s/',
			preg_quote($options['escape'], '/'),
			str_replace('%', '%%', preg_quote($options['before'], '/')),
			str_replace('%', '%%', preg_quote($options['after'], '/'))
		);
	}

	$positional = strpos($str, '?') !== false && is_numeric(key($data));
	if ($positional) {
		$cursor = 0;
		while (($at = strpos($str, '?', $cursor)) !== false) {
			$value = array_shift($data);
			// Continue searching after the inserted value.
			$cursor = $at + strlen($value);
			$str = substr_replace($str, $value, $at, 1);
		}
		return ($options['clean']) ? self::cleanInsert($str, $options) : $str;
	}

	asort($data);

	$names = array_keys($data);
	$hashes = array();
	foreach ($names as $name) {
		$hashes[] = crc32($name);
	}

	// First pass: swap every placeholder for the crc32 of its name. Names are processed in
	// reverse key order, so a name is handled before any shorter name that is its prefix.
	$nameToHash = array_combine($names, $hashes);
	krsort($nameToHash);
	foreach ($nameToHash as $name => $hash) {
		$str = preg_replace(sprintf($pattern, preg_quote($name, '/')), $hash, $str);
	}

	// Second pass: swap every hash for the real value (arrays become an empty string).
	$hashToValue = array_combine($hashes, array_values($data));
	foreach ($hashToValue as $hash => $value) {
		$str = str_replace($hash, is_array($value) ? '' : $value, $str);
	}

	// Drop the escape string from escaped placeholders when the default format is in use.
	if (!isset($options['format']) && isset($options['before'])) {
		$str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
	}
	return ($options['clean']) ? self::cleanInsert($str, $options) : $str;
}
256  
/**
 * Cleans up a String::insert() formatted string according to the 'clean' key of $options.
 * The default method is `text`; `html` is also available. The goal is to remove placeholders
 * that String::insert() did not replace, together with the whitespace, "and"/"or" joiners or
 * (for html) the attribute markup around them.
 *
 * `clean` may be `true` (text method), a method name, or an array with a `method` key plus
 * optional `word`, `gap`, `andText` and `replacement` overrides.
 *
 * @param string $str
 * @param array $options Options as passed to String::insert(); `clean`, `before` and `after` are read
 * @return string
 * @see String::insert()
 */
public static function cleanInsert($str, $options) {
	$clean = $options['clean'];
	if (!$clean) {
		return $str;
	}

	// Normalize the setting into an array carrying a 'method' key.
	if ($clean === true) {
		$clean = array('method' => 'text');
	} elseif (!is_array($clean)) {
		$clean = array('method' => $options['clean']);
	}

	$method = $clean['method'];

	if ($method == 'html') {
		$clean = array_merge(array(
			'word' => '[\w,.]+',
			'andText' => true,
			'replacement' => '',
		), $clean);

		// Matches attributes whose quoted value consists only of placeholders.
		$pattern = sprintf(
			'/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
			preg_quote($options['before'], '/'),
			$clean['word'],
			preg_quote($options['after'], '/')
		);
		$str = preg_replace($pattern, $clean['replacement'], $str);

		if ($clean['andText']) {
			// Follow up with a plain text pass for placeholders outside of attributes.
			$options['clean'] = array('method' => 'text');
			$str = self::cleanInsert($str, $options);
		}
	} elseif ($method == 'text') {
		$clean = array_merge(array(
			'word' => '[\w,.]+',
			'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
			'replacement' => '',
		), $clean);

		// A placeholder followed by a gap, or a gap followed by a placeholder.
		$placeholder = preg_quote($options['before'], '/') . $clean['word'] . preg_quote($options['after'], '/');
		$pattern = '/(' . $placeholder . $clean['gap'] . '|' . $clean['gap'] . $placeholder . ')/';
		$str = preg_replace($pattern, $clean['replacement'], $str);
	}

	return $str;
}
321  
/**
 * Wraps text to a specific width, optionally breaking only between words.
 *
 * ### Options
 *
 * - `width` The width to wrap to. Defaults to 72
 * - `wordWrap` Only wrap on word breaks (spaces). Defaults to true. When false the text is
 *   cut into fixed chunks of `width - 1` characters.
 * - `indent` String to indent with. Defaults to null.
 * - `indentAt` 0 based index of the first line to indent. Defaults to 0.
 *
 * @param string $text The text to format.
 * @param mixed $options Array of options to use, or an integer to wrap the text to.
 * @return string Formatted text.
 */
public static function wrap($text, $options = array()) {
	if (is_numeric($options)) {
		$options = array('width' => $options);
	}
	$options += array('width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0);

	$wrapped = ($options['wordWrap'])
		? wordwrap($text, $options['width'], "\n")
		: trim(chunk_split($text, $options['width'] - 1, "\n"));

	if (empty($options['indent'])) {
		return $wrapped;
	}

	// Prefix every line from `indentAt` onwards with the indent string.
	$lines = explode("\n", $wrapped);
	$total = count($lines);
	$index = $options['indentAt'];
	while ($index < $total) {
		$lines[$index] = $options['indent'] . $lines[$index];
		$index++;
	}

	return implode("\n", $lines);
}
355  
/**
 * Highlights a given phrase in a text. The `format` option may be any replacement expression
 * and can use \1 to refer to the phrase that was found. Matching is case-insensitive.
 *
 * ### Options:
 *
 * - `format` The piece of html the phrase will be replaced with. When $phrase is an array this
 *   may also be an array holding one format per phrase (same keys).
 * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
 *
 * Any other key in $options is ignored.
 *
 * @param string $text Text to search the phrase in
 * @param string|array $phrase The phrase, or list of phrases, that will be searched
 * @param array $options An array of html attributes and options.
 * @return string The highlighted text
 * @link http://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::highlight
 */
public static function highlight($text, $phrase, $options = array()) {
	if (empty($phrase)) {
		return $text;
	}

	$default = array(
		'format' => '<span class="highlight">\1</span>',
		'html' => false
	);
	$options = array_merge($default, $options);

	// Read the supported options explicitly. extract() would let a stray key such as
	// 'text' or 'phrase' silently overwrite this method's own arguments.
	$format = $options['format'];
	$html = $options['html'];

	if (is_array($phrase)) {
		$replace = array();
		$with = array();

		foreach ($phrase as $key => $segment) {
			$segment = '(' . preg_quote($segment, '|') . ')';
			if ($html) {
				// Negative look-aheads skip matches that sit inside a tag.
				$segment = "(?![^<]+>)$segment(?![^<]+>)";
			}

			$with[] = (is_array($format)) ? $format[$key] : $format;
			$replace[] = "|$segment|iu";
		}

		return preg_replace($replace, $with, $text);
	}

	$phrase = '(' . preg_quote($phrase, '|') . ')';
	if ($html) {
		// Negative look-aheads skip matches that sit inside a tag.
		$phrase = "(?![^<]+>)$phrase(?![^<]+>)";
	}

	return preg_replace("|$phrase|iu", $format, $text);
}
407  
/**
 * Removes all link tags (<a href=....> and </a>) from the given text, keeping the link text.
 *
 * @param string $text Text
 * @return string The text without links
 * @link http://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::stripLinks
 */
public static function stripLinks($text) {
	// Closing tags go first, then the opening tags with their attributes.
	$withoutClosingTags = preg_replace('|<\/a>|im', '', $text);

	return preg_replace('|<a\s+[^>]+>|im', '', $withoutClosingTags);
}
418  
/**
 * Truncates text.
 *
 * Cuts a string to the length of $length and replaces the last characters
 * with the ending if the text is longer than length.
 *
 * ### Options:
 *
 * - `ending` Will be used as Ending and appended to the trimmed string
 * - `exact` If false, $text will not be cut mid-word
 * - `html` If true, HTML tags would be handled correctly: tags do not count towards the
 *   length, entities count as one character, and tags still open at the cut are closed.
 *
 * Any other key in $options is ignored.
 *
 * @param string $text String to truncate.
 * @param integer $length Length of returned string, including ellipsis.
 * @param array $options An array of html attributes and options.
 * @return string Trimmed string.
 * @link http://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::truncate
 */
public static function truncate($text, $length = 100, $options = array()) {
	$default = array(
		'ending' => '...', 'exact' => true, 'html' => false
	);
	$options = array_merge($default, $options);

	// Read the supported options explicitly. extract() would let a stray key such as
	// 'text' or 'length' silently overwrite this method's own arguments.
	$ending = $options['ending'];
	$exact = $options['exact'];
	$html = $options['html'];

	// Without the mbstring extension, referencing the Multibyte class triggers its loading.
	if (!function_exists('mb_strlen')) {
		class_exists('Multibyte');
	}

	if ($html) {
		if (mb_strlen(preg_replace('/<.*?>/', '', $text)) <= $length) {
			return $text;
		}
		$totalLength = mb_strlen(strip_tags($ending));
		$openTags = array();
		$truncate = '';

		// Each match is an optional tag ($tag[1], name in $tag[2]) followed by text ($tag[3]).
		preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
		foreach ($tags as $tag) {
			// Void elements never need closing. The pattern is anchored so that names which
			// merely contain one of these words (abbr, colgroup, ...) are still tracked.
			if (!preg_match('/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i', $tag[2])) {
				if (preg_match('/<[\w]+[^>]*>/s', $tag[0])) {
					array_unshift($openTags, $tag[2]);
				} elseif (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag)) {
					$pos = array_search($closeTag[1], $openTags);
					if ($pos !== false) {
						array_splice($openTags, $pos, 1);
					}
				}
			}
			$truncate .= $tag[1];

			// Entities are counted as a single character.
			$contentLength = mb_strlen(preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', ' ', $tag[3]));
			if ($contentLength + $totalLength > $length) {
				$left = $length - $totalLength;
				$entitiesLength = 0;
				if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $tag[3], $entities, PREG_OFFSET_CAPTURE)) {
					foreach ($entities[0] as $entity) {
						if ($entity[1] + 1 - $entitiesLength <= $left) {
							$left--;
							$entitiesLength += mb_strlen($entity[0]);
						} else {
							break;
						}
					}
				}

				$truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
				break;
			} else {
				$truncate .= $tag[3];
				$totalLength += $contentLength;
			}
			if ($totalLength >= $length) {
				break;
			}
		}
	} else {
		if (mb_strlen($text) <= $length) {
			return $text;
		}
		$truncate = mb_substr($text, 0, $length - mb_strlen($ending));
	}

	if (!$exact) {
		// Cut back to the last space so that no word is split.
		$spacepos = mb_strrpos($truncate, ' ');
		if ($html) {
			$truncateCheck = mb_substr($truncate, 0, $spacepos);
			$lastOpenTag = mb_strrpos($truncateCheck, '<');
			$lastCloseTag = mb_strrpos($truncateCheck, '>');
			if ($lastOpenTag > $lastCloseTag) {
				// The space lies inside a tag; cut right after that tag instead.
				preg_match_all('/<[\w]+[^>]*>/s', $truncate, $lastTagMatches);
				$lastTag = array_pop($lastTagMatches[0]);
				$spacepos = mb_strrpos($truncate, $lastTag) + mb_strlen($lastTag);
			}

			// Closing tags that fall into the removed part have to be re-added at the end.
			$bits = mb_substr($truncate, $spacepos);
			preg_match_all('/<\/([a-z]+)>/', $bits, $droppedTags, PREG_SET_ORDER);
			if (!empty($droppedTags)) {
				if (!empty($openTags)) {
					foreach ($droppedTags as $closingTag) {
						if (!in_array($closingTag[1], $openTags)) {
							array_unshift($openTags, $closingTag[1]);
						}
					}
				} else {
					foreach ($droppedTags as $closingTag) {
						array_push($openTags, $closingTag[1]);
					}
				}
			}
		}
		$truncate = mb_substr($truncate, 0, $spacepos);
	}
	$truncate .= $ending;

	if ($html) {
		foreach ($openTags as $tag) {
			$truncate .= '</' . $tag . '>';
		}
	}

	return $truncate;
}
541  
/**
 * Extracts an excerpt from the text surrounding the first (case-insensitive) occurrence of the
 * phrase, with up to $radius characters kept on each side.
 *
 * $ending is added on a side only when text was cut off there. With an empty text or phrase the
 * text is truncated to twice the radius; when the phrase is not found the first $radius
 * characters followed by $ending are returned.
 *
 * @param string $text String to search the phrase in
 * @param string $phrase Phrase that will be searched for
 * @param integer $radius The amount of characters that will be returned on each side of the found phrase
 * @param string $ending Ending that will be appended
 * @return string Modified string
 * @link http://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::excerpt
 */
public static function excerpt($text, $phrase, $radius = 100, $ending = '...') {
	if (empty($text) || empty($phrase)) {
		return self::truncate($text, $radius * 2, array('ending' => $ending));
	}

	$pos = mb_strpos(mb_strtolower($text), mb_strtolower($phrase));
	if ($pos === false) {
		return mb_substr($text, 0, $radius) . $ending;
	}

	$textLen = mb_strlen($text);

	// Left edge: clamp to the start of the text, in which case nothing is prepended.
	$from = $pos - $radius;
	$prefix = ($from <= 0) ? '' : $ending;
	$from = ($from <= 0) ? 0 : $from;

	// Right edge: clamp to the end of the text, in which case nothing is appended.
	$to = $pos + mb_strlen($phrase) + $radius;
	$suffix = ($to >= $textLen) ? '' : $ending;
	$to = ($to >= $textLen) ? $textLen : $to;

	return $prefix . mb_substr($text, $from, $to - $from) . $suffix;
}
585  
/**
 * Creates a comma separated list where the last two items are joined with 'and', forming natural English
 *
 * A list with a single item returns that item; an empty list returns null.
 *
 * @param array $list The list to be joined
 * @param string $and The word used to join the last and second last items together with. Defaults to 'and'
 * @param string $separator The separator used to join all the other items together. Defaults to ', '
 * @return string The glued together string.
 * @link http://book.cakephp.org/2.0/en/core-libraries/helpers/text.html#TextHelper::toList
 */
public static function toList($list, $and = 'and', $separator = ', ') {
	if (count($list) > 1) {
		// Offset 0 (rather than null) keeps array_slice() free of deprecation notices on PHP 8.1+.
		$leading = array_slice($list, 0, -1);

		return implode($separator, $leading) . ' ' . $and . ' ' . array_pop($list);
	}

	return array_pop($list);
}
602  
603 }
604  
605