cake/libs/inflector.php

1 <?php
2 /* SVN FILE: $Id$ */
3 /**
4 * Pluralize and singularize English words.
5 *
6 * Used by Cake's naming conventions throughout the framework.
7 *
8 * PHP versions 4 and 5
9 *
10 * CakePHP(tm) : Rapid Development Framework (http://www.cakephp.org)
11 * Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
12 *
13 * Licensed under The MIT License
14 * Redistributions of files must retain the above copyright notice.
15 *
16 * @filesource
17 * @copyright Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
18 * @link http://www.cakefoundation.org/projects/info/cakephp CakePHP(tm) Project
19 * @package cake
20 * @subpackage cake.cake.libs
21 * @since CakePHP(tm) v 0.2.9
22 * @version $Revision$
23 * @modifiedby $LastChangedBy$
24 * @lastmodified $Date$
25 * @license http://www.opensource.org/licenses/mit-license.php The MIT License
26 */
27 /**
28 * Included libraries.
29 *
30 */
31 if (!class_exists('Object')) {
32 uses('object');
33 }
34 if (!class_exists('Set')) {
35 require LIBS . 'set.php';
36 }
37 /**
38 * Pluralize and singularize English words.
39 *
40 * Inflector pluralizes and singularizes English nouns.
41 * Used by Cake's naming conventions throughout the framework.
42 * Test with $i = new Inflector(); $i->test();
43 *
44 * @package cake
45 * @subpackage cake.cake.libs
46 * @link http://book.cakephp.org/view/491/Inflector
47 */
48 class Inflector extends Object {
49 /**
50 * Pluralized words.
51 *
52 * @var array
53 * @access private
54 **/
55 var $pluralized = array();
56 /**
57 * List of pluralization rules in the form of pattern => replacement.
58 *
59 * @var array
60 * @access public
61 * @link http://book.cakephp.org/view/47/Custom-Inflections
62 **/
63 var $pluralRules = array();
64 /**
65 * Singularized words.
66 *
67 * @var array
68 * @access private
69 **/
70 var $singularized = array();
71 /**
72 * List of singularization rules in the form of pattern => replacement.
73 *
74 * @var array
75 * @access public
76 * @link http://book.cakephp.org/view/47/Custom-Inflections
77 **/
78 var $singularRules = array();
79 /**
80 * Plural rules from inflections.php
81 *
82 * @var array
83 * @access private
84 **/
85 var $__pluralRules = array();
86 /**
87 * Un-inflected plural rules from inflections.php
88 *
89 * @var array
90 * @access private
91 **/
92 var $__uninflectedPlural = array();
93 /**
94 * Irregular plural rules from inflections.php
95 *
96 * @var array
97 * @access private
98 **/
99 var $__irregularPlural = array();
100 /**
101 * Singular rules from inflections.php
102 *
103 * @var array
104 * @access private
105 **/
106 var $__singularRules = array();
107 /**
108 * Un-inflected singular rules from inflections.php
109 *
110 * @var array
111 * @access private
112 **/
113 var $__uninflectedSingular = array();
114 /**
115 * Irregular singular rules from inflections.php
116 *
117 * @var array
118 * @access private
119 **/
120 var $__irregularSingular = array();
/**
 * Gets a reference to the shared Inflector instance, creating it on first use.
 *
 * When the instance is created and CONFIGS . 'inflections.php' exists, that file is
 * included and the rule variables it defines are copied onto the instance:
 * $pluralRules, $uninflectedPlural, $irregularPlural and $singularRules.
 * If the file also defines $uninflectedSingular and/or $irregularSingular those are
 * used for the singular side; otherwise they are derived from the plural values
 * (same list, and the flipped irregular map respectively).
 * Any variable the file does not define falls back to an empty array.
 *
 * @return object Inflector instance
 * @access public
 */
function &getInstance() {
	static $instance = array();

	if (!$instance) {
		// Plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error in PHP 7.
		$instance[0] = new Inflector();
		if (file_exists(CONFIGS . 'inflections.php')) {
			include(CONFIGS . 'inflections.php');
			$instance[0]->__pluralRules = isset($pluralRules) ? $pluralRules : array();
			$instance[0]->__uninflectedPlural = isset($uninflectedPlural) ? $uninflectedPlural : array();
			$instance[0]->__irregularPlural = isset($irregularPlural) ? $irregularPlural : array();
			$instance[0]->__singularRules = isset($singularRules) ? $singularRules : array();
			// Honour explicit singular settings from the config file; only derive them
			// from the plural settings when the file leaves them undefined.
			if (isset($uninflectedSingular)) {
				$instance[0]->__uninflectedSingular = $uninflectedSingular;
			} else {
				$instance[0]->__uninflectedSingular = $instance[0]->__uninflectedPlural;
			}
			if (isset($irregularSingular)) {
				$instance[0]->__irregularSingular = $irregularSingular;
			} else {
				$instance[0]->__irregularSingular = array_flip($instance[0]->__irregularPlural);
			}
		}
	}
	return $instance[0];
}
/**
 * Builds the plural inflection tables and stores them on the instance.
 *
 * The built-in rule patterns, un-inflected words and irregular words are each
 * combined (via Set::pushDiff) with the values held in $this->__pluralRules,
 * $this->__uninflectedPlural and $this->__irregularPlural. The result is stored
 * in $this->pluralRules under the keys 'pluralRules', 'uninflected' and
 * 'irregular', and the cache of already pluralized words is emptied.
 *
 * @return void
 * @access private
 */
function __initPluralRules() {
	// Regex pattern => replacement; pluralize() applies the first matching pattern.
	$coreRules = array(
		'/(s)tatus$/i' => '\1\2tatuses',
		'/(quiz)$/i' => '\1zes',
		'/^(ox)$/i' => '\1\2en',
		'/([m|l])ouse$/i' => '\1ice',
		'/(matr|vert|ind)(ix|ex)$/i' => '\1ices',
		'/(x|ch|ss|sh)$/i' => '\1es',
		'/([^aeiouy]|qu)y$/i' => '\1ies',
		'/(hive)$/i' => '\1s',
		'/(?:([^f])fe|([lr])f)$/i' => '\1\2ves',
		'/sis$/i' => 'ses',
		'/([ti])um$/i' => '\1a',
		'/(p)erson$/i' => '\1eople',
		'/(m)an$/i' => '\1en',
		'/(c)hild$/i' => '\1hildren',
		'/(buffal|tomat)o$/i' => '\1\2oes',
		'/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$/i' => '\1i',
		'/us$/' => 'uses',
		'/(alias)$/i' => '\1es',
		'/(ax|cris|test)is$/i' => '\1es',
		'/s$/' => 's',
		'/^$/' => '',
		'/$/' => 's'
	);

	// Regex fragments for words whose plural equals their singular.
	$coreUninflected = array(
		'.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox',
		'.*sheep', 'Amoyese', 'bison', 'Borghese', 'bream', 'breeches',
		'britches', 'buffalo', 'cantus', 'carp', 'chassis', 'clippers',
		'cod', 'coitus', 'Congoese', 'contretemps', 'corps', 'debris',
		'diabetes', 'djinn', 'eland', 'elk', 'equipment', 'Faroese',
		'flounder', 'Foochowese', 'gallows', 'Genevese', 'Genoese', 'Gilbertese',
		'graffiti', 'headquarters', 'herpes', 'hijinks', 'Hottentotese', 'information',
		'innings', 'jackanapes', 'Kiplingese', 'Kongoese', 'Lucchese', 'mackerel',
		'Maltese', 'media', 'mews', 'moose', 'mumps', 'Nankingese',
		'news', 'nexus', 'Niasese', 'Pekingese', 'People', 'Piedmontese',
		'pincers', 'Pistoiese', 'pliers', 'Portuguese', 'proceedings', 'rabies',
		'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors', 'sea[- ]bass',
		'series', 'Shavese', 'shears', 'siemens', 'species', 'swine',
		'testes', 'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese',
		'whiting', 'wildebeest', 'Yengeese'
	);

	// Singular => plural for words that follow no pattern rule.
	$coreIrregular = array(
		'atlas' => 'atlases',
		'beef' => 'beefs',
		'brother' => 'brothers',
		'child' => 'children',
		'corpus' => 'corpuses',
		'cow' => 'cows',
		'ganglion' => 'ganglions',
		'genie' => 'genies',
		'genus' => 'genera',
		'graffito' => 'graffiti',
		'hoof' => 'hoofs',
		'loaf' => 'loaves',
		'man' => 'men',
		'money' => 'monies',
		'mongoose' => 'mongooses',
		'move' => 'moves',
		'mythos' => 'mythoi',
		'numen' => 'numina',
		'occiput' => 'occiputs',
		'octopus' => 'octopuses',
		'opus' => 'opuses',
		'ox' => 'oxen',
		'penis' => 'penises',
		'person' => 'people',
		'sex' => 'sexes',
		'soliloquy' => 'soliloquies',
		'testis' => 'testes',
		'trilby' => 'trilbys',
		'turf' => 'turfs'
	);

	$mergedRules = Set::pushDiff($this->__pluralRules, $coreRules);
	$mergedUninflected = Set::pushDiff($this->__uninflectedPlural, $coreUninflected);
	$mergedIrregular = Set::pushDiff($this->__irregularPlural, $coreIrregular);

	$this->pluralRules = array(
		'pluralRules' => $mergedRules,
		'uninflected' => $mergedUninflected,
		'irregular' => $mergedIrregular
	);
	// Results cached under earlier rules are no longer valid.
	$this->pluralized = array();
}
/**
 * Return $word in plural form.
 *
 * Lookup order: the per-instance cache, then the un-inflected word list (word is
 * returned unchanged), then the irregular word map, then the first matching
 * regex rule. If nothing matches, $word is returned unchanged. Every result is
 * cached on the shared instance.
 *
 * @param string $word Word in singular
 * @return string Word in plural
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
function pluralize($word) {
	$_this =& Inflector::getInstance();
	if (empty($_this->pluralRules)) {
		$_this->__initPluralRules();
	}

	if (isset($_this->pluralized[$word])) {
		return $_this->pluralized[$word];
	}
	// Brings $pluralRules, $uninflected, $irregular (and, once built,
	// $regexUninflected / $regexIrregular) into local scope.
	extract($_this->pluralRules);

	if (!isset($regexUninflected) || !isset($regexIrregular)) {
		// Build the alternation regexes once and keep them with the rules.
		$regexUninflected = __enclose(join('|', $uninflected));
		$regexIrregular = __enclose(join('|', array_keys($irregular)));
		$_this->pluralRules['regexUninflected'] = $regexUninflected;
		$_this->pluralRules['regexIrregular'] = $regexIrregular;
	}

	if (preg_match('/^(' . $regexUninflected . ')$/i', $word, $regs)) {
		$_this->pluralized[$word] = $word;
		return $word;
	}

	if (preg_match('/(.*)\\b(' . $regexIrregular . ')$/i', $word, $regs)) {
		// Keep the casing of the matched irregular word's own first letter. Using the
		// first letter of $word corrupts prefixed input: "some-person" => "some-seople".
		$_this->pluralized[$word] = $regs[1] . substr($regs[2], 0, 1) . substr($irregular[strtolower($regs[2])], 1);
		return $_this->pluralized[$word];
	}

	foreach ($pluralRules as $rule => $replacement) {
		if (preg_match($rule, $word)) {
			$_this->pluralized[$word] = preg_replace($rule, $replacement, $word);
			return $_this->pluralized[$word];
		}
	}
	// No rule applied: return the word unchanged instead of falling off the end
	// with null, matching singularize().
	$_this->pluralized[$word] = $word;
	return $word;
}
/**
 * Builds the singular inflection tables and stores them on the instance.
 *
 * The built-in rule patterns, un-inflected words and irregular words are each
 * combined (via Set::pushDiff) with the values held in $this->__singularRules,
 * $this->__uninflectedSingular and $this->__irregularSingular. The result is
 * stored in $this->singularRules under the keys 'singularRules', 'uninflected'
 * and 'irregular', and the cache of already singularized words is emptied.
 *
 * @return void
 * @access private
 */
function __initSingularRules() {
	// Regex pattern => replacement; singularize() applies the first matching pattern.
	$coreRules = array(
		'/(s)tatuses$/i' => '\1\2tatus',
		'/^(.*)(menu)s$/i' => '\1\2',
		'/(quiz)zes$/i' => '\\1',
		'/(matr)ices$/i' => '\1ix',
		'/(vert|ind)ices$/i' => '\1ex',
		'/^(ox)en/i' => '\1',
		'/(alias)(es)*$/i' => '\1',
		'/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$/i' => '\1us',
		'/([ftw]ax)es/' => '\1',
		'/(cris|ax|test)es$/i' => '\1is',
		'/(shoe)s$/i' => '\1',
		'/(o)es$/i' => '\1',
		'/ouses$/' => 'ouse',
		'/uses$/' => 'us',
		'/([m|l])ice$/i' => '\1ouse',
		'/(x|ch|ss|sh)es$/i' => '\1',
		'/(m)ovies$/i' => '\1\2ovie',
		'/(s)eries$/i' => '\1\2eries',
		'/([^aeiouy]|qu)ies$/i' => '\1y',
		'/([lr])ves$/i' => '\1f',
		'/(tive)s$/i' => '\1',
		'/(hive)s$/i' => '\1',
		'/(drive)s$/i' => '\1',
		'/([^fo])ves$/i' => '\1fe',
		'/(^analy)ses$/i' => '\1sis',
		'/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i' => '\1\2sis',
		'/([ti])a$/i' => '\1um',
		'/(p)eople$/i' => '\1\2erson',
		'/(m)en$/i' => '\1an',
		'/(c)hildren$/i' => '\1\2hild',
		'/(n)ews$/i' => '\1\2ews',
		'/^(.*us)$/' => '\\1',
		'/s$/i' => ''
	);

	// Regex fragments for words whose singular equals their plural.
	$coreUninflected = array(
		'.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox',
		'.*sheep', '.*ss', 'Amoyese', 'bison', 'Borghese', 'bream',
		'breeches', 'britches', 'buffalo', 'cantus', 'carp', 'chassis',
		'clippers', 'cod', 'coitus', 'Congoese', 'contretemps', 'corps',
		'debris', 'diabetes', 'djinn', 'eland', 'elk', 'equipment',
		'Faroese', 'flounder', 'Foochowese', 'gallows', 'Genevese', 'Genoese',
		'Gilbertese', 'graffiti', 'headquarters', 'herpes', 'hijinks', 'Hottentotese',
		'information', 'innings', 'jackanapes', 'Kiplingese', 'Kongoese', 'Lucchese',
		'mackerel', 'Maltese', 'media', 'mews', 'moose', 'mumps',
		'Nankingese', 'news', 'nexus', 'Niasese', 'Pekingese', 'Piedmontese',
		'pincers', 'Pistoiese', 'pliers', 'Portuguese', 'proceedings', 'rabies',
		'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors', 'sea[- ]bass',
		'series', 'Shavese', 'shears', 'siemens', 'species', 'swine',
		'testes', 'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese',
		'whiting', 'wildebeest', 'Yengeese'
	);

	// Plural => singular for words that follow no pattern rule.
	$coreIrregular = array(
		'atlases' => 'atlas',
		'beefs' => 'beef',
		'brothers' => 'brother',
		'children' => 'child',
		'corpuses' => 'corpus',
		'cows' => 'cow',
		'ganglions' => 'ganglion',
		'genies' => 'genie',
		'genera' => 'genus',
		'graffiti' => 'graffito',
		'hoofs' => 'hoof',
		'loaves' => 'loaf',
		'men' => 'man',
		'monies' => 'money',
		'mongooses' => 'mongoose',
		'moves' => 'move',
		'mythoi' => 'mythos',
		'numina' => 'numen',
		'occiputs' => 'occiput',
		'octopuses' => 'octopus',
		'opuses' => 'opus',
		'oxen' => 'ox',
		'penises' => 'penis',
		'people' => 'person',
		'sexes' => 'sex',
		'soliloquies' => 'soliloquy',
		'testes' => 'testis',
		'trilbys' => 'trilby',
		'turfs' => 'turf',
		'waves' => 'wave'
	);

	$mergedRules = Set::pushDiff($this->__singularRules, $coreRules);
	$mergedUninflected = Set::pushDiff($this->__uninflectedSingular, $coreUninflected);
	$mergedIrregular = Set::pushDiff($this->__irregularSingular, $coreIrregular);

	$this->singularRules = array(
		'singularRules' => $mergedRules,
		'uninflected' => $mergedUninflected,
		'irregular' => $mergedIrregular
	);
	// Results cached under earlier rules are no longer valid.
	$this->singularized = array();
}
/**
 * Return $word in singular form.
 *
 * Lookup order: the per-instance cache, then the un-inflected word list (word is
 * returned unchanged), then the irregular word map, then the first matching
 * regex rule. If nothing matches, $word is returned unchanged. Every result is
 * cached on the shared instance.
 *
 * @param string $word Word in plural
 * @return string Word in singular
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
function singularize($word) {
	$_this =& Inflector::getInstance();
	if (empty($_this->singularRules)) {
		$_this->__initSingularRules();
	}

	if (isset($_this->singularized[$word])) {
		return $_this->singularized[$word];
	}
	// Brings $singularRules, $uninflected, $irregular (and, once built,
	// $regexUninflected / $regexIrregular) into local scope.
	extract($_this->singularRules);

	if (!isset($regexUninflected) || !isset($regexIrregular)) {
		// Build the alternation regexes once and keep them with the rules.
		$regexUninflected = __enclose(join('|', $uninflected));
		$regexIrregular = __enclose(join('|', array_keys($irregular)));
		$_this->singularRules['regexUninflected'] = $regexUninflected;
		$_this->singularRules['regexIrregular'] = $regexIrregular;
	}

	if (preg_match('/^(' . $regexUninflected . ')$/i', $word, $regs)) {
		$_this->singularized[$word] = $word;
		return $word;
	}

	if (preg_match('/(.*)\\b(' . $regexIrregular . ')$/i', $word, $regs)) {
		// Keep the casing of the matched irregular word's own first letter. Using the
		// first letter of $word corrupts prefixed input: "some-people" => "some-serson".
		$_this->singularized[$word] = $regs[1] . substr($regs[2], 0, 1) . substr($irregular[strtolower($regs[2])], 1);
		return $_this->singularized[$word];
	}

	foreach ($singularRules as $rule => $replacement) {
		if (preg_match($rule, $word)) {
			$_this->singularized[$word] = preg_replace($rule, $replacement, $word);
			return $_this->singularized[$word];
		}
	}
	$_this->singularized[$word] = $word;
	return $word;
}
/**
 * Converts a lower_case_and_underscored_word into a CamelCased word.
 *
 * Underscores become spaces, every space-separated word gets an upper-case
 * first letter, then all spaces are removed.
 *
 * @param string $lowerCaseAndUnderscoredWord Word to camelize
 * @return string Camelized word. LikeThis.
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
function camelize($lowerCaseAndUnderscoredWord) {
	$spaced = str_replace("_", " ", $lowerCaseAndUnderscoredWord);
	$capitalized = ucwords($spaced);
	return str_replace(" ", "", $capitalized);
}
/**
 * Converts a camelCasedWord into an underscored_word.
 *
 * An underscore is inserted before every upper-case A-Z letter that directly
 * follows a word character, then the whole string is lower-cased.
 *
 * @param string $camelCasedWord Camel-cased word to be "underscorized"
 * @return string Underscore-syntaxed version of the $camelCasedWord
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
function underscore($camelCasedWord) {
	$separated = preg_replace('/(?<=\\w)([A-Z])/', '_\\1', $camelCasedWord);
	return strtolower($separated);
}
/**
 * Converts an underscored_word_group into a Human Readable Word Group.
 *
 * Underscores are replaced by spaces and the first letter of every
 * space-separated word is upper-cased.
 *
 * @param string $lowerCaseAndUnderscoredWord String to be made more readable
 * @return string Human-readable string
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
function humanize($lowerCaseAndUnderscoredWord) {
	$words = str_replace("_", " ", $lowerCaseAndUnderscoredWord);
	return ucwords($words);
}
/**
 * Returns the database table name for a model class name
 * ("people" for the model class "Person").
 *
 * The class name is underscored first and the result is then pluralized.
 *
 * @param string $className Name of class to get database table name for
 * @return string Name of the database table for given class
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
function tableize($className) {
	$underscored = Inflector::underscore($className);
	return Inflector::pluralize($underscored);
}
/**
 * Returns the Cake model class name for a database table name
 * ("Person" for the database table "people").
 *
 * The table name is singularized first and the result is then camelized.
 *
 * @param string $tableName Name of database table to get class name for
 * @return string Class name
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
function classify($tableName) {
	$singular = Inflector::singularize($tableName);
	return Inflector::camelize($singular);
}
/**
 * Returns camelBacked version of an underscored string.
 *
 * The string is underscored, camelized, and then only its first character is
 * lower-cased; the rest of the camelized string is kept as-is.
 *
 * @param string $string
 * @return string in variable form
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
function variable($string) {
	$camelized = Inflector::camelize(Inflector::underscore($string));
	// Lower-case the first character in place. The former
	// preg_replace('/\\w/', ...) overwrote the first *word* character with the
	// first character of the string, which corrupted input that starts with a
	// non-word character ("-foo" became "--oo").
	return strtolower(substr($camelized, 0, 1)) . substr($camelized, 1);
}
/**
 * Returns a slug version of $string.
 *
 * Applies, in order: the accented-character replacements listed in the map
 * below, replacement of every remaining non-word, non-whitespace character by
 * a space, collapsing of each whitespace run into $replacement, and finally
 * removal of leading and trailing $replacement characters.
 *
 * @param string $string
 * @param string $replacement
 * @return string
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
function slug($string, $replacement = '_') {
	if (!class_exists('String')) {
		require LIBS . 'string.php';
	}

	// Pattern that strips the replacement character(s) from both ends of the result.
	$quoted = preg_quote($replacement, '/');
	$trimPattern = String::insert('/^[:replacement]+|[:replacement]+$/', array('replacement' => $quoted));

	// Pattern => replacement, applied top to bottom.
	$map = array(
		'/à|á|å|â/' => 'a',
		'/è|é|ê|ẽ|ë/' => 'e',
		'/ì|í|î/' => 'i',
		'/ò|ó|ô|ø/' => 'o',
		'/ù|ú|ů|û/' => 'u',
		'/ç/' => 'c',
		'/ñ/' => 'n',
		'/ä|æ/' => 'ae',
		'/ö/' => 'oe',
		'/ü/' => 'ue',
		'/Ä/' => 'Ae',
		'/Ü/' => 'Ue',
		'/Ö/' => 'Oe',
		'/ß/' => 'ss',
		'/[^\w\s]/' => ' ',
		'/\\s+/' => $replacement,
		$trimPattern => ''
	);

	$patterns = array_keys($map);
	$substitutions = array_values($map);
	return preg_replace($patterns, $substitutions, $string);
}
521 }
/**
 * Wraps a string in a non-capturing regex group for use in preg matching.
 *
 * @param string $string String to enclose
 * @return string The string wrapped as "(?:" . $string . ")"
 */
function __enclose($string) {
	return sprintf('(?:%s)', $string);
}
531 ?>
532