inflector.php

Go to the documentation of this file.
00001 <?php
00002 /* SVN FILE: $Id: inflector_8php-source.html 675 2008-12-26 00:27:14Z gwoo $ */
00003 /**
00004  * Pluralize and singularize English words.
00005  *
00006  * Used by Cake's naming conventions throughout the framework.
00007  *
00008  * PHP versions 4 and 5
00009  *
00010  * CakePHP(tm) :  Rapid Development Framework <http://www.cakephp.org/>
00011  * Copyright 2005-2008, Cake Software Foundation, Inc.
00012  *                              1785 E. Sahara Avenue, Suite 490-204
00013  *                              Las Vegas, Nevada 89104
00014  *
00015  * Licensed under The MIT License
00016  * Redistributions of files must retain the above copyright notice.
00017  *
00018  * @filesource
00019  * @copyright       Copyright 2005-2008, Cake Software Foundation, Inc.
00020  * @link                http://www.cakefoundation.org/projects/info/cakephp CakePHP(tm) Project
00021  * @package         cake
00022  * @subpackage      cake.cake.libs
00023  * @since           CakePHP(tm) v 0.2.9
00024  * @version         $Revision: 675 $
00025  * @modifiedby      $LastChangedBy: gwoo $
00026  * @lastmodified    $Date: 2008-12-25 18:27:14 -0600 (Thu, 25 Dec 2008) $
00027  * @license         http://www.opensource.org/licenses/mit-license.php The MIT License
00028  */
00029 /**
00030  * Included libraries.
00031  *
00032  */
00033     if (!class_exists('Object')) {
00034          uses('object');
00035     }
00036     uses('Set');
00037 /**
00038  * Pluralize and singularize English words.
00039  *
00040  * Inflector pluralizes and singularizes English nouns.
00041  * Used by Cake's naming conventions throughout the framework.
00042  * All public methods are meant to be called statically, e.g. Inflector::pluralize('post').
00043  *
00044  * @package     cake
00045  * @subpackage  cake.cake.libs
00046  */
class Inflector extends Object {
/**
 * Pluralization data, filled lazily by __initPluralRules().
 *
 * Keys once initialized: 'pluralRules' (regex => replacement), 'uninflected'
 * (list of regex fragments), 'irregular' (singular => plural) and, after the
 * first pluralize() call, the compiled 'regexUninflected' / 'regexIrregular'.
 *
 * @var array
 * @access public
 */
    var $pluralRules = array();
/**
 * Cache of already pluralized words (word => plural).
 *
 * @var array
 * @access public
 */
    var $pluralized = array();
/**
 * Singularization data, filled lazily by __initSingularRules().
 *
 * Keys once initialized: 'singularRules' (regex => replacement), 'uninflected'
 * (list of regex fragments), 'irregular' (plural => singular) and, after the
 * first singularize() call, the compiled 'regexUninflected' / 'regexIrregular'.
 *
 * @var array
 * @access public
 */
    var $singularRules = array();
/**
 * Cache of already singularized words (word => singular).
 *
 * @var array
 * @access public
 */
    var $singularized = array();
/**
 * Constructor. Only delegates to the parent constructor.
 *
 */
    function __construct() {
        parent::__construct();
    }
/**
 * Gets a reference to the shared Inflector instance, creating it on first use.
 *
 * @return object Inflector instance
 * @access public
 */
    function &getInstance() {
        static $instance = array();

        if (!isset($instance[0]) || !$instance[0]) {
            // Plain assignment on purpose: "=& new" is deprecated in PHP 5 and
            // rejected by PHP 7+, while "= new" works on every version.
            $instance[0] = new Inflector();
        }

        return $instance[0];
    }
/**
 * Initializes plural inflection rules.
 *
 * Builds the core rule set and, when CONFIGS . 'inflections.php' exists,
 * includes it and merges the variables it defines ($pluralRules,
 * $uninflectedPlural, $irregularPlural) with the core defaults through
 * Set::pushDiff(). The result is stored in $pluralRules on the shared
 * instance and the pluralize() cache is reset.
 *
 * @access protected
 */
    function __initPluralRules() {
        $_this =& Inflector::getInstance();

        // Order matters: pluralize() applies the first rule that matches.
        $corePluralRules = array(
            '/(s)tatus$/i' => '\1\2tatuses',
            '/(quiz)$/i' => '\1zes',
            '/^(ox)$/i' => '\1\2en',
            '/([m|l])ouse$/i' => '\1ice',
            '/(matr|vert|ind)(ix|ex)$/i'  => '\1ices',
            '/(x|ch|ss|sh)$/i' => '\1es',
            '/([^aeiouy]|qu)y$/i' => '\1ies',
            '/(hive)$/i' => '\1s',
            '/(?:([^f])fe|([lr])f)$/i' => '\1\2ves',
            '/sis$/i' => 'ses',
            '/([ti])um$/i' => '\1a',
            '/(p)erson$/i' => '\1eople',
            '/(m)an$/i' => '\1en',
            '/(c)hild$/i' => '\1hildren',
            '/(buffal|tomat)o$/i' => '\1\2oes',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$/i' => '\1i',
            '/us$/' => 'uses',
            '/(alias)$/i' => '\1es',
            '/(ax|cri|test)is$/i' => '\1es',
            '/s$/' => 's',
            '/$/' => 's',
        );

        // Regex fragments for words whose plural equals the singular.
        $coreUninflectedPlural = array(
            '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox', '.*sheep', 'Amoyese',
            'bison', 'Borghese', 'bream', 'breeches', 'britches', 'buffalo', 'cantus', 'carp', 'chassis', 'clippers',
            'cod', 'coitus', 'Congoese', 'contretemps', 'corps', 'debris', 'diabetes', 'djinn', 'eland', 'elk',
            'equipment', 'Faroese', 'flounder', 'Foochowese', 'gallows', 'Genevese', 'Genoese', 'Gilbertese', 'graffiti',
            'headquarters', 'herpes', 'hijinks', 'Hottentotese', 'information', 'innings', 'jackanapes', 'Kiplingese',
            'Kongoese', 'Lucchese', 'mackerel', 'Maltese', 'media', 'mews', 'moose', 'mumps', 'Nankingese', 'news',
            'nexus', 'Niasese', 'Pekingese', 'People', 'Piedmontese', 'pincers', 'Pistoiese', 'pliers', 'Portuguese', 'proceedings',
            'rabies', 'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors', 'sea[- ]bass', 'series', 'Shavese', 'shears',
            'siemens', 'species', 'swine', 'testes', 'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese',
            'whiting', 'wildebeest', 'Yengeese',
        );

        // singular => plural for words no rule handles correctly.
        $coreIrregularPlural = array(
            'atlas' => 'atlases',
            'beef' => 'beefs',
            'brother' => 'brothers',
            'child' => 'children',
            'corpus' => 'corpuses',
            'cow' => 'cows',
            'ganglion' => 'ganglions',
            'genie' => 'genies',
            'genus' => 'genera',
            'graffito' => 'graffiti',
            'hoof' => 'hoofs',
            'loaf' => 'loaves',
            'man' => 'men',
            'money' => 'monies',
            'mongoose' => 'mongooses',
            'move' => 'moves',
            'mythos' => 'mythoi',
            'numen' => 'numina',
            'occiput' => 'occiputs',
            'octopus' => 'octopuses',
            'opus' => 'opuses',
            'ox' => 'oxen',
            'penis' => 'penises',
            'person' => 'people',
            'sex' => 'sexes',
            'soliloquy' => 'soliloquies',
            'testis' => 'testes',
            'trilby' => 'trilbys',
            'turf' => 'turfs',
        );

        // Defaults used when no application-level configuration exists.
        // $pluralRules doubles as the variable the included file may overwrite.
        $pluralRules = $corePluralRules;
        $uninflected = $coreUninflectedPlural;
        $irregular = $coreIrregularPlural;

        if (file_exists(CONFIGS . 'inflections.php')) {
            // The included file runs in this scope and may define
            // $pluralRules, $uninflectedPlural and $irregularPlural.
            include(CONFIGS.'inflections.php');
            $pluralRules = Set::pushDiff($pluralRules, $corePluralRules);
            // Only merge what the file actually defined, so an incomplete
            // configuration keeps the core defaults instead of raising notices.
            if (isset($uninflectedPlural)) {
                $uninflected = Set::pushDiff($uninflectedPlural, $coreUninflectedPlural);
            }
            if (isset($irregularPlural)) {
                $irregular = Set::pushDiff($irregularPlural, $coreIrregularPlural);
            }
        }
        $_this->pluralRules = array('pluralRules' => $pluralRules, 'uninflected' => $uninflected, 'irregular' => $irregular);
        $_this->pluralized = array();
    }
/**
 * Return $word in plural form.
 *
 * Lookup order: result cache, irregular words, uninflected words, then the
 * first matching regex rule. Results are cached per input word.
 *
 * @param string $word Word in singular
 * @return string Word in plural
 * @access public
 * @static
 */
    function pluralize($word) {
        $_this =& Inflector::getInstance();
        if (empty($_this->pluralRules)) {
            $_this->__initPluralRules();
        }

        if (isset($_this->pluralized[$word])) {
            return $_this->pluralized[$word];
        }

        $rules =& $_this->pluralRules;
        if (!isset($rules['regexUninflected']) || !isset($rules['regexIrregular'])) {
            // Compile the alternations once and keep them with the rules.
            $rules['regexUninflected'] = __enclose(join( '|', $rules['uninflected']));
            $rules['regexIrregular'] = __enclose(join( '|', array_keys($rules['irregular'])));
        }

        if (preg_match('/(.*)\\b(' . $rules['regexIrregular'] . ')$/i', $word, $regs)) {
            // Keep the original capitalisation of the irregular word itself.
            // The first letter must come from the matched word ($regs[2]), not
            // from the whole input, otherwise "blue man" would become "blue ben".
            $plural = $rules['irregular'][strtolower($regs[2])];
            $_this->pluralized[$word] = $regs[1] . substr($regs[2], 0, 1) . substr($plural, 1);
            return $_this->pluralized[$word];
        }

        if (preg_match('/^(' . $rules['regexUninflected'] . ')$/i', $word, $regs)) {
            $_this->pluralized[$word] = $word;
            return $word;
        }

        foreach ($rules['pluralRules'] as $rule => $replacement) {
            if (preg_match($rule, $word)) {
                $_this->pluralized[$word] = preg_replace($rule, $replacement, $word);
                return $_this->pluralized[$word];
            }
        }
        $_this->pluralized[$word] = $word;
        return $word;
    }
/**
 * Initializes singular inflection rules.
 *
 * Builds the core rule set and, when CONFIGS . 'inflections.php' exists,
 * includes it and merges the variables it defines ($singularRules,
 * $uninflectedSingular, $irregularSingular) with the core defaults through
 * Set::pushDiff(). The result is stored in $singularRules on the shared
 * instance and the singularize() cache is reset.
 *
 * @access protected
 */
    function __initSingularRules() {
        $_this =& Inflector::getInstance();

        // Order matters: singularize() applies the first rule that matches.
        $coreSingularRules = array(
            '/(s)tatuses$/i' => '\1\2tatus',
            '/^(.*)(menu)s$/i' => '\1\2',
            '/(quiz)zes$/i' => '\\1',
            '/(matr)ices$/i' => '\1ix',
            '/(vert|ind)ices$/i' => '\1ex',
            '/^(ox)en/i' => '\1',
            '/(alias)(es)*$/i' => '\1',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$/i' => '\1us',
            '/(cris|ax|test)es$/i' => '\1is',
            '/(shoe)s$/i' => '\1',
            '/(o)es$/i' => '\1',
            '/ouses$/' => 'ouse',
            '/uses$/' => 'us',
            '/([m|l])ice$/i' => '\1ouse',
            '/(x|ch|ss|sh)es$/i' => '\1',
            '/(m)ovies$/i' => '\1\2ovie',
            '/(s)eries$/i' => '\1\2eries',
            '/([^aeiouy]|qu)ies$/i' => '\1y',
            '/([lr])ves$/i' => '\1f',
            '/(tive)s$/i' => '\1',
            '/(hive)s$/i' => '\1',
            '/(drive)s$/i' => '\1',
            '/([^f])ves$/i' => '\1fe',
            '/(^analy)ses$/i' => '\1sis',
            '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i' => '\1\2sis',
            '/([ti])a$/i' => '\1um',
            '/(p)eople$/i' => '\1\2erson',
            '/(m)en$/i' => '\1an',
            '/(c)hildren$/i' => '\1\2hild',
            '/(n)ews$/i' => '\1\2ews',
            '/^(.*us)$/' => '\\1',
            '/s$/i' => '',
        );

        // Regex fragments for words whose singular equals the plural.
        $coreUninflectedSingular = array(
            '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox', '.*sheep', '.*ss', 'Amoyese',
            'bison', 'Borghese', 'bream', 'breeches', 'britches', 'buffalo', 'cantus', 'carp', 'chassis', 'clippers',
            'cod', 'coitus', 'Congoese', 'contretemps', 'corps', 'debris', 'diabetes', 'djinn', 'eland', 'elk',
            'equipment', 'Faroese', 'flounder', 'Foochowese', 'gallows', 'Genevese', 'Genoese', 'Gilbertese', 'graffiti',
            'headquarters', 'herpes', 'hijinks', 'Hottentotese', 'information', 'innings', 'jackanapes', 'Kiplingese',
            'Kongoese', 'Lucchese', 'mackerel', 'Maltese', 'media', 'mews', 'moose', 'mumps', 'Nankingese', 'news',
            'nexus', 'Niasese', 'Pekingese', 'Piedmontese', 'pincers', 'Pistoiese', 'pliers', 'Portuguese', 'proceedings',
            'rabies', 'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors', 'sea[- ]bass', 'series', 'Shavese', 'shears',
            'siemens', 'species', 'swine', 'testes', 'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese',
            'whiting', 'wildebeest', 'Yengeese',
        );

        // plural => singular for words no rule handles correctly.
        $coreIrregularSingular = array(
            'atlases' => 'atlas',
            'beefs' => 'beef',
            'brothers' => 'brother',
            'children' => 'child',
            'corpuses' => 'corpus',
            'cows' => 'cow',
            'ganglions' => 'ganglion',
            'genies' => 'genie',
            'genera' => 'genus',
            'graffiti' => 'graffito',
            'hoofs' => 'hoof',
            'loaves' => 'loaf',
            'men' => 'man',
            'monies' => 'money',
            'mongooses' => 'mongoose',
            'moves' => 'move',
            'mythoi' => 'mythos',
            'numina' => 'numen',
            'occiputs' => 'occiput',
            'octopuses' => 'octopus',
            'opuses' => 'opus',
            'oxen' => 'ox',
            'penises' => 'penis',
            'people' => 'person',
            'sexes' => 'sex',
            'soliloquies' => 'soliloquy',
            'testes' => 'testis',
            'trilbys' => 'trilby',
            'turfs' => 'turf',
        );

        // Defaults used when no application-level configuration exists.
        // $singularRules doubles as the variable the included file may overwrite.
        $singularRules = $coreSingularRules;
        $uninflected = $coreUninflectedSingular;
        $irregular = $coreIrregularSingular;

        if (file_exists(CONFIGS . 'inflections.php')) {
            // The included file runs in this scope and may define
            // $singularRules, $uninflectedSingular and $irregularSingular.
            include(CONFIGS.'inflections.php');
            $singularRules = Set::pushDiff($singularRules, $coreSingularRules);
            // Only merge what the file actually defined, so an incomplete
            // configuration keeps the core defaults instead of raising notices.
            if (isset($uninflectedSingular)) {
                $uninflected = Set::pushDiff($uninflectedSingular, $coreUninflectedSingular);
            }
            if (isset($irregularSingular)) {
                $irregular = Set::pushDiff($irregularSingular, $coreIrregularSingular);
            }
        }
        $_this->singularRules = array('singularRules' => $singularRules, 'uninflected' => $uninflected, 'irregular' => $irregular);
        $_this->singularized = array();
    }
/**
 * Return $word in singular form.
 *
 * Lookup order: result cache, irregular words, uninflected words, then the
 * first matching regex rule. Results are cached per input word.
 *
 * @param string $word Word in plural
 * @return string Word in singular
 * @access public
 * @static
 */
    function singularize($word) {
        $_this =& Inflector::getInstance();
        if (empty($_this->singularRules)) {
            $_this->__initSingularRules();
        }

        if (isset($_this->singularized[$word])) {
            return $_this->singularized[$word];
        }

        $rules =& $_this->singularRules;
        if (!isset($rules['regexUninflected']) || !isset($rules['regexIrregular'])) {
            // Compile the alternations once and keep them with the rules.
            $rules['regexUninflected'] = __enclose(join( '|', $rules['uninflected']));
            $rules['regexIrregular'] = __enclose(join( '|', array_keys($rules['irregular'])));
        }

        if (preg_match('/(.*)\\b(' . $rules['regexIrregular'] . ')$/i', $word, $regs)) {
            // Keep the original capitalisation of the irregular word itself.
            // The first letter must come from the matched word ($regs[2]), not
            // from the whole input, otherwise "blue men" would become "blue ban".
            $singular = $rules['irregular'][strtolower($regs[2])];
            $_this->singularized[$word] = $regs[1] . substr($regs[2], 0, 1) . substr($singular, 1);
            return $_this->singularized[$word];
        }

        if (preg_match('/^(' . $rules['regexUninflected'] . ')$/i', $word, $regs)) {
            $_this->singularized[$word] = $word;
            return $word;
        }

        foreach ($rules['singularRules'] as $rule => $replacement) {
            if (preg_match($rule, $word)) {
                $_this->singularized[$word] = preg_replace($rule, $replacement, $word);
                return $_this->singularized[$word];
            }
        }
        $_this->singularized[$word] = $word;
        return $word;
    }
/**
 * Returns given $lowerCaseAndUnderscoredWord as a CamelCased word.
 *
 * Underscores are treated as word separators, every word is capitalised
 * and the words are joined without spaces. The first letter is upper-cased
 * as well; use variable() for a lower-case first letter.
 *
 * @param string $lowerCaseAndUnderscoredWord Word to camelize
 * @return string Camelized word. LikeThis.
 * @access public
 * @static
 */
    function camelize($lowerCaseAndUnderscoredWord) {
        $words = ucwords(str_replace("_", " ", $lowerCaseAndUnderscoredWord));
        return str_replace(" ", "", $words);
    }
/**
 * Returns an underscore-syntaxed (like_this_dear_reader) version of the $camelCasedWord.
 *
 * An underscore is inserted before every upper-case ASCII letter that follows
 * a word character, then the whole string is lower-cased.
 *
 * @param string $camelCasedWord Camel-cased word to be "underscorized"
 * @return string Underscore-syntaxed version of the $camelCasedWord
 * @access public
 * @static
 */
    function underscore($camelCasedWord) {
        return strtolower(preg_replace('/(?<=\\w)([A-Z])/', '_\\1', $camelCasedWord));
    }
/**
 * Returns a human-readable string from $lowerCaseAndUnderscoredWord,
 * by replacing underscores with a space, and by upper-casing the initial characters.
 *
 * @param string $lowerCaseAndUnderscoredWord String to be made more readable
 * @return string Human-readable string
 * @access public
 * @static
 */
    function humanize($lowerCaseAndUnderscoredWord) {
        return ucwords(str_replace("_", " ", $lowerCaseAndUnderscoredWord));
    }
/**
 * Returns corresponding table name for given $className. ("posts" for the model class "Post").
 *
 * @param string $className Name of class to get database table name for
 * @return string Name of the database table for given class
 * @access public
 * @static
 */
    function tableize($className) {
        return Inflector::pluralize(Inflector::underscore($className));
    }
/**
 * Returns Cake model class name ("Post" for the database table "posts".) for given database table.
 *
 * @param string $tableName Name of database table to get class name for
 * @return string Class name
 * @access public
 * @static
 */
    function classify($tableName) {
        return Inflector::camelize(Inflector::singularize($tableName));
    }
/**
 * Returns camelBacked version of a string.
 *
 * The string is camelized and its first character is lower-cased.
 *
 * @param string $string
 * @return string camelBacked string, likeThis
 * @access public
 * @static
 */
    function variable($string) {
        $camelized = Inflector::camelize(Inflector::underscore($string));
        if ($camelized === '') {
            return $camelized;
        }
        // Lower-case the first character directly. Replacing "the first word
        // character" by the lower-cased first character corrupts inputs that
        // start with a non-word character (e.g. "-foo" became "--oo").
        return strtolower(substr($camelized, 0, 1)) . substr($camelized, 1);
    }
/**
 * Returns a string with every run of whitespace converted to $replacement.
 *
 * Characters that are neither word characters nor whitespace are first turned
 * into a space, so they end up as $replacement as well instead of being removed
 * (e.g. "foo!bar" gives "foo_bar", "hello!" gives "hello_").
 *
 * @param string $string
 * @param string $replacement
 * @return string
 * @access public
 * @static
 */
    function slug($string, $replacement = '_') {
        $patterns = array('/[^\w\s]/', '/\\s+/');
        $replacements = array(' ', $replacement);
        return preg_replace($patterns, $replacements, $string);
    }
}
00428 /**
00429  * Enclose a string for preg matching.
00430  *
00431  * @param string $string String to enclose
00432  * @return string Enclosed string
00433  */
00434     function __enclose($string) {
00435         return '(?:' . $string . ')';
00436     }
00437 ?>