inflector.php

Go to the documentation of this file.
00001 <?php
00002 /* SVN FILE: $Id: inflector_8php-source.html 580 2008-07-01 14:45:49Z gwoo $ */
00003 /**
00004  * Pluralize and singularize English words.
00005  *
00006  * Used by Cake's naming conventions throughout the framework.
00007  *
00008  * PHP versions 4 and 5
00009  *
00010  * CakePHP(tm) :  Rapid Development Framework <http://www.cakephp.org/>
00011  * Copyright 2005-2008, Cake Software Foundation, Inc.
00012  *                              1785 E. Sahara Avenue, Suite 490-204
00013  *                              Las Vegas, Nevada 89104
00014  *
00015  * Licensed under The MIT License
00016  * Redistributions of files must retain the above copyright notice.
00017  *
00018  * @filesource
00019  * @copyright       Copyright 2005-2008, Cake Software Foundation, Inc.
00020  * @link                http://www.cakefoundation.org/projects/info/cakephp CakePHP(tm) Project
00021  * @package         cake
00022  * @subpackage      cake.cake.libs
00023  * @since           CakePHP(tm) v 0.2.9
00024  * @version         $Revision: 580 $
00025  * @modifiedby      $LastChangedBy: gwoo $
00026  * @lastmodified    $Date: 2008-07-01 09:45:49 -0500 (Tue, 01 Jul 2008) $
00027  * @license         http://www.opensource.org/licenses/mit-license.php The MIT License
00028  */
00029 /**
00030  * Included libraries.
00031  *
00032  */
// Load the base Object class, which Inflector extends, unless it is already declared.
if (class_exists('Object') === false) {
    uses('object');
}
// Load Set as well; Inflector calls Set::pushDiff() when merging inflection rules.
if (class_exists('Set') === false) {
    uses('set');
}
00039 /**
00040  * Pluralize and singularize English words.
00041  *
00042  * Inflector pluralizes and singularizes English nouns.
00043  * Used by Cake's naming conventions throughout the framework.
00044  * Methods are meant to be called statically, e.g. Inflector::pluralize('post').
00045  *
00046  * @package     cake
00047  * @subpackage  cake.cake.libs
00048  */
class Inflector extends Object {
/**
 * Plural inflection data, filled in by __initPluralRules().
 *
 * Holds the keys 'pluralRules', 'uninflected' and 'irregular'; pluralize()
 * adds 'regexUninflected' and 'regexIrregular' the first time it runs.
 *
 * @var array
 * @access public
 */
    var $pluralRules = array();
/**
 * Cache of already pluralized words (input word => plural form).
 *
 * @var array
 * @access public
 */
    var $pluralized = array();
/**
 * Singular inflection data, filled in by __initSingularRules().
 *
 * Holds the keys 'singularRules', 'uninflected' and 'irregular'; singularize()
 * adds 'regexUninflected' and 'regexIrregular' the first time it runs.
 *
 * @var array
 * @access public
 */
    var $singularRules = array();
/**
 * Cache of already singularized words (input word => singular form).
 *
 * @var array
 * @access public
 */
    var $singularized = array();
/**
 * Constructor. Only delegates to the parent constructor.
 *
 */
    function __construct() {
        parent::__construct();
    }
/**
 * Gets a reference to the shared Inflector instance, creating it on first use.
 *
 * @return object The single Inflector instance
 * @access public
 */
    function &getInstance() {
        static $instance = array();

        if (!isset($instance[0]) || !$instance[0]) {
            // Plain assignment: "=& new" is deprecated as of PHP 5.3 and the
            // freshly built object carries no state that a copy could lose.
            $instance[0] = new Inflector();
        }

        return $instance[0];
    }
/**
 * Initializes plural inflection rules.
 *
 * Builds the core rule set, the list of uninflected words and the map of
 * irregular plurals. When CONFIGS . 'inflections.php' exists it is included
 * and whatever it defines in $pluralRules, $uninflectedPlural and
 * $irregularPlural is combined with the core data through Set::pushDiff().
 * The result is stored on the shared instance and the plural cache is reset.
 *
 * @access protected
 */
    function __initPluralRules() {
        $_this =& Inflector::getInstance();
        // Rules are tried in order by pluralize(); the first matching pattern wins.
        $corePluralRules = array(
            '/(s)tatus$/i' => '\1\2tatuses',
            '/(quiz)$/i' => '\1zes',
            '/^(ox)$/i' => '\1\2en',
            '/([m|l])ouse$/i' => '\1ice',
            '/(matr|vert|ind)(ix|ex)$/i'  => '\1ices',
            '/(x|ch|ss|sh)$/i' => '\1es',
            '/([^aeiouy]|qu)y$/i' => '\1ies',
            '/(hive)$/i' => '\1s',
            '/(?:([^f])fe|([lr])f)$/i' => '\1\2ves',
            '/sis$/i' => 'ses',
            '/([ti])um$/i' => '\1a',
            '/(p)erson$/i' => '\1eople',
            '/(m)an$/i' => '\1en',
            '/(c)hild$/i' => '\1hildren',
            '/(buffal|tomat)o$/i' => '\1\2oes',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$/i' => '\1i',
            '/us$/' => 'uses',
            '/(alias)$/i' => '\1es',
            '/(ax|cri|test)is$/i' => '\1es',
            '/s$/' => 's',
            '/$/' => 's');

        // Entries are regex fragments; pluralize() joins them with "|" and
        // returns any word matching the combined pattern unchanged.
        $coreUninflectedPlural = array(
            '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox', '.*sheep', 'Amoyese',
            'bison', 'Borghese', 'bream', 'breeches', 'britches', 'buffalo', 'cantus', 'carp', 'chassis', 'clippers',
            'cod', 'coitus', 'Congoese', 'contretemps', 'corps', 'debris', 'diabetes', 'djinn', 'eland', 'elk',
            'equipment', 'Faroese', 'flounder', 'Foochowese', 'gallows', 'Genevese', 'Genoese', 'Gilbertese', 'graffiti',
            'headquarters', 'herpes', 'hijinks', 'Hottentotese', 'information', 'innings', 'jackanapes', 'Kiplingese',
            'Kongoese', 'Lucchese', 'mackerel', 'Maltese', 'media', 'mews', 'moose', 'mumps', 'Nankingese', 'news',
            'nexus', 'Niasese', 'Pekingese', 'People', 'Piedmontese', 'pincers', 'Pistoiese', 'pliers', 'Portuguese', 'proceedings',
            'rabies', 'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors', 'sea[- ]bass', 'series', 'Shavese', 'shears',
            'siemens', 'species', 'swine', 'testes', 'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese',
            'whiting', 'wildebeest', 'Yengeese');

        // singular => plural, looked up by the lower-cased matched word.
        $coreIrregularPlural = array(
            'atlas' => 'atlases',
            'beef' => 'beefs',
            'brother' => 'brothers',
            'child' => 'children',
            'corpus' => 'corpuses',
            'cow' => 'cows',
            'ganglion' => 'ganglions',
            'genie' => 'genies',
            'genus' => 'genera',
            'graffito' => 'graffiti',
            'hoof' => 'hoofs',
            'loaf' => 'loaves',
            'man' => 'men',
            'money' => 'monies',
            'mongoose' => 'mongooses',
            'move' => 'moves',
            'mythos' => 'mythoi',
            'numen' => 'numina',
            'occiput' => 'occiputs',
            'octopus' => 'octopuses',
            'opus' => 'opuses',
            'ox' => 'oxen',
            'penis' => 'penises',
            'person' => 'people',
            'sex' => 'sexes',
            'soliloquy' => 'soliloquies',
            'testis' => 'testes',
            'trilby' => 'trilbys',
            'turf' => 'turfs');

        $pluralRules = $corePluralRules;
        $uninflected = $coreUninflectedPlural;
        $irregular = $coreIrregularPlural;

        if (file_exists(CONFIGS . 'inflections.php')) {
            // The included file runs in this scope, so it may overwrite
            // $pluralRules and define $uninflectedPlural / $irregularPlural.
            include(CONFIGS . 'inflections.php');
            // Fall back to empty lists when the config leaves a variable out,
            // instead of reading an undefined variable.
            if (!isset($uninflectedPlural)) {
                $uninflectedPlural = array();
            }
            if (!isset($irregularPlural)) {
                $irregularPlural = array();
            }
            $pluralRules = Set::pushDiff($pluralRules, $corePluralRules);
            $uninflected = Set::pushDiff($uninflectedPlural, $coreUninflectedPlural);
            $irregular = Set::pushDiff($irregularPlural, $coreIrregularPlural);
        }
        $_this->pluralRules = array('pluralRules' => $pluralRules, 'uninflected' => $uninflected, 'irregular' => $irregular);
        $_this->pluralized = array();
    }
/**
 * Return $word in plural form.
 *
 * Lookup order: cached result, irregular words, uninflected words, then the
 * first matching regex rule. Every result is cached per input word.
 *
 * @param string $word Word in singular
 * @return string Word in plural
 * @access public
 * @static
 */
    function pluralize($word) {
        $_this =& Inflector::getInstance();
        if (!isset($_this->pluralRules) || empty($_this->pluralRules)) {
            $_this->__initPluralRules();
        }

        if (isset($_this->pluralized[$word])) {
            return $_this->pluralized[$word];
        }

        // Brings $pluralRules, $uninflected, $irregular and, once built,
        // $regexUninflected / $regexIrregular into local scope.
        extract($_this->pluralRules);
        if (!isset($regexUninflected) || !isset($regexIrregular)) {
            // Build the combined alternation patterns once and keep them on the instance.
            $regexUninflected = __enclose(join( '|', $uninflected));
            $regexIrregular = __enclose(join( '|', array_keys($irregular)));
            $_this->pluralRules['regexUninflected'] = $regexUninflected;
            $_this->pluralRules['regexIrregular'] = $regexIrregular;
        }

        if (preg_match('/(.*)\\b(' . $regexIrregular . ')$/i', $word, $regs)) {
            // Keep the first letter of the matched irregular word (preserving
            // its case) and append the rest of the mapped plural. The letter
            // must come from $regs[2], not from $word, otherwise any prefix in
            // $regs[1] would leak its first letter into the result.
            $_this->pluralized[$word] = $regs[1] . substr($regs[2], 0, 1) . substr($irregular[strtolower($regs[2])], 1);
            return $_this->pluralized[$word];
        }

        if (preg_match('/^(' . $regexUninflected . ')$/i', $word, $regs)) {
            $_this->pluralized[$word] = $word;
            return $word;
        }

        foreach ($pluralRules as $rule => $replacement) {
            if (preg_match($rule, $word)) {
                $_this->pluralized[$word] = preg_replace($rule, $replacement, $word);
                return $_this->pluralized[$word];
            }
        }
        $_this->pluralized[$word] = $word;
        return $word;
    }
/**
 * Initializes singular inflection rules.
 *
 * Builds the core rule set, the list of uninflected words and the map of
 * irregular singulars. When CONFIGS . 'inflections.php' exists it is included
 * and whatever it defines in $singularRules, $uninflectedSingular and
 * $irregularSingular is combined with the core data through Set::pushDiff().
 * The result is stored on the shared instance and the singular cache is reset.
 *
 * @access protected
 */
    function __initSingularRules() {
        $_this =& Inflector::getInstance();
        // Rules are tried in order by singularize(); the first matching pattern wins.
        $coreSingularRules = array(
            '/(s)tatuses$/i' => '\1\2tatus',
            '/^(.*)(menu)s$/i' => '\1\2',
            '/(quiz)zes$/i' => '\\1',
            '/(matr)ices$/i' => '\1ix',
            '/(vert|ind)ices$/i' => '\1ex',
            '/^(ox)en/i' => '\1',
            '/(alias)(es)*$/i' => '\1',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$/i' => '\1us',
            '/(cris|ax|test)es$/i' => '\1is',
            '/(shoe)s$/i' => '\1',
            '/(o)es$/i' => '\1',
            '/ouses$/' => 'ouse',
            '/uses$/' => 'us',
            '/([m|l])ice$/i' => '\1ouse',
            '/(x|ch|ss|sh)es$/i' => '\1',
            '/(m)ovies$/i' => '\1\2ovie',
            '/(s)eries$/i' => '\1\2eries',
            '/([^aeiouy]|qu)ies$/i' => '\1y',
            '/([lr])ves$/i' => '\1f',
            '/(tive)s$/i' => '\1',
            '/(hive)s$/i' => '\1',
            '/(drive)s$/i' => '\1',
            '/([^f])ves$/i' => '\1fe',
            '/(^analy)ses$/i' => '\1sis',
            '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i' => '\1\2sis',
            '/([ti])a$/i' => '\1um',
            '/(p)eople$/i' => '\1\2erson',
            '/(m)en$/i' => '\1an',
            '/(c)hildren$/i' => '\1\2hild',
            '/(n)ews$/i' => '\1\2ews',
            '/^(.*us)$/' => '\\1',
            '/s$/i' => '');

        // Entries are regex fragments; singularize() joins them with "|" and
        // returns any word matching the combined pattern unchanged.
        $coreUninflectedSingular = array(
            '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox', '.*sheep', '.*ss', 'Amoyese',
            'bison', 'Borghese', 'bream', 'breeches', 'britches', 'buffalo', 'cantus', 'carp', 'chassis', 'clippers',
            'cod', 'coitus', 'Congoese', 'contretemps', 'corps', 'debris', 'diabetes', 'djinn', 'eland', 'elk',
            'equipment', 'Faroese', 'flounder', 'Foochowese', 'gallows', 'Genevese', 'Genoese', 'Gilbertese', 'graffiti',
            'headquarters', 'herpes', 'hijinks', 'Hottentotese', 'information', 'innings', 'jackanapes', 'Kiplingese',
            'Kongoese', 'Lucchese', 'mackerel', 'Maltese', 'media', 'mews', 'moose', 'mumps', 'Nankingese', 'news',
            'nexus', 'Niasese', 'Pekingese', 'Piedmontese', 'pincers', 'Pistoiese', 'pliers', 'Portuguese', 'proceedings',
            'rabies', 'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors', 'sea[- ]bass', 'series', 'Shavese', 'shears',
            'siemens', 'species', 'swine', 'testes', 'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese',
            'whiting', 'wildebeest', 'Yengeese');

        // plural => singular, looked up by the lower-cased matched word.
        $coreIrregularSingular = array(
            'atlases' => 'atlas',
            'beefs' => 'beef',
            'brothers' => 'brother',
            'children' => 'child',
            'corpuses' => 'corpus',
            'cows' => 'cow',
            'ganglions' => 'ganglion',
            'genies' => 'genie',
            'genera' => 'genus',
            'graffiti' => 'graffito',
            'hoofs' => 'hoof',
            'loaves' => 'loaf',
            'men' => 'man',
            'monies' => 'money',
            'mongooses' => 'mongoose',
            'moves' => 'move',
            'mythoi' => 'mythos',
            'numina' => 'numen',
            'occiputs' => 'occiput',
            'octopuses' => 'octopus',
            'opuses' => 'opus',
            'oxen' => 'ox',
            'penises' => 'penis',
            'people' => 'person',
            'sexes' => 'sex',
            'soliloquies' => 'soliloquy',
            'testes' => 'testis',
            'trilbys' => 'trilby',
            'turfs' => 'turf');

        $singularRules = $coreSingularRules;
        $uninflected = $coreUninflectedSingular;
        $irregular = $coreIrregularSingular;

        if (file_exists(CONFIGS . 'inflections.php')) {
            // The included file runs in this scope, so it may overwrite
            // $singularRules and define $uninflectedSingular / $irregularSingular.
            include(CONFIGS . 'inflections.php');
            // Fall back to empty lists when the config leaves a variable out,
            // instead of reading an undefined variable.
            if (!isset($uninflectedSingular)) {
                $uninflectedSingular = array();
            }
            if (!isset($irregularSingular)) {
                $irregularSingular = array();
            }
            $singularRules = Set::pushDiff($singularRules, $coreSingularRules);
            $uninflected = Set::pushDiff($uninflectedSingular, $coreUninflectedSingular);
            $irregular = Set::pushDiff($irregularSingular, $coreIrregularSingular);
        }
        $_this->singularRules = array('singularRules' => $singularRules, 'uninflected' => $uninflected, 'irregular' => $irregular);
        $_this->singularized = array();
    }
/**
 * Return $word in singular form.
 *
 * Lookup order: cached result, irregular words, uninflected words, then the
 * first matching regex rule. Every result is cached per input word.
 *
 * @param string $word Word in plural
 * @return string Word in singular
 * @access public
 * @static
 */
    function singularize($word) {
        $_this =& Inflector::getInstance();
        if (!isset($_this->singularRules) || empty($_this->singularRules)) {
            $_this->__initSingularRules();
        }

        if (isset($_this->singularized[$word])) {
            return $_this->singularized[$word];
        }

        // Brings $singularRules, $uninflected, $irregular and, once built,
        // $regexUninflected / $regexIrregular into local scope.
        extract($_this->singularRules);
        if (!isset($regexUninflected) || !isset($regexIrregular)) {
            // Build the combined alternation patterns once and keep them on the instance.
            $regexUninflected = __enclose(join( '|', $uninflected));
            $regexIrregular = __enclose(join( '|', array_keys($irregular)));
            $_this->singularRules['regexUninflected'] = $regexUninflected;
            $_this->singularRules['regexIrregular'] = $regexIrregular;
        }

        if (preg_match('/(.*)\\b(' . $regexIrregular . ')$/i', $word, $regs)) {
            // Keep the first letter of the matched irregular word (preserving
            // its case) and append the rest of the mapped singular. The letter
            // must come from $regs[2], not from $word, otherwise any prefix in
            // $regs[1] would leak its first letter into the result.
            $_this->singularized[$word] = $regs[1] . substr($regs[2], 0, 1) . substr($irregular[strtolower($regs[2])], 1);
            return $_this->singularized[$word];
        }

        if (preg_match('/^(' . $regexUninflected . ')$/i', $word, $regs)) {
            $_this->singularized[$word] = $word;
            return $word;
        }

        foreach ($singularRules as $rule => $replacement) {
            if (preg_match($rule, $word)) {
                $_this->singularized[$word] = preg_replace($rule, $replacement, $word);
                return $_this->singularized[$word];
            }
        }
        $_this->singularized[$word] = $word;
        return $word;
    }
/**
 * Returns given $lowerCaseAndUnderscoredWord as a CamelCased word.
 *
 * Underscores are turned into spaces, each word is capitalized with ucwords()
 * and the spaces are removed again.
 *
 * @param string $lowerCaseAndUnderscoredWord Word to camelize
 * @return string Camelized word. LikeThis.
 * @access public
 * @static
 */
    function camelize($lowerCaseAndUnderscoredWord) {
        $replace = str_replace(" ", "", ucwords(str_replace("_", " ", $lowerCaseAndUnderscoredWord)));
        return $replace;
    }
/**
 * Returns an underscore-syntaxed (like_this_dear_reader) version of $camelCasedWord.
 *
 * An underscore is inserted before every A-Z letter that follows a word
 * character, then the whole string is lower-cased.
 *
 * @param string $camelCasedWord Camel-cased word to be "underscorized"
 * @return string Underscore-syntaxed version of $camelCasedWord
 * @access public
 * @static
 */
    function underscore($camelCasedWord) {
        $replace = strtolower(preg_replace('/(?<=\\w)([A-Z])/', '_\\1', $camelCasedWord));
        return $replace;
    }
/**
 * Returns a human-readable string from $lowerCaseAndUnderscoredWord,
 * by replacing underscores with a space, and by upper-casing the initial characters.
 *
 * @param string $lowerCaseAndUnderscoredWord String to be made more readable
 * @return string Human-readable string
 * @access public
 * @static
 */
    function humanize($lowerCaseAndUnderscoredWord) {
        $replace = ucwords(str_replace("_", " ", $lowerCaseAndUnderscoredWord));
        return $replace;
    }
/**
 * Returns corresponding table name for given $className. ("posts" for the model class "Post").
 *
 * @param string $className Name of class to get database table name for
 * @return string Name of the database table for given class
 * @access public
 * @static
 */
    function tableize($className) {
        $replace = Inflector::pluralize(Inflector::underscore($className));
        return $replace;
    }
/**
 * Returns Cake model class name ("Post" for the database table "posts".) for given database table.
 *
 * @param string $tableName Name of database table to get class name for
 * @return string Singularized, camelized class name
 * @access public
 * @static
 */
    function classify($tableName) {
        $replace = Inflector::camelize(Inflector::singularize($tableName));
        return $replace;
    }
/**
 * Returns camelBacked version of a string.
 *
 * The string is underscored, camelized, and its first character is then
 * lower-cased.
 *
 * @param string $string String to convert
 * @return string camelBacked string
 * @access public
 * @static
 */
    function variable($string) {
        $string = Inflector::camelize(Inflector::underscore($string));
        // Lower-case the first character in place. Substituting it into the
        // first \w match would overwrite a different character whenever the
        // string starts with a non-word character.
        $variable = strtolower(substr($string, 0, 1)) . substr($string, 1);
        return $variable;
    }
/**
 * Returns a slug for $string.
 *
 * The listed accented Latin characters are transliterated to ASCII, every
 * remaining character that is neither a word character nor whitespace is
 * turned into a space, runs of whitespace are replaced with $replacement, and
 * leading/trailing occurrences of $replacement are stripped.
 *
 * @param string $string String to convert
 * @param string $replacement Separator that replaces whitespace runs
 * @return string Slugged string
 * @access public
 * @static
 */
    function slug($string, $replacement = '_') {
        if(!class_exists('String')) {
            require_once LIBS . 'string.php';
        }
        // Order matters: preg_replace() applies the patterns one after another,
        // so transliteration happens before non-word characters are blanked.
        $map = array(
            '/à|á|å|â/' => 'a',
            '/è|é|ê|ẽ|ë/' => 'e',
            '/ì|í|î/' => 'i',
            '/ò|ó|ô|ø/' => 'o',
            '/ù|ú|ů|û/' => 'u',
            '/ç/' => 'c',
            '/ñ/' => 'n',
            '/ä|æ/' => 'ae',
            '/ö/' => 'oe',
            '/ü/' => 'ue',
            '/Ä/' => 'Ae',
            '/Ü/' => 'Ue',
            '/Ö/' => 'Oe',
            '/ß/' => 'ss',
            '/[^\w\s]/' => ' ',
            '/\\s+/' => $replacement,
            String::insert('/^[:replacement]+|[:replacement]+$/', array('replacement' => preg_quote($replacement, '/'))) => '',
        );
        $string = preg_replace(array_keys($map), array_values($map), $string);
        return $string;
    }
}
00458 /**
00459  * Enclose a string for preg matching.
00460  *
00461  * @param string $string String to enclose
00462  * @return string Enclosed string
00463  */
00464     function __enclose($string) {
00465         return '(?:' . $string . ')';
00466     }
00467 ?>