<?php
declare(strict_types=1);
namespace Doctrine\Inflector;
use RuntimeException;
use function chr;
use function function_exists;
use function lcfirst;
use function mb_strtolower;
use function ord;
use function preg_match;
use function preg_replace;
use function sprintf;
use function str_replace;
use function strlen;
use function strtolower;
use function strtr;
use function trim;
use function ucwords;
/**
 * String inflection helpers: case/format conversions (tableize, classify,
 * camelize, capitalize), accent removal and slug generation (unaccent,
 * urlize), and singular/plural conversion delegated to the two injected
 * WordInflector instances.
 */
class Inflector
{
    /**
     * UTF-8 transliteration table used by unaccent().
     *
     * Keys are single accented code points, values are their ASCII
     * replacements. The table is applied with strtr(), so entry order does
     * not affect the result.
     */
    private const ACCENTED_CHARACTERS = [
        // Latin-1 Supplement
        'À' => 'A',
        'Á' => 'A',
        'Â' => 'A',
        'Ã' => 'A',
        'Ä' => 'Ae',
        'Æ' => 'Ae',
        'Å' => 'Aa',
        // NOTE(review): lowercase ae maps to 'a' while the uppercase form maps
        // to 'Ae'; left untouched because changing it would alter existing slugs.
        'æ' => 'a',
        'Ç' => 'C',
        'È' => 'E',
        'É' => 'E',
        'Ê' => 'E',
        'Ë' => 'E',
        'Ì' => 'I',
        'Í' => 'I',
        'Î' => 'I',
        'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O',
        'Ó' => 'O',
        'Ô' => 'O',
        'Õ' => 'O',
        'Ö' => 'Oe',
        'Ù' => 'U',
        'Ú' => 'U',
        'Û' => 'U',
        'Ü' => 'Ue',
        'Ý' => 'Y',
        'ß' => 'ss',
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'ã' => 'a',
        'ä' => 'ae',
        'å' => 'aa',
        'ç' => 'c',
        'è' => 'e',
        'é' => 'e',
        'ê' => 'e',
        'ë' => 'e',
        'ì' => 'i',
        'í' => 'i',
        'î' => 'i',
        'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o',
        'ó' => 'o',
        'ô' => 'o',
        'õ' => 'o',
        'ö' => 'oe',
        'ù' => 'u',
        'ú' => 'u',
        'û' => 'u',
        'ü' => 'ue',
        'ý' => 'y',
        'ÿ' => 'y',
        // Latin Extended-A
        'Ā' => 'A',
        'ā' => 'a',
        'Ă' => 'A',
        'ă' => 'a',
        'Ą' => 'A',
        'ą' => 'a',
        'Ć' => 'C',
        'ć' => 'c',
        'Ĉ' => 'C',
        'ĉ' => 'c',
        'Ċ' => 'C',
        'ċ' => 'c',
        'Č' => 'C',
        'č' => 'c',
        'Ď' => 'D',
        'ď' => 'd',
        'Đ' => 'D',
        'đ' => 'd',
        'Ē' => 'E',
        'ē' => 'e',
        'Ĕ' => 'E',
        'ĕ' => 'e',
        'Ė' => 'E',
        'ė' => 'e',
        'Ę' => 'E',
        'ę' => 'e',
        'Ě' => 'E',
        'ě' => 'e',
        'Ĝ' => 'G',
        'ĝ' => 'g',
        'Ğ' => 'G',
        'ğ' => 'g',
        'Ġ' => 'G',
        'ġ' => 'g',
        'Ģ' => 'G',
        'ģ' => 'g',
        'Ĥ' => 'H',
        'ĥ' => 'h',
        'Ħ' => 'H',
        'ħ' => 'h',
        'Ĩ' => 'I',
        'ĩ' => 'i',
        'Ī' => 'I',
        'ī' => 'i',
        'Ĭ' => 'I',
        'ĭ' => 'i',
        'Į' => 'I',
        'į' => 'i',
        'İ' => 'I',
        'ı' => 'i',
        // Written as code point escapes so the keys stay single precomposed
        // characters and cannot degrade into the plain two-letter sequences
        // (which would make the entries no-ops).
        "\u{0132}" => 'IJ', // LATIN CAPITAL LIGATURE IJ
        "\u{0133}" => 'ij', // LATIN SMALL LIGATURE IJ
        'Ĵ' => 'J',
        'ĵ' => 'j',
        'Ķ' => 'K',
        'ķ' => 'k',
        'ĸ' => 'k',
        'Ĺ' => 'L',
        'ĺ' => 'l',
        'Ļ' => 'L',
        'ļ' => 'l',
        'Ľ' => 'L',
        'ľ' => 'l',
        'Ŀ' => 'L',
        'ŀ' => 'l',
        'Ł' => 'L',
        'ł' => 'l',
        'Ń' => 'N',
        'ń' => 'n',
        'Ņ' => 'N',
        'ņ' => 'n',
        'Ň' => 'N',
        'ň' => 'n',
        "\u{0149}" => 'n', // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
        "\u{014A}" => 'N', // LATIN CAPITAL LETTER ENG
        "\u{014B}" => 'n', // LATIN SMALL LETTER ENG
        'Ō' => 'O',
        'ō' => 'o',
        'Ŏ' => 'O',
        'ŏ' => 'o',
        'Ő' => 'O',
        'ő' => 'o',
        'Œ' => 'OE',
        'œ' => 'oe',
        'Ø' => 'O',
        'ø' => 'o',
        'Ŕ' => 'R',
        'ŕ' => 'r',
        'Ŗ' => 'R',
        'ŗ' => 'r',
        'Ř' => 'R',
        'ř' => 'r',
        'Ś' => 'S',
        'ś' => 's',
        'Ŝ' => 'S',
        'ŝ' => 's',
        'Ş' => 'S',
        'ş' => 's',
        'Š' => 'S',
        'š' => 's',
        'Ţ' => 'T',
        'ţ' => 't',
        'Ť' => 'T',
        'ť' => 't',
        'Ŧ' => 'T',
        'ŧ' => 't',
        'Ũ' => 'U',
        'ũ' => 'u',
        'Ū' => 'U',
        'ū' => 'u',
        'Ŭ' => 'U',
        'ŭ' => 'u',
        'Ů' => 'U',
        'ů' => 'u',
        'Ű' => 'U',
        'ű' => 'u',
        'Ų' => 'U',
        'ų' => 'u',
        'Ŵ' => 'W',
        'ŵ' => 'w',
        'Ŷ' => 'Y',
        'ŷ' => 'y',
        'Ÿ' => 'Y',
        'Ź' => 'Z',
        'ź' => 'z',
        'Ż' => 'Z',
        'ż' => 'z',
        'Ž' => 'Z',
        'ž' => 'z',
        'ſ' => 's',
        // Currency signs
        '€' => 'E',
        '£' => '',
    ];

    /**
     * Single-byte source characters for the non-UTF-8 fallback of unaccent().
     *
     * Byte N of this string is replaced by byte N of LATIN1_SINGLE_TO; the
     * trailing comment on each row shows the matching slice of that string.
     */
    private const LATIN1_SINGLE_FROM =
        "\x80\x83\x8A\x8E\x9A\x9E\x9F\xA2\xA5\xB5" // EfSZszYcYu
        . "\xC0\xC1\xC2\xC3\xC4\xC5\xC7"           // AAAAAAC
        . "\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"       // EEEEIIII
        . "\xD1\xD2\xD3\xD4\xD5\xD6\xD8"           // NOOOOOO
        . "\xD9\xDA\xDB\xDC\xDD"                   // UUUUY
        . "\xE0\xE1\xE2\xE3\xE4\xE5\xE7"           // aaaaaac
        . "\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"       // eeeeiiii
        . "\xF1\xF2\xF3\xF4\xF5\xF6\xF8"           // noooooo
        . "\xF9\xFA\xFB\xFC\xFD\xFF";              // uuuuyy

    /** ASCII replacements, position-aligned with LATIN1_SINGLE_FROM. */
    private const LATIN1_SINGLE_TO = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

    /** Single-byte characters of the non-UTF-8 fallback that expand to two ASCII letters. */
    private const LATIN1_DOUBLE = [
        "\x8C" => 'OE',
        "\x9C" => 'oe',
        "\xC6" => 'AE',
        "\xD0" => 'DH',
        "\xDE" => 'TH',
        "\xDF" => 'ss',
        "\xE6" => 'ae',
        "\xF0" => 'dh',
        "\xFE" => 'th',
    ];

    /** @var WordInflector */
    private $singularizer;

    /** @var WordInflector */
    private $pluralizer;

    /**
     * @param WordInflector $singularizer Used by singularize().
     * @param WordInflector $pluralizer   Used by pluralize().
     */
    public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
    {
        $this->singularizer = $singularizer;
        $this->pluralizer   = $pluralizer;
    }

    /**
     * Converts a word into the format for a Doctrine table name. Converts 'ModelName' to 'model_name'.
     *
     * An underscore is inserted before every ASCII uppercase letter that
     * follows a word character, then the result is lowercased.
     *
     * @throws RuntimeException When preg_replace() fails (returns null).
     */
    public function tableize(string $word): string
    {
        $tableized = preg_replace('~(?<=\\w)([A-Z])~u', '_$1', $word);

        if ($tableized === null) {
            throw new RuntimeException(sprintf(
                'preg_replace returned null for value "%s"',
                $word
            ));
        }

        return mb_strtolower($tableized);
    }

    /**
     * Converts a word into the format for a Doctrine class name. Converts 'table_name' to 'TableName'.
     *
     * Spaces, underscores and hyphens act as word separators and are removed
     * from the result.
     */
    public function classify(string $word): string
    {
        return str_replace([' ', '_', '-'], '', ucwords($word, ' _-'));
    }

    /**
     * Camelizes a word. This uses the classify() method and turns the first character to lowercase.
     */
    public function camelize(string $word): string
    {
        return lcfirst($this->classify($word));
    }

    /**
     * Uppercases words with configurable delimiters between words.
     *
     * Takes a string and capitalizes all of the words, like PHP's built-in
     * ucwords function. This extends that behavior, however, by allowing the
     * word delimiters to be configured, rather than only separating on
     * whitespace.
     *
     * Here is an example:
     * <code>
     * <?php
     * $string = 'top-o-the-morning to all_of_you!';
     * echo $inflector->capitalize($string);
     * // Top-O-The-Morning To All_of_you!
     *
     * echo $inflector->capitalize($string, '-_ ');
     * // Top-O-The-Morning To All_Of_You!
     * ?>
     * </code>
     *
     * @param string $string     The string to operate on.
     * @param string $delimiters A list of word separators.
     *
     * @return string The string with all delimiter-separated words capitalized.
     */
    public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-"): string
    {
        return ucwords($string, $delimiters);
    }

    /**
     * Checks if the given string seems like it has utf8 characters in it.
     *
     * This is a structural check only: every byte >= 0x80 must be a lead
     * byte announcing 1 to 5 continuation bytes, and exactly that many
     * bytes of the form 10bbbbbb must follow it. Pure ASCII and the empty
     * string pass.
     *
     * @param string $string The string to check for utf8 characters in.
     */
    public function seemsUtf8(string $string): bool
    {
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);

            if ($byte < 0x80) {
                continue; // 0bbbbbbb
            }

            if (($byte & 0xE0) === 0xC0) {
                $n = 1; // 110bbbbb
            } elseif (($byte & 0xF0) === 0xE0) {
                $n = 2; // 1110bbbb
            } elseif (($byte & 0xF8) === 0xF0) {
                $n = 3; // 11110bbb
            } elseif (($byte & 0xFC) === 0xF8) {
                $n = 4; // 111110bb
            } elseif (($byte & 0xFE) === 0xFC) {
                $n = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            // The next $n bytes must all exist and match 10bbbbbb.
            for ($j = 0; $j < $n; $j++) {
                if (++$i === $length || (ord($string[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Remove any illegal characters, accents, etc.
     *
     * Input without any byte >= 0x80 is returned unchanged. Input that
     * passes seemsUtf8() is transliterated with ACCENTED_CHARACTERS; any
     * other input is treated as a single-byte encoding and mapped byte by
     * byte.
     *
     * @param string $string String to unaccent
     *
     * @return string Unaccented string
     */
    public function unaccent(string $string): string
    {
        // preg_match() returns 0 for "no match" and false only on error, so
        // compare against 1 to take the fast path for plain ASCII input.
        if (preg_match('/[\x80-\xff]/', $string) !== 1) {
            return $string;
        }

        if ($this->seemsUtf8($string)) {
            return strtr($string, self::ACCENTED_CHARACTERS);
        }

        // Assume ISO-8859-1 if not UTF-8
        $string = strtr($string, self::LATIN1_SINGLE_FROM, self::LATIN1_SINGLE_TO);

        return strtr($string, self::LATIN1_DOUBLE);
    }

    /**
     * Convert any passed string to a url friendly string.
     * Converts 'My first blog post' to 'my-first-blog-post'
     *
     * @param string $string String to urlize.
     *
     * @return string Urlized string.
     *
     * @throws RuntimeException When preg_replace() fails (returns null).
     */
    public function urlize(string $string): string
    {
        // Remove all non url friendly characters with the unaccent function
        $unaccented = $this->unaccent($string);

        $lowered = function_exists('mb_strtolower')
            ? mb_strtolower($unaccented)
            : strtolower($unaccented);

        // Applied in order. The text has already been lowercased above; the
        // two patterns that look for uppercase letters are kept unchanged so
        // the output stays identical to previous versions.
        $replacements = [
            '/\W/' => ' ',
            '/([A-Z]+)([A-Z][a-z])/' => '\1_\2',
            '/([a-z\d])([A-Z])/' => '\1_\2',
            '/[^A-Z^a-z^0-9^\/]+/' => '-',
        ];

        $urlized = $lowered;

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $urlized);

            if ($replaced === null) {
                throw new RuntimeException(sprintf(
                    'preg_replace returned null for value "%s"',
                    $urlized
                ));
            }

            $urlized = $replaced;
        }

        return trim($urlized, '-');
    }

    /**
     * Returns a word in singular form.
     *
     * @param string $word The word in plural form.
     *
     * @return string The word in singular form.
     */
    public function singularize(string $word): string
    {
        return $this->singularizer->inflect($word);
    }

    /**
     * Returns a word in plural form.
     *
     * @param string $word The word in singular form.
     *
     * @return string The word in plural form.
     */
    public function pluralize(string $word): string
    {
        return $this->pluralizer->inflect($word);
    }
}