View file vendor/symfony/string/UnicodeString.php

File size: 12.57Kb
  1. <?php
  2.  
  3. /*
  4. * This file is part of the Symfony package.
  5. *
  6. * (c) Fabien Potencier <fabien@symfony.com>
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11.  
  12. namespace Symfony\Component\String;
  13.  
  14. use Symfony\Component\String\Exception\ExceptionInterface;
  15. use Symfony\Component\String\Exception\InvalidArgumentException;
  16.  
  17. /**
  18. * Represents a string of Unicode grapheme clusters encoded as UTF-8.
  19. *
  20. * A letter followed by combining characters (accents typically) form what Unicode defines
  21. * as a grapheme cluster: a character as humans mean it in written texts. This class knows
  22. * about the concept and won't split a letter apart from its combining accents. It also
  23. * ensures all string comparisons happen on their canonically-composed representation,
  24. * ignoring e.g. the order in which accents are listed when a letter has many of them.
  25. *
  26. * @see https://unicode.org/reports/tr15/
  27. *
  28. * @author Nicolas Grekas <p@tchwork.com>
  29. * @author Hugo Hamon <hugohamon@neuf.fr>
  30. *
  31. * @throws ExceptionInterface
  32. */
  33. class UnicodeString extends AbstractUnicodeString
  34. {
  35. public function __construct(string $string = '')
  36. {
  37. $this->string = normalizer_is_normalized($string) ? $string : normalizer_normalize($string);
  38.  
  39. if (false === $this->string) {
  40. throw new InvalidArgumentException('Invalid UTF-8 string.');
  41. }
  42. }
  43.  
  44. public function append(string ...$suffix): AbstractString
  45. {
  46. $str = clone $this;
  47. $str->string = $this->string.(1 >= \count($suffix) ? ($suffix[0] ?? '') : implode('', $suffix));
  48. normalizer_is_normalized($str->string) ?: $str->string = normalizer_normalize($str->string);
  49.  
  50. if (false === $str->string) {
  51. throw new InvalidArgumentException('Invalid UTF-8 string.');
  52. }
  53.  
  54. return $str;
  55. }
  56.  
  57. public function chunk(int $length = 1): array
  58. {
  59. if (1 > $length) {
  60. throw new InvalidArgumentException('The chunk length must be greater than zero.');
  61. }
  62.  
  63. if ('' === $this->string) {
  64. return [];
  65. }
  66.  
  67. $rx = '/(';
  68. while (65535 < $length) {
  69. $rx .= '\X{65535}';
  70. $length -= 65535;
  71. }
  72. $rx .= '\X{'.$length.'})/u';
  73.  
  74. $str = clone $this;
  75. $chunks = [];
  76.  
  77. foreach (preg_split($rx, $this->string, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY) as $chunk) {
  78. $str->string = $chunk;
  79. $chunks[] = clone $str;
  80. }
  81.  
  82. return $chunks;
  83. }
  84.  
  85. public function endsWith($suffix): bool
  86. {
  87. if ($suffix instanceof AbstractString) {
  88. $suffix = $suffix->string;
  89. } elseif (\is_array($suffix) || $suffix instanceof \Traversable) {
  90. return parent::endsWith($suffix);
  91. } else {
  92. $suffix = (string) $suffix;
  93. }
  94.  
  95. $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
  96. normalizer_is_normalized($suffix, $form) ?: $suffix = normalizer_normalize($suffix, $form);
  97.  
  98. if ('' === $suffix || false === $suffix) {
  99. return false;
  100. }
  101.  
  102. if ($this->ignoreCase) {
  103. return 0 === mb_stripos(grapheme_extract($this->string, \strlen($suffix), \GRAPHEME_EXTR_MAXBYTES, \strlen($this->string) - \strlen($suffix)), $suffix, 0, 'UTF-8');
  104. }
  105.  
  106. return $suffix === grapheme_extract($this->string, \strlen($suffix), \GRAPHEME_EXTR_MAXBYTES, \strlen($this->string) - \strlen($suffix));
  107. }
  108.  
  109. public function equalsTo($string): bool
  110. {
  111. if ($string instanceof AbstractString) {
  112. $string = $string->string;
  113. } elseif (\is_array($string) || $string instanceof \Traversable) {
  114. return parent::equalsTo($string);
  115. } else {
  116. $string = (string) $string;
  117. }
  118.  
  119. $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
  120. normalizer_is_normalized($string, $form) ?: $string = normalizer_normalize($string, $form);
  121.  
  122. if ('' !== $string && false !== $string && $this->ignoreCase) {
  123. return \strlen($string) === \strlen($this->string) && 0 === mb_stripos($this->string, $string, 0, 'UTF-8');
  124. }
  125.  
  126. return $string === $this->string;
  127. }
  128.  
  129. public function indexOf($needle, int $offset = 0): ?int
  130. {
  131. if ($needle instanceof AbstractString) {
  132. $needle = $needle->string;
  133. } elseif (\is_array($needle) || $needle instanceof \Traversable) {
  134. return parent::indexOf($needle, $offset);
  135. } else {
  136. $needle = (string) $needle;
  137. }
  138.  
  139. $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
  140. normalizer_is_normalized($needle, $form) ?: $needle = normalizer_normalize($needle, $form);
  141.  
  142. if ('' === $needle || false === $needle) {
  143. return null;
  144. }
  145.  
  146. try {
  147. $i = $this->ignoreCase ? grapheme_stripos($this->string, $needle, $offset) : grapheme_strpos($this->string, $needle, $offset);
  148. } catch (\ValueError $e) {
  149. return null;
  150. }
  151.  
  152. return false === $i ? null : $i;
  153. }
  154.  
  155. public function indexOfLast($needle, int $offset = 0): ?int
  156. {
  157. if ($needle instanceof AbstractString) {
  158. $needle = $needle->string;
  159. } elseif (\is_array($needle) || $needle instanceof \Traversable) {
  160. return parent::indexOfLast($needle, $offset);
  161. } else {
  162. $needle = (string) $needle;
  163. }
  164.  
  165. $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
  166. normalizer_is_normalized($needle, $form) ?: $needle = normalizer_normalize($needle, $form);
  167.  
  168. if ('' === $needle || false === $needle) {
  169. return null;
  170. }
  171.  
  172. $string = $this->string;
  173.  
  174. if (0 > $offset) {
  175. // workaround https://bugs.php.net/74264
  176. if (0 > $offset += grapheme_strlen($needle)) {
  177. $string = grapheme_substr($string, 0, $offset);
  178. }
  179. $offset = 0;
  180. }
  181.  
  182. $i = $this->ignoreCase ? grapheme_strripos($string, $needle, $offset) : grapheme_strrpos($string, $needle, $offset);
  183.  
  184. return false === $i ? null : $i;
  185. }
  186.  
  187. public function join(array $strings, string $lastGlue = null): AbstractString
  188. {
  189. $str = parent::join($strings, $lastGlue);
  190. normalizer_is_normalized($str->string) ?: $str->string = normalizer_normalize($str->string);
  191.  
  192. return $str;
  193. }
  194.  
  195. public function length(): int
  196. {
  197. return grapheme_strlen($this->string);
  198. }
  199.  
  200. /**
  201. * @return static
  202. */
  203. public function normalize(int $form = self::NFC): parent
  204. {
  205. $str = clone $this;
  206.  
  207. if (\in_array($form, [self::NFC, self::NFKC], true)) {
  208. normalizer_is_normalized($str->string, $form) ?: $str->string = normalizer_normalize($str->string, $form);
  209. } elseif (!\in_array($form, [self::NFD, self::NFKD], true)) {
  210. throw new InvalidArgumentException('Unsupported normalization form.');
  211. } elseif (!normalizer_is_normalized($str->string, $form)) {
  212. $str->string = normalizer_normalize($str->string, $form);
  213. $str->ignoreCase = null;
  214. }
  215.  
  216. return $str;
  217. }
  218.  
  219. public function prepend(string ...$prefix): AbstractString
  220. {
  221. $str = clone $this;
  222. $str->string = (1 >= \count($prefix) ? ($prefix[0] ?? '') : implode('', $prefix)).$this->string;
  223. normalizer_is_normalized($str->string) ?: $str->string = normalizer_normalize($str->string);
  224.  
  225. if (false === $str->string) {
  226. throw new InvalidArgumentException('Invalid UTF-8 string.');
  227. }
  228.  
  229. return $str;
  230. }
  231.  
  232. public function replace(string $from, string $to): AbstractString
  233. {
  234. $str = clone $this;
  235. normalizer_is_normalized($from) ?: $from = normalizer_normalize($from);
  236.  
  237. if ('' !== $from && false !== $from) {
  238. $tail = $str->string;
  239. $result = '';
  240. $indexOf = $this->ignoreCase ? 'grapheme_stripos' : 'grapheme_strpos';
  241.  
  242. while ('' !== $tail && false !== $i = $indexOf($tail, $from)) {
  243. $slice = grapheme_substr($tail, 0, $i);
  244. $result .= $slice.$to;
  245. $tail = substr($tail, \strlen($slice) + \strlen($from));
  246. }
  247.  
  248. $str->string = $result.$tail;
  249. normalizer_is_normalized($str->string) ?: $str->string = normalizer_normalize($str->string);
  250.  
  251. if (false === $str->string) {
  252. throw new InvalidArgumentException('Invalid UTF-8 string.');
  253. }
  254. }
  255.  
  256. return $str;
  257. }
  258.  
  259. public function replaceMatches(string $fromRegexp, $to): AbstractString
  260. {
  261. $str = parent::replaceMatches($fromRegexp, $to);
  262. normalizer_is_normalized($str->string) ?: $str->string = normalizer_normalize($str->string);
  263.  
  264. return $str;
  265. }
  266.  
  267. public function slice(int $start = 0, int $length = null): AbstractString
  268. {
  269. $str = clone $this;
  270.  
  271. if (\PHP_VERSION_ID < 80000 && 0 > $start && grapheme_strlen($this->string) < -$start) {
  272. $start = 0;
  273. }
  274. $str->string = (string) grapheme_substr($this->string, $start, $length ?? 2147483647);
  275.  
  276. return $str;
  277. }
  278.  
  279. public function splice(string $replacement, int $start = 0, int $length = null): AbstractString
  280. {
  281. $str = clone $this;
  282.  
  283. if (\PHP_VERSION_ID < 80000 && 0 > $start && grapheme_strlen($this->string) < -$start) {
  284. $start = 0;
  285. }
  286. $start = $start ? \strlen(grapheme_substr($this->string, 0, $start)) : 0;
  287. $length = $length ? \strlen(grapheme_substr($this->string, $start, $length ?? 2147483647)) : $length;
  288. $str->string = substr_replace($this->string, $replacement, $start, $length ?? 2147483647);
  289. normalizer_is_normalized($str->string) ?: $str->string = normalizer_normalize($str->string);
  290.  
  291. if (false === $str->string) {
  292. throw new InvalidArgumentException('Invalid UTF-8 string.');
  293. }
  294.  
  295. return $str;
  296. }
  297.  
  298. public function split(string $delimiter, int $limit = null, int $flags = null): array
  299. {
  300. if (1 > $limit = $limit ?? 2147483647) {
  301. throw new InvalidArgumentException('Split limit must be a positive integer.');
  302. }
  303.  
  304. if ('' === $delimiter) {
  305. throw new InvalidArgumentException('Split delimiter is empty.');
  306. }
  307.  
  308. if (null !== $flags) {
  309. return parent::split($delimiter.'u', $limit, $flags);
  310. }
  311.  
  312. normalizer_is_normalized($delimiter) ?: $delimiter = normalizer_normalize($delimiter);
  313.  
  314. if (false === $delimiter) {
  315. throw new InvalidArgumentException('Split delimiter is not a valid UTF-8 string.');
  316. }
  317.  
  318. $str = clone $this;
  319. $tail = $this->string;
  320. $chunks = [];
  321. $indexOf = $this->ignoreCase ? 'grapheme_stripos' : 'grapheme_strpos';
  322.  
  323. while (1 < $limit && false !== $i = $indexOf($tail, $delimiter)) {
  324. $str->string = grapheme_substr($tail, 0, $i);
  325. $chunks[] = clone $str;
  326. $tail = substr($tail, \strlen($str->string) + \strlen($delimiter));
  327. --$limit;
  328. }
  329.  
  330. $str->string = $tail;
  331. $chunks[] = clone $str;
  332.  
  333. return $chunks;
  334. }
  335.  
  336. public function startsWith($prefix): bool
  337. {
  338. if ($prefix instanceof AbstractString) {
  339. $prefix = $prefix->string;
  340. } elseif (\is_array($prefix) || $prefix instanceof \Traversable) {
  341. return parent::startsWith($prefix);
  342. } else {
  343. $prefix = (string) $prefix;
  344. }
  345.  
  346. $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
  347. normalizer_is_normalized($prefix, $form) ?: $prefix = normalizer_normalize($prefix, $form);
  348.  
  349. if ('' === $prefix || false === $prefix) {
  350. return false;
  351. }
  352.  
  353. if ($this->ignoreCase) {
  354. return 0 === mb_stripos(grapheme_extract($this->string, \strlen($prefix), \GRAPHEME_EXTR_MAXBYTES), $prefix, 0, 'UTF-8');
  355. }
  356.  
  357. return $prefix === grapheme_extract($this->string, \strlen($prefix), \GRAPHEME_EXTR_MAXBYTES);
  358. }
  359.  
  360. public function __wakeup()
  361. {
  362. if (!\is_string($this->string)) {
  363. throw new \BadMethodCallException('Cannot unserialize '.__CLASS__);
  364. }
  365.  
  366. normalizer_is_normalized($this->string) ?: $this->string = normalizer_normalize($this->string);
  367. }
  368.  
  369. public function __clone()
  370. {
  371. if (null === $this->ignoreCase) {
  372. normalizer_is_normalized($this->string) ?: $this->string = normalizer_normalize($this->string);
  373. }
  374.  
  375. $this->ignoreCase = false;
  376. }
  377. }