vendor/twig/twig/src/Lexer.php line 430

Open in your IDE?
  1. <?php
  2. /*
  3. * This file is part of Twig.
  4. *
  5. * (c) Fabien Potencier
  6. * (c) Armin Ronacher
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Twig;
  12. use Twig\Error\SyntaxError;
  /**
   * Turns the code of a template Source into a TokenStream.
   *
   * The lexer is a small state machine: tokenize() repeatedly dispatches to the
   * lex*() method that matches the current state until the cursor reaches the
   * end of the code. Opening brackets, double-quoted strings and interpolations
   * are tracked on a stack so that unbalanced constructs can be reported with
   * the line they were opened on.
   *
   * @author Fabien Potencier <fabien@symfony.com>
   */
  class Lexer
  {
      /** Whether the option-dependent regexes have been built by initialize(). */
      private bool $isInitialized = false;
      /** Tokens collected by the current tokenize() call. */
      private array $tokens = [];
      /** Template code with line endings normalized to "\n". */
      private string $code = '';
      /** Byte offset of the next character to lex in $code. */
      private int $cursor = 0;
      /** Line number attached to the tokens being pushed. */
      private int $lineno = 1;
      /** Byte length of $code. */
      private int $end = 0;
      /** Current state, one of the STATE_* constants. */
      private int $state = self::STATE_DATA;
      /** Stack of the states to return to on popState(). */
      private array $states = [];
      /** Stack of [opening construct, line number] pairs that are still open. */
      private array $brackets = [];
      private Environment $env;
      private Source $source;
      private array $options;
      /** Regexes built from $options, keyed by purpose. */
      private array $regexes = [];
      /** Index into $positions of the last tag opening handled by lexData(). */
      private int $position = -1;
      /** preg_match_all() result (with offsets) for every tag opening found in $code. */
      private array $positions = [];
      /** Line on which the block/variable tag currently being lexed was opened. */
      private ?int $currentVarBlockLine = null;
      private array $openingBrackets = ['{', '(', '['];
      private array $closingBrackets = ['}', ')', ']'];

      public const STATE_DATA = 0;
      public const STATE_BLOCK = 1;
      public const STATE_VAR = 2;
      public const STATE_STRING = 3;
      public const STATE_INTERPOLATION = 4;

      public const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
      public const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
      public const REGEX_NUMBER = '/(?(DEFINE)
          (?<LNUM>[0-9]+(_[0-9]+)*) # Integers (with underscores) 123_456
          (?<FRAC>\.(?&LNUM)) # Fractional part .456
          (?<EXPONENT>[eE][+-]?(?&LNUM)) # Exponent part E+10
          (?<DNUM>(?&LNUM)(?:(?&FRAC))?) # Decimal number 123_456.456
          )(?:(?&DNUM)(?:(?&EXPONENT))?) # 123_456.456E+10
          /Ax';
      public const REGEX_DQ_STRING_DELIM = '/"/A';
      public const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
      public const REGEX_INLINE_COMMENT = '/#[^\n]*/A';
      public const PUNCTUATION = '()[]{}?:.,|';

      /** Single-letter escapes resolved by stripcslashes(), e.g. "\n". */
      private const SPECIAL_CHARS = [
          'f' => "\f",
          'n' => "\n",
          'r' => "\r",
          't' => "\t",
          'v' => "\v",
      ];

      /**
       * @param Environment $env     Environment whose expression parsers define the operators to recognize
       * @param array       $options Overrides for the default delimiters and whitespace-control markers
       */
      public function __construct(Environment $env, array $options = [])
      {
          $this->env = $env;
          $this->options = array_merge([
              'tag_comment' => ['{#', '#}'],
              'tag_block' => ['{%', '%}'],
              'tag_variable' => ['{{', '}}'],
              'whitespace_trim' => '-',
              'whitespace_line_trim' => '~',
              'whitespace_line_chars' => ' \t\0\x0B',
              'interpolation' => ['#{', '}'],
          ], $options);
      }

      /**
       * Builds, once per instance, the regexes that depend on the configured options.
       *
       * Every multi-line pattern below uses the "x" modifier, so the whitespace
       * used to lay the patterns out is not part of what they match.
       */
      private function initialize(): void
      {
          if ($this->isInitialized) {
              return;
          }

          // The explicit '#' delimiter passed to preg_quote() is redundant since PHP 7.3
          // (which quotes '#' by default); it is kept because it is harmless.
          $this->regexes = [
              // }}
              'lex_var' => '{
                  \s*
                  (?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'. // -}}\s*
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~}}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_variable'][1], '#'). // }}
                  ')
              }Ax',

              // %}
              'lex_block' => '{
                  \s*
                  (?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'. // -%}\s*\n?
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_block'][1], '#').'\n?'. // %}\n?
                  ')
              }Ax',

              // {% endverbatim %}
              'lex_raw_data' => '{'.
                  preg_quote($this->options['tag_block'][0], '#'). // {%
                  '('.
                      // the markers are user-configurable, so they must be quoted like
                      // everywhere else or a metacharacter would corrupt the pattern
                      preg_quote($this->options['whitespace_trim'], '#'). // -
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'], '#'). // ~
                  ')?\s*endverbatim\s*'.
                  '(?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_block'][1], '#'). // %}
                  ')
              }sx',

              'operator' => $this->getOperatorRegex(),

              // #}
              'lex_comment' => '{
                  (?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_comment'][1], '#').'\s*\n?'. // -#}\s*\n?
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~#}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_comment'][1], '#').'\n?'. // #}\n?
                  ')
              }sx',

              // verbatim %}
              'lex_block_raw' => '{
                  \s*verbatim\s*
                  (?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}\s*
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_block'][1], '#'). // %}
                  ')
              }Asx',

              'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',

              // {{ or {% or {#
              'lex_tokens_start' => '{
                  ('.
                      preg_quote($this->options['tag_variable'][0], '#'). // {{
                      '|'.
                      preg_quote($this->options['tag_block'][0], '#'). // {%
                      '|'.
                      preg_quote($this->options['tag_comment'][0], '#'). // {#
                  ')('.
                      preg_quote($this->options['whitespace_trim'], '#'). // -
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'], '#'). // ~
                  ')?
              }sx',

              'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
              'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
          ];

          $this->isInitialized = true;
      }

      /**
       * Tokenizes the code of the given source.
       *
       * @throws SyntaxError       When the code contains an unexpected character or an unclosed construct
       * @throws \RuntimeException When PCRE fails while scanning the code for tag openings
       */
      public function tokenize(Source $source): TokenStream
      {
          $this->initialize();

          $this->source = $source;
          $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
          $this->cursor = 0;
          $this->lineno = 1;
          $this->end = \strlen($this->code);
          $this->tokens = [];
          $this->state = self::STATE_DATA;
          $this->states = [];
          $this->brackets = [];
          $this->position = -1;

          // find all token starts in one go
          if (false === preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, \PREG_OFFSET_CAPTURE)) {
              // without this check lexData() would index an empty/undefined match list
              throw new \RuntimeException(\sprintf('Unable to scan template "%s" for tag openings: %s.', $source->getName(), preg_last_error_msg()));
          }
          $this->positions = $matches;

          while ($this->cursor < $this->end) {
              // dispatch to the lexing functions depending on the current state;
              // an unknown state raises \UnhandledMatchError instead of looping forever
              match ($this->state) {
                  self::STATE_DATA => $this->lexData(),
                  self::STATE_BLOCK => $this->lexBlock(),
                  self::STATE_VAR => $this->lexVar(),
                  self::STATE_STRING => $this->lexString(),
                  self::STATE_INTERPOLATION => $this->lexInterpolation(),
              };
          }

          $this->pushToken(Token::EOF_TYPE);

          if ($this->brackets) {
              [$expect, $lineno] = array_pop($this->brackets);
              throw new SyntaxError(\sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
          }

          return new TokenStream($this->tokens, $this->source);
      }

      /**
       * Lexes plain template text up to the next tag opening, then handles that opening.
       *
       * Comments, "verbatim" blocks and "line" blocks are consumed entirely here;
       * other blocks and variables push their start token and switch state.
       */
      private function lexData(): void
      {
          $lastIndex = \count($this->positions[0]) - 1;

          // if no matches are left we return the rest of the template as simple text token
          if ($this->position === $lastIndex) {
              $this->pushToken(Token::TEXT_TYPE, substr($this->code, $this->cursor));
              $this->cursor = $this->end;

              return;
          }

          // Find the first token after the current cursor
          $position = $this->positions[0][++$this->position];
          while ($position[1] < $this->cursor) {
              if ($this->position === $lastIndex) {
                  // every remaining opening is behind the cursor; the next call
                  // emits the rest of the code as text
                  return;
              }

              $position = $this->positions[0][++$this->position];
          }

          // push the template text first
          $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);

          // trim?
          if (isset($this->positions[2][$this->position][0])) {
              if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
                  // whitespace_trim detected ({%-, {{- or {#-)
                  $text = rtrim($text);
              } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
                  // whitespace_line_trim detected ({%~, {{~ or {#~)
                  // don't trim \r and \n
                  $text = rtrim($text, " \t\0\x0B");
              }
          }

          $this->pushToken(Token::TEXT_TYPE, $text);
          // advance past the untrimmed text and the tag opening (including its trim marker)
          $this->moveCursor($textContent.$position[0]);

          switch ($this->positions[1][$this->position][0]) {
              case $this->options['tag_comment'][0]:
                  $this->lexComment();
                  break;

              case $this->options['tag_block'][0]:
                  // raw data?
                  if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
                      $this->moveCursor($match[0]);
                      $this->lexRawData();
                  // {% line \d+ %}
                  } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
                      $this->moveCursor($match[0]);
                      $this->lineno = (int) $match[1];
                  } else {
                      $this->pushToken(Token::BLOCK_START_TYPE);
                      $this->pushState(self::STATE_BLOCK);
                      $this->currentVarBlockLine = $this->lineno;
                  }
                  break;

              case $this->options['tag_variable'][0]:
                  $this->pushToken(Token::VAR_START_TYPE);
                  $this->pushState(self::STATE_VAR);
                  $this->currentVarBlockLine = $this->lineno;
                  break;
          }
      }

      /**
       * Lexes inside a block tag: closes it when the end delimiter is next and no
       * bracket is open, otherwise lexes one expression token.
       */
      private function lexBlock(): void
      {
          if (!$this->brackets && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
              $this->pushToken(Token::BLOCK_END_TYPE);
              $this->moveCursor($match[0]);
              $this->popState();
          } else {
              $this->lexExpression();
          }
      }

      /**
       * Lexes inside a variable tag: closes it when the end delimiter is next and
       * no bracket is open, otherwise lexes one expression token.
       */
      private function lexVar(): void
      {
          if (!$this->brackets && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
              $this->pushToken(Token::VAR_END_TYPE);
              $this->moveCursor($match[0]);
              $this->popState();
          } else {
              $this->lexExpression();
          }
      }

      /**
       * Skips leading whitespace, then lexes a single expression token at the cursor.
       *
       * The alternatives are tried in a fixed order: operator, name, number,
       * punctuation, complete string, opening of a double-quoted string, inline comment.
       *
       * @throws SyntaxError When the code ends after whitespace or no alternative matches
       */
      private function lexExpression(): void
      {
          // whitespace
          if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
              $this->moveCursor($match[0]);

              if ($this->cursor >= $this->end) {
                  throw new SyntaxError(\sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
              }
          }

          if (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
              // operators; inner whitespace runs are collapsed to one space
              $operator = preg_replace('/\s+/', ' ', $match[0]);
              if (\in_array($operator, $this->openingBrackets, true)) {
                  $this->checkBrackets($operator);
              }
              $this->pushToken(Token::OPERATOR_TYPE, $operator);
              $this->moveCursor($match[0]);
          } elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
              // names
              $this->pushToken(Token::NAME_TYPE, $match[0]);
              $this->moveCursor($match[0]);
          } elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
              // numbers; "0 +" lets PHP pick int or float for the numeric string
              $this->pushToken(Token::NUMBER_TYPE, 0 + str_replace('_', '', $match[0]));
              $this->moveCursor($match[0]);
          } elseif (str_contains(self::PUNCTUATION, $this->code[$this->cursor])) {
              // punctuation
              $this->checkBrackets($this->code[$this->cursor]);
              $this->pushToken(Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
              ++$this->cursor;
          } elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
              // strings
              $this->pushToken(Token::STRING_TYPE, $this->stripcslashes(substr($match[0], 1, -1), substr($match[0], 0, 1)));
              $this->moveCursor($match[0]);
          } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
              // opening double quoted string
              $this->brackets[] = ['"', $this->lineno];
              $this->pushState(self::STATE_STRING);
              $this->moveCursor($match[0]);
          } elseif (preg_match(self::REGEX_INLINE_COMMENT, $this->code, $match, 0, $this->cursor)) {
              // inline comment
              $this->moveCursor($match[0]);
          } else {
              // unlexable
              throw new SyntaxError(\sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
          }
      }

      /**
       * Resolves the backslash escape sequences of a string literal body.
       *
       * Handles the SPECIAL_CHARS letters, "\\", quotes, "\#{", "\x" followed by
       * one or two hex digits and up to three octal digits. Any other escaped
       * character is kept without its backslash and triggers a deprecation, as
       * does escaping a quote that is not the enclosing quote. A backslash that
       * ends the string is kept as is.
       *
       * @param string $str       String body, without its enclosing quotes
       * @param string $quoteType Quote character that encloses the string
       */
      private function stripcslashes(string $str, string $quoteType): string
      {
          $result = '';
          $length = \strlen($str);

          $i = 0;
          while ($i < $length) {
              if (false === $pos = strpos($str, '\\', $i)) {
                  $result .= substr($str, $i);
                  break;
              }

              // copy everything up to the backslash, then look at the escaped character
              $result .= substr($str, $i, $pos - $i);
              $i = $pos + 1;

              if ($i >= $length) {
                  $result .= '\\';
                  break;
              }

              $nextChar = $str[$i];

              if (isset(self::SPECIAL_CHARS[$nextChar])) {
                  $result .= self::SPECIAL_CHARS[$nextChar];
              } elseif ('\\' === $nextChar) {
                  $result .= $nextChar;
              } elseif ("'" === $nextChar || '"' === $nextChar) {
                  if ($nextChar !== $quoteType) {
                      trigger_deprecation('twig/twig', '3.12', 'Character "%s" should not be escaped; the "\" character is ignored in Twig 3 but will not be in Twig 4. Please remove the extra "\" character at position %d in "%s" at line %d.', $nextChar, $i + 1, $this->source->getName(), $this->lineno);
                  }
                  $result .= $nextChar;
              } elseif ('#' === $nextChar && $i + 1 < $length && '{' === $str[$i + 1]) {
                  $result .= '#{';
                  ++$i;
              } elseif ('x' === $nextChar && $i + 1 < $length && ctype_xdigit($str[$i + 1])) {
                  $hex = $str[++$i];
                  if ($i + 1 < $length && ctype_xdigit($str[$i + 1])) {
                      $hex .= $str[++$i];
                  }
                  $result .= \chr(hexdec($hex));
              } elseif (ctype_digit($nextChar) && $nextChar < '8') {
                  $octal = $nextChar;
                  while ($i + 1 < $length && ctype_digit($str[$i + 1]) && $str[$i + 1] < '8' && \strlen($octal) < 3) {
                      $octal .= $str[++$i];
                  }
                  $result .= \chr(octdec($octal));
              } else {
                  trigger_deprecation('twig/twig', '3.12', 'Character "%s" should not be escaped; the "\" character is ignored in Twig 3 but will not be in Twig 4. Please remove the extra "\" character at position %d in "%s" at line %d.', $nextChar, $i + 1, $this->source->getName(), $this->lineno);
                  $result .= $nextChar;
              }

              ++$i;
          }

          return $result;
      }

      /**
       * Emits the content of a "verbatim" block as one text token and moves past
       * the closing "endverbatim" tag.
       *
       * @throws SyntaxError When no closing "endverbatim" tag is found
       */
      private function lexRawData(): void
      {
          if (!preg_match($this->regexes['lex_raw_data'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
              throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.', $this->lineno, $this->source);
          }

          $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
          $this->moveCursor($text.$match[0][0]);

          // trim?
          if (isset($match[1][0])) {
              if ($this->options['whitespace_trim'] === $match[1][0]) {
                  // whitespace_trim detected ({%-, {{- or {#-)
                  $text = rtrim($text);
              } else {
                  // whitespace_line_trim detected ({%~, {{~ or {#~)
                  // don't trim \r and \n
                  $text = rtrim($text, " \t\0\x0B");
              }
          }

          $this->pushToken(Token::TEXT_TYPE, $text);
      }

      /**
       * Moves the cursor past a comment without emitting any token.
       *
       * @throws SyntaxError When the comment is not closed
       */
      private function lexComment(): void
      {
          if (!preg_match($this->regexes['lex_comment'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
              throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
          }

          $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
      }

      /**
       * Lexes inside a double-quoted string: an interpolation opening, a literal
       * part, or the closing quote.
       *
       * @throws SyntaxError When the character at the cursor fits none of these
       */
      private function lexString(): void
      {
          if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
              $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
              $this->pushToken(Token::INTERPOLATION_START_TYPE);
              $this->moveCursor($match[0]);
              $this->pushState(self::STATE_INTERPOLATION);
          } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && '' !== $match[0]) {
              $this->pushToken(Token::STRING_TYPE, $this->stripcslashes($match[0], '"'));
              $this->moveCursor($match[0]);
          } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
              [$expect, $lineno] = array_pop($this->brackets);
              if ('"' !== $this->code[$this->cursor]) {
                  throw new SyntaxError(\sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
              }

              $this->popState();
              ++$this->cursor;
          } else {
              // unlexable
              throw new SyntaxError(\sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
          }
      }

      /**
       * Lexes inside a string interpolation: closes it when the innermost open
       * construct is the interpolation itself and its end delimiter is next,
       * otherwise lexes one expression token.
       */
      private function lexInterpolation(): void
      {
          // end() returns false on an empty stack, which must not be indexed
          $bracket = end($this->brackets);
          if (false !== $bracket && $this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
              array_pop($this->brackets);
              $this->pushToken(Token::INTERPOLATION_END_TYPE);
              $this->moveCursor($match[0]);
              $this->popState();
          } else {
              $this->lexExpression();
          }
      }

      /**
       * Appends a token carrying the current line number.
       *
       * @param mixed $type  One of the Token::*_TYPE constants
       * @param mixed $value Token value; numbers are passed as int/float, everything else as string
       */
      private function pushToken($type, $value = ''): void
      {
          // do not push empty text tokens
          if (Token::TEXT_TYPE === $type && '' === $value) {
              return;
          }

          $this->tokens[] = new Token($type, $value, $this->lineno);
      }

      /**
       * Advances the cursor by the byte length of $text and the line counter by
       * the number of newlines it contains.
       */
      private function moveCursor(string $text): void
      {
          $this->cursor += \strlen($text);
          $this->lineno += substr_count($text, "\n");
      }

      /**
       * Builds the anchored regex that matches any operator of the environment's
       * expression parsers (names and aliases) plus "=", longest operators first.
       */
      private function getOperatorRegex(): string
      {
          $expressionParsers = ['='];
          foreach ($this->env->getExpressionParsers() as $expressionParser) {
              $expressionParsers = array_merge($expressionParsers, [$expressionParser->getName()], $expressionParser->getAliases());
          }

          // map each operator to its length and sort by length, longest first, so
          // that the alternation prefers e.g. "not in" over "not"
          $expressionParsers = array_combine($expressionParsers, array_map('strlen', $expressionParsers));
          arsort($expressionParsers);

          $regex = [];
          foreach ($expressionParsers as $expressionParser => $length) {
              // PHP turns numeric string keys into integers; restore the string
              $expressionParser = (string) $expressionParser;

              // an operator that ends with a character must be followed by
              // a whitespace, a parenthesis, an opening map [ or sequence {
              $r = preg_quote($expressionParser, '/');
              if (ctype_alpha($expressionParser[$length - 1])) {
                  $r .= '(?=[\s()\[{])';
              }

              // an operator that begins with a character must not have a dot or pipe before
              if (ctype_alpha($expressionParser[0])) {
                  $r = '(?<![\.\|])'.$r;
              }

              // an operator with a space can be any amount of whitespaces
              $r = preg_replace('/\s+/', '\s+', $r);

              $regex[] = $r;
          }

          return '/'.implode('|', $regex).'/A';
      }

      /**
       * Enters $state, remembering the current state so popState() can restore it.
       */
      private function pushState(int $state): void
      {
          $this->states[] = $this->state;
          $this->state = $state;
      }

      /**
       * Returns to the state that was active before the last pushState().
       *
       * @throws \LogicException When there is no previous state
       */
      private function popState(): void
      {
          if (0 === \count($this->states)) {
              throw new \LogicException('Cannot pop state without a previous state.');
          }

          $this->state = array_pop($this->states);
      }

      /**
       * Records an opening bracket, or checks that a closing bracket matches the
       * innermost open construct. Any other character is ignored.
       *
       * @throws SyntaxError When a closing bracket has no opener or does not match it
       */
      private function checkBrackets(string $code): void
      {
          // opening bracket
          if (\in_array($code, $this->openingBrackets, true)) {
              $this->brackets[] = [$code, $this->lineno];
          } elseif (\in_array($code, $this->closingBrackets, true)) {
              // closing bracket
              if (!$this->brackets) {
                  throw new SyntaxError(\sprintf('Unexpected "%s".', $code), $this->lineno, $this->source);
              }

              [$expect, $lineno] = array_pop($this->brackets);
              // translate the recorded opener into the closer it requires
              if ($code !== str_replace($this->openingBrackets, $this->closingBrackets, $expect)) {
                  throw new SyntaxError(\sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
              }
          }
      }
  }