Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions src/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,12 @@
'YEARWEEK',
];

/**
 * Multi-word data type modifiers that are tokenized as one reserved token
 * (e.g. the trailing part of `TIMESTAMP(0) WITH TIME ZONE`).
 *
 * When the tokenizer regex is built, each space in these entries is replaced
 * with `\s+`, so any run of whitespace between the words matches.
 *
 * @var list<string>
 */
private array $dataTypeModifiers = [
'WITH TIME ZONE',
'WITHOUT TIME ZONE',
];

/** Regular expression for tokenizing. */
private readonly string $tokenizeRegex;

Expand Down Expand Up @@ -834,11 +840,13 @@
private function makeTokenizeRegexes(): array
{
// Set up regular expressions
$regexBoundaries = $this->makeRegexFromList($this->boundaries);
$regexReserved = $this->makeRegexFromList($this->reserved);
$regexReservedToplevel = str_replace(' ', '\s+', $this->makeRegexFromList($this->reservedToplevel));
$regexReservedNewline = str_replace(' ', '\s+', $this->makeRegexFromList($this->reservedNewline));
$regexFunction = $this->makeRegexFromList($this->functions);

$regexBoundaries = $this->makeRegexFromList($this->boundaries);
$regexReserved = $this->makeRegexFromList($this->reserved);
$regexReservedToplevel = str_replace(' ', '\s+', $this->makeRegexFromList($this->reservedToplevel));
$regexReservedNewline = str_replace(' ', '\s+', $this->makeRegexFromList($this->reservedNewline));
$regexFunction = $this->makeRegexFromList($this->functions);
$regexDataTypeModifiers = str_replace(' ', '\s+', $this->makeRegexFromList($this->dataTypeModifiers));

return [
Token::TOKEN_TYPE_WHITESPACE => '\s+',
Expand Down Expand Up @@ -866,6 +874,10 @@
Token::TOKEN_TYPE_NUMBER => '(?:\d+(?:\.\d+)?|0x[\da-fA-F]+|0b[01]+)(?=$|\s|"\'`|' . $regexBoundaries . ')',
// punctuation and symbols
Token::TOKEN_TYPE_BOUNDARY => $regexBoundaries,
// data type modifiers come first in the alternation, so 'WITH TIME ZONE' is matched as a single token distinct from the CTE 'WITH'
Token::TOKEN_TYPE_RESERVED => '(?<!\.)' . $regexDataTypeModifiers . '(?=$|\s|' . $regexBoundaries . ')'

Check failure on line 878 in src/Tokenizer.php

View workflow job for this annotation

GitHub Actions / Static Analysis / PHPStan (PHP: 8.4)

Array has 2 duplicate keys with value 4 (\Doctrine\SqlFormatter\Token::TOKEN_TYPE_RESERVED, \Doctrine\SqlFormatter\Token::TOKEN_TYPE_RESERVED).
. '|(?<!\.)' . $regexReserved . '(?=$|\s|' . $regexBoundaries . ')'
. '|' . $regexFunction . '(?=\s*\()',
// A reserved word cannot be preceded by a '.'
// this makes it so in "mytable.from", "from" is not considered a reserved word
Token::TOKEN_TYPE_RESERVED_TOPLEVEL => '(?<!\.|\sCHARACTER\s(?=SET\s))' . $regexReservedToplevel . '(?=$|\s|' . $regexBoundaries . ')',
Expand Down
32 changes: 32 additions & 0 deletions tests/TokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1658,6 +1658,38 @@ public static function tokenizeData(): Generator
],
'/* foo...',
];

yield 'WITH TIME ZONE as single token' => [
[
new Token(Token::TOKEN_TYPE_RESERVED, 'TIMESTAMP'),
new Token(Token::TOKEN_TYPE_BOUNDARY, '('),
new Token(Token::TOKEN_TYPE_NUMBER, '0'),
new Token(Token::TOKEN_TYPE_BOUNDARY, ')'),
new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
new Token(Token::TOKEN_TYPE_RESERVED, 'WITH TIME ZONE'),
],
'TIMESTAMP(0) WITH TIME ZONE',
];

yield 'WITHOUT TIME ZONE as single token' => [
[
new Token(Token::TOKEN_TYPE_RESERVED, 'TIME'),
new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
new Token(Token::TOKEN_TYPE_RESERVED, 'WITHOUT TIME ZONE'),
],
'TIME WITHOUT TIME ZONE',
];

yield 'CTE WITH still works' => [
[
new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'WITH'),
new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
new Token(Token::TOKEN_TYPE_WORD, 'cte'),
new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
new Token(Token::TOKEN_TYPE_RESERVED, 'AS'),
],
'WITH cte AS',
];
}

public function testTokenizeLongConcat(): void
Expand Down
Loading