Skip to content

Commit bd2097a

Browse files
fix(duckdb): Added generic implementation
1 parent 1bdeb3e commit bd2097a

File tree

3 files changed

+146
-96
lines changed

3 files changed

+146
-96
lines changed

sqlglot/dialects/bigquery.py

Lines changed: 70 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -152,32 +152,6 @@ def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
152152
return expression
153153

154154

155-
def _normalize_week_units(expression: exp.Expression) -> exp.Expression:
156-
"""
157-
Normalize WEEK and ISOWEEK units in DATE_DIFF to WeekStart expressions.
158-
159-
Transformations:
160-
- WEEK -> WeekStart(this=Var('SUNDAY'))
161-
- ISOWEEK -> WeekStart(this=Var('MONDAY'))
162-
163-
Note: WEEK(day) is already parsed as WeekStart by the parser.
164-
"""
165-
if isinstance(expression, exp.DateDiff):
166-
unit = expression.args.get("unit")
167-
168-
if isinstance(unit, exp.Var):
169-
unit_name = unit.this.upper() if isinstance(unit.this, str) else str(unit.this)
170-
171-
if unit_name == "WEEK":
172-
# BigQuery's WEEK uses Sunday as the start of the week
173-
expression.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))
174-
elif unit_name == "ISOWEEK":
175-
# ISOWEEK uses Monday as the start of the week
176-
expression.set("unit", exp.WeekStart(this=exp.var("MONDAY")))
177-
178-
return expression
179-
180-
181155
def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
182156
this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
183157
this.set("zone", seq_get(args, 2))
@@ -234,6 +208,34 @@ def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) ->
234208
return self.func("DATE_DIFF", expression.this, expression.expression, unit)
235209

236210

211+
def _serialize_bq_datetime_diff_unit(self: BigQuery.Generator, expression: exp.Expression) -> str:
    """
    Render the `unit` argument of a *_DIFF expression using BigQuery's date parts.

    Week units that carry an explicit start day are mapped as follows:
        - start day SUNDAY -> WEEK (BigQuery's default week start)
        - start day MONDAY -> ISOWEEK
        - any other day    -> WEEK(<day>)

    Every other unit is rendered through `unit_to_var`; when that yields
    nothing, the literal "DAY" is returned.
    """
    from sqlglot.dialects.dialect import extract_week_unit_info

    week_start = extract_week_unit_info(expression.args.get("unit"), include_dow=False)

    if isinstance(week_start, str) and week_start:
        if week_start == "SUNDAY":
            unit_node: exp.Expression = exp.var("WEEK")
        elif week_start == "MONDAY":
            unit_node = exp.var("ISOWEEK")
        else:
            unit_node = exp.Week(this=exp.var(week_start))
        return self.sql(unit_node)

    # Not a week unit with a known start day: fall back to the generic unit handling.
    fallback = unit_to_var(expression)
    if not fallback:
        return "DAY"
    return self.sql(fallback)
237+
238+
237239
def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
238240
scale = expression.args.get("scale")
239241
timestamp = expression.this
@@ -267,6 +269,46 @@ def _build_datetime(args: t.List) -> exp.Func:
267269
return exp.TimestampFromParts.from_arg_list(args)
268270

269271

272+
def _normalize_week_unit(unit: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """
    Make BigQuery's implicit Sunday week start explicit in the AST.

    A bare WEEK unit (matched case-insensitively), whether it arrives as a Var or
    as a Column wrapping an Identifier, is replaced by WEEK(SUNDAY) so the
    Sunday-start semantic survives transpilation to other dialects. Any other
    unit, including None, is returned unchanged.
    """
    if isinstance(unit, exp.Var):
        raw_name = unit.this
    elif isinstance(unit, exp.Column) and isinstance(unit.this, exp.Identifier):
        raw_name = unit.this.this
    else:
        return unit

    name = str(raw_name)
    if name and name.upper() == "WEEK":
        return exp.Week(this=exp.var("SUNDAY"))

    return unit
288+
289+
290+
def build_date_time_diff_with_week_normalization(
    exp_class: t.Type[E],
) -> t.Callable[[t.List], E]:
    """
    Create a parser builder for *_DIFF functions called as FUNC(expr1, expr2, date_part).

    The returned builder maps the first two arguments to `this` and `expression`
    and passes the third (the date part) through `_normalize_week_unit`, so a
    plain WEEK is stored as WEEK(SUNDAY). Missing arguments become None.

    Args:
        exp_class: The expression class to instantiate for the parsed call.

    Returns:
        A callable that turns the parsed argument list into an `exp_class` node.
    """

    def _builder(args: t.List) -> E:
        first, second, date_part = (seq_get(args, index) for index in range(3))
        return exp_class(
            this=first,
            expression=second,
            unit=_normalize_week_unit(date_part),
        )

    return _builder
310+
311+
270312
def _build_regexp_extract(
271313
expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
272314
) -> t.Callable[[t.List], E]:
@@ -435,17 +477,6 @@ class BigQuery(Dialect):
435477

436478
EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
437479

438-
def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
439-
"""Parse SQL and normalize BigQuery-specific constructs to canonical form."""
440-
expressions = super().parse(sql, **opts)
441-
442-
# Normalize WEEK units in DATE_DIFF to canonical WeekStart expressions
443-
for expression in expressions:
444-
if expression:
445-
expression.transform(_normalize_week_units, copy=False)
446-
447-
return expressions
448-
449480
def normalize_identifier(self, expression: E) -> E:
450481
if (
451482
isinstance(expression, exp.Identifier)
@@ -569,6 +600,7 @@ class Parser(parser.Parser):
569600
"CONTAINS_SUBSTR": _build_contains_substring,
570601
"DATE": _build_date,
571602
"DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
603+
"DATE_DIFF": build_date_time_diff_with_week_normalization(exp.DateDiff),
572604
"DATE_SUB": build_date_delta_with_interval(exp.DateSub),
573605
"DATE_TRUNC": lambda args: exp.DateTrunc(
574606
unit=seq_get(args, 1),
@@ -1101,7 +1133,7 @@ class Generator(generator.Generator):
11011133
exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
11021134
exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
11031135
exp.DateDiff: lambda self, e: self.func(
1104-
"DATE_DIFF", e.this, e.expression, self._date_diff_unit_sql(e)
1136+
"DATE_DIFF", e.this, e.expression, _serialize_bq_datetime_diff_unit(self, e)
11051137
),
11061138
exp.DateFromParts: rename_func("DATE"),
11071139
exp.DateStrToDate: datestrtodate_sql,
@@ -1354,40 +1386,6 @@ class Generator(generator.Generator):
13541386
"within",
13551387
}
13561388

1357-
def _date_diff_unit_sql(self, expression: exp.DateDiff) -> str:
1358-
"""
1359-
Convert canonical WeekStart expression back to BigQuery syntax.
1360-
1361-
Canonical form -> BigQuery syntax:
1362-
- WeekStart(SUNDAY) -> WEEK (BigQuery's default)
1363-
- WeekStart(MONDAY) -> ISOWEEK
1364-
- WeekStart(other day) -> WEEK(day)
1365-
- Other units -> use unit_to_var as normal
1366-
"""
1367-
unit = expression.args.get("unit")
1368-
1369-
if isinstance(unit, exp.WeekStart):
1370-
# Extract the day from WeekStart
1371-
day_var = unit.this
1372-
if isinstance(day_var, exp.Var):
1373-
day_name = (
1374-
day_var.this.upper() if isinstance(day_var.this, str) else str(day_var.this)
1375-
)
1376-
1377-
if day_name == "SUNDAY":
1378-
# BigQuery's default WEEK is Sunday-start
1379-
return self.sql(exp.var("WEEK"))
1380-
elif day_name == "MONDAY":
1381-
# Use ISOWEEK for Monday-start
1382-
return self.sql(exp.var("ISOWEEK"))
1383-
else:
1384-
# For other days, use WEEK(day) syntax
1385-
return self.sql(exp.Week(this=day_var))
1386-
1387-
# For other units, use default behavior
1388-
unit_expr = unit_to_var(expression)
1389-
return self.sql(unit_expr) if unit_expr else "DAY"
1390-
13911389
def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
13921390
unit = expression.unit
13931391
unit_sql = unit.name if unit.is_string else self.sql(unit)

sqlglot/dialects/dialect.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,23 @@ class Dialect(metaclass=_Dialect):
687687
"CENTURIES": "CENTURY",
688688
}
689689

690+
WEEK_UNIT_SEMANTICS = {
691+
"WEEK": ("SUNDAY", 0),
692+
"ISOWEEK": ("MONDAY", 1),
693+
"WEEKISO": ("MONDAY", 1),
694+
}
695+
696+
# Days of week to DOW numbers (Sunday=0, Monday=1, ..., Saturday=6; note that ISO 8601 numbers Sunday as 7, not 0)
697+
WEEK_START_DAY_TO_DOW = {
698+
"MONDAY": 1,
699+
"TUESDAY": 2,
700+
"WEDNESDAY": 3,
701+
"THURSDAY": 4,
702+
"FRIDAY": 5,
703+
"SATURDAY": 6,
704+
"SUNDAY": 0,
705+
}
706+
690707
# Specifies what types a given type can be coerced into
691708
COERCES_TO: t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]] = {}
692709

@@ -1558,6 +1575,52 @@ def map_date_part(part, dialect: DialectType = Dialect):
15581575
return part
15591576

15601577

1578+
def extract_week_unit_info(
    unit: t.Optional[exp.Expression], include_dow: bool = False
) -> t.Optional[t.Union[str, t.Tuple[str, int]]]:
    """
    Resolve the week-start day encoded in a date-part unit node.

    Args:
        unit: The unit node to inspect. Recognized shapes are a Var named
            ISOWEEK or WEEKISO, and a Week/WeekStart node wrapping a Var day name.
        include_dow: When True, return a `(day_name, dow_number)` tuple.
            When False, return only the day name.

    Returns:
        The day name (upper-cased when the Var holds a string), or the
        `(day_name, dow_number)` tuple, or None when no week start can be
        resolved. A plain Var('WEEK') always yields None. With include_dow=True,
        a Week/WeekStart day that is missing from `Dialect.WEEK_START_DAY_TO_DOW`
        also yields None; with include_dow=False the day name is returned
        without that lookup.
    """
    if isinstance(unit, exp.Var):
        raw_unit = unit.this
        name = raw_unit.upper() if isinstance(raw_unit, str) else str(raw_unit)

        # Bare WEEK is deliberately left unresolved so that dialects which do not
        # share BigQuery's Sunday default are unaffected.
        if name not in ("ISOWEEK", "WEEKISO"):
            return None

        semantics = Dialect.WEEK_UNIT_SEMANTICS.get(name)
        if not semantics:
            return None

        start_day, dow_number = semantics
        return (start_day, dow_number) if include_dow else start_day

    if not isinstance(unit, (exp.Week, exp.WeekStart)):
        return None

    day_node = unit.this
    if not isinstance(day_node, exp.Var):
        return None

    raw_day = day_node.this
    start_day = raw_day.upper() if isinstance(raw_day, str) else str(raw_day)

    if not include_dow:
        return start_day

    dow_number = Dialect.WEEK_START_DAY_TO_DOW.get(start_day)
    return None if dow_number is None else (start_day, dow_number)
1622+
1623+
15611624
def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
15621625
trunc_curr_date = exp.func("date_trunc", "month", expression.this)
15631626
plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month")

sqlglot/dialects/duckdb.py

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,6 @@
6868
"\u001f": 31,
6969
}
7070

71-
# DuckDB's EXTRACT(DAYOFWEEK) returns: 0=Sunday, 1=Monday, ..., 6=Saturday
72-
_WEEK_START_DAY_TO_DOW = {
73-
"MONDAY": 1,
74-
"TUESDAY": 2,
75-
"WEDNESDAY": 3,
76-
"THURSDAY": 4,
77-
"FRIDAY": 5,
78-
"SATURDAY": 6,
79-
"SUNDAY": 0,
80-
}
81-
8271

8372
# BigQuery -> DuckDB conversion for the DATE function
8473
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
@@ -262,22 +251,22 @@ def _implicit_datetime_cast(
262251

263252
def _extract_week_start_day(unit: t.Optional[exp.Expression]) -> t.Optional[t.Tuple[str, int]]:
264253
"""
265-
Extract week start day name and DOW number from a Week or WeekStart expression.
266-
"""
267-
if not isinstance(unit, (exp.Week, exp.WeekStart)):
268-
return None
254+
Extract week start day name and DOW number from a Week, WeekStart, or plain Var expression.
269255
270-
day_var = unit.this
271-
if not isinstance(day_var, exp.Var):
272-
return None
256+
Uses extract_week_unit_info(include_dow=True) for uniform week unit handling.
273257
274-
start_day = day_var.this.upper() if isinstance(day_var.this, str) else str(day_var.this)
275-
start_dow = _WEEK_START_DAY_TO_DOW.get(start_day)
276-
277-
if start_dow is None:
278-
return None
258+
Handles:
259+
- Var('WEEK') -> None (plain WEEK is intentionally not resolved by extract_week_unit_info)
260+
- Var('ISOWEEK') -> ('MONDAY', 1)
261+
- Week(Var('day')) -> ('day', dow)
262+
- WeekStart(Var('day')) -> ('day', dow)
263+
"""
264+
from sqlglot.dialects.dialect import extract_week_unit_info
279265

280-
return (start_day, start_dow)
266+
# Use shared helper with include_dow=True to get (day_name, dow_number)
267+
result = extract_week_unit_info(unit, include_dow=True)
268+
# When include_dow=True, result is either None or Tuple[str, int]
269+
return result if result is None or isinstance(result, tuple) else None
281270

282271

283272
def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression:

0 commit comments

Comments
 (0)