Skip to content

Commit 1bdeb3e

Browse files
feat(duckdb): Handle WEEK, WEEK(day), ISOWEEK transpilation for DATE_DIFF function
1 parent df13a65 commit 1bdeb3e

File tree

4 files changed

+210
-1
lines changed

4 files changed

+210
-1
lines changed

sqlglot/dialects/bigquery.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,32 @@ def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
152152
return expression
153153

154154

155+
def _normalize_week_units(expression: exp.Expression) -> exp.Expression:
156+
"""
157+
Normalize WEEK and ISOWEEK units in DATE_DIFF to WeekStart expressions.
158+
159+
Transformations:
160+
- WEEK -> WeekStart(this=Var('SUNDAY'))
161+
- ISOWEEK -> WeekStart(this=Var('MONDAY'))
162+
163+
Note: WEEK(day) is already parsed as WeekStart by the parser.
164+
"""
165+
if isinstance(expression, exp.DateDiff):
166+
unit = expression.args.get("unit")
167+
168+
if isinstance(unit, exp.Var):
169+
unit_name = unit.this.upper() if isinstance(unit.this, str) else str(unit.this)
170+
171+
if unit_name == "WEEK":
172+
# BigQuery's WEEK uses Sunday as the start of the week
173+
expression.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))
174+
elif unit_name == "ISOWEEK":
175+
# ISOWEEK uses Monday as the start of the week
176+
expression.set("unit", exp.WeekStart(this=exp.var("MONDAY")))
177+
178+
return expression
179+
180+
155181
def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
156182
this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
157183
this.set("zone", seq_get(args, 2))
@@ -409,6 +435,17 @@ class BigQuery(Dialect):
409435

410436
EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
411437

438+
def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
439+
"""Parse SQL and normalize BigQuery-specific constructs to canonical form."""
440+
expressions = super().parse(sql, **opts)
441+
442+
# Normalize WEEK units in DATE_DIFF to canonical WeekStart expressions
443+
for expression in expressions:
444+
if expression:
445+
expression.transform(_normalize_week_units, copy=False)
446+
447+
return expressions
448+
412449
def normalize_identifier(self, expression: E) -> E:
413450
if (
414451
isinstance(expression, exp.Identifier)
@@ -1064,7 +1101,7 @@ class Generator(generator.Generator):
10641101
exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
10651102
exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
10661103
exp.DateDiff: lambda self, e: self.func(
1067-
"DATE_DIFF", e.this, e.expression, unit_to_var(e)
1104+
"DATE_DIFF", e.this, e.expression, self._date_diff_unit_sql(e)
10681105
),
10691106
exp.DateFromParts: rename_func("DATE"),
10701107
exp.DateStrToDate: datestrtodate_sql,
@@ -1317,6 +1354,40 @@ class Generator(generator.Generator):
13171354
"within",
13181355
}
13191356

1357+
def _date_diff_unit_sql(self, expression: exp.DateDiff) -> str:
1358+
"""
1359+
Convert canonical WeekStart expression back to BigQuery syntax.
1360+
1361+
Canonical form -> BigQuery syntax:
1362+
- WeekStart(SUNDAY) -> WEEK (BigQuery's default)
1363+
- WeekStart(MONDAY) -> ISOWEEK
1364+
- WeekStart(other day) -> WEEK(day)
1365+
- Other units -> use unit_to_var as normal
1366+
"""
1367+
unit = expression.args.get("unit")
1368+
1369+
if isinstance(unit, exp.WeekStart):
1370+
# Extract the day from WeekStart
1371+
day_var = unit.this
1372+
if isinstance(day_var, exp.Var):
1373+
day_name = (
1374+
day_var.this.upper() if isinstance(day_var.this, str) else str(day_var.this)
1375+
)
1376+
1377+
if day_name == "SUNDAY":
1378+
# BigQuery's default WEEK is Sunday-start
1379+
return self.sql(exp.var("WEEK"))
1380+
elif day_name == "MONDAY":
1381+
# Use ISOWEEK for Monday-start
1382+
return self.sql(exp.var("ISOWEEK"))
1383+
else:
1384+
# For other days, use WEEK(day) syntax
1385+
return self.sql(exp.Week(this=day_var))
1386+
1387+
# For other units, use default behavior
1388+
unit_expr = unit_to_var(expression)
1389+
return self.sql(unit_expr) if unit_expr else "DAY"
1390+
13201391
def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
13211392
unit = expression.unit
13221393
unit_sql = unit.name if unit.is_string else self.sql(unit)

sqlglot/dialects/duckdb.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,17 @@
6868
"\u001f": 31,
6969
}
7070

71+
# DuckDB's EXTRACT(DAYOFWEEK) returns: 0=Sunday, 1=Monday, ..., 6=Saturday
72+
_WEEK_START_DAY_TO_DOW = {
73+
"MONDAY": 1,
74+
"TUESDAY": 2,
75+
"WEDNESDAY": 3,
76+
"THURSDAY": 4,
77+
"FRIDAY": 5,
78+
"SATURDAY": 6,
79+
"SUNDAY": 0,
80+
}
81+
7182

7283
# BigQuery -> DuckDB conversion for the DATE function
7384
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
@@ -249,9 +260,68 @@ def _implicit_datetime_cast(
249260
return arg
250261

251262

263+
def _extract_week_start_day(unit: t.Optional[exp.Expression]) -> t.Optional[t.Tuple[str, int]]:
264+
"""
265+
Extract week start day name and DOW number from a Week or WeekStart expression.
266+
"""
267+
if not isinstance(unit, (exp.Week, exp.WeekStart)):
268+
return None
269+
270+
day_var = unit.this
271+
if not isinstance(day_var, exp.Var):
272+
return None
273+
274+
start_day = day_var.this.upper() if isinstance(day_var.this, str) else str(day_var.this)
275+
start_dow = _WEEK_START_DAY_TO_DOW.get(start_day)
276+
277+
if start_dow is None:
278+
return None
279+
280+
return (start_day, start_dow)
281+
282+
283+
def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression:
284+
"""
285+
Build DATE_TRUNC expression for week boundaries with custom start day.
286+
Formula: shift = 1 - start_dow, then DATE_TRUNC('week', date + INTERVAL shift DAY)
287+
"""
288+
if start_dow == 1:
289+
# No shift needed for Monday-based weeks (ISO standard)
290+
return exp.Anonymous(this="DATE_TRUNC", expressions=[exp.Literal.string("week"), date_expr])
291+
292+
# Shift date to align week boundaries with ISO Monday
293+
shift_days = 1 - start_dow
294+
shifted_date = exp.DateAdd(
295+
this=date_expr,
296+
expression=exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY")),
297+
)
298+
299+
return exp.Anonymous(this="DATE_TRUNC", expressions=[exp.Literal.string("week"), shifted_date])
300+
301+
252302
def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
303+
"""
304+
Generate DATE_DIFF SQL for DuckDB using DATE_TRUNC-based week alignment.
305+
"""
253306
this = _implicit_datetime_cast(expression.this)
254307
expr = _implicit_datetime_cast(expression.expression)
308+
unit = expression.args.get("unit")
309+
310+
week_start = _extract_week_start_day(unit)
311+
if week_start and this and expr:
312+
_, start_dow = week_start
313+
314+
# Build truncated week boundary expressions
315+
truncated_this = _build_week_trunc_expression(this, start_dow)
316+
truncated_expr = _build_week_trunc_expression(expr, start_dow)
317+
318+
# Calculate week difference
319+
day_diff = exp.DateDiff(
320+
this=truncated_this, expression=truncated_expr, unit=exp.Literal.string("day")
321+
)
322+
result = exp.IntDiv(this=day_diff, expression=exp.Literal.number(7))
323+
324+
return self.sql(result)
255325

256326
return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
257327

tests/dialects/test_bigquery.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3216,6 +3216,56 @@ def test_week(self):
32163216
"EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
32173217
)
32183218

3219+
# BigQuery → DuckDB transpilation tests for DATE_DIFF with week units
3220+
# Test WEEK(MONDAY) - Monday-based week (normalized to ISOWEEK)
3221+
self.validate_all(
3222+
"DATE_DIFF('2024-01-15', '2024-01-08', WEEK(MONDAY))",
3223+
write={
3224+
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)",
3225+
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('week', CAST('2024-01-08' AS DATE)), DATE_TRUNC('week', CAST('2024-01-15' AS DATE))) // 7",
3226+
},
3227+
)
3228+
# Test WEEK(SUNDAY) - Sunday-based week (normalized to WEEK)
3229+
self.validate_all(
3230+
"DATE_DIFF('2024-01-15', '2024-01-08', WEEK(SUNDAY))",
3231+
write={
3232+
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
3233+
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('week', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('week', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY)) // 7",
3234+
},
3235+
)
3236+
# Test WEEK(SATURDAY) - Saturday-based week calculation
3237+
self.validate_all(
3238+
"DATE_DIFF('2024-01-15', '2024-01-08', WEEK(SATURDAY))",
3239+
write={
3240+
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK(SATURDAY))",
3241+
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('week', CAST('2024-01-08' AS DATE) + INTERVAL '-5' DAY), DATE_TRUNC('week', CAST('2024-01-15' AS DATE) + INTERVAL '-5' DAY)) // 7",
3242+
},
3243+
)
3244+
# Test WEEK - Default Sunday-based week calculation
3245+
self.validate_all(
3246+
"DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
3247+
write={
3248+
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
3249+
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('week', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('week', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY)) // 7",
3250+
},
3251+
)
3252+
# Test ISOWEEK - ISO 8601 Monday-based week calculation
3253+
self.validate_all(
3254+
"DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)",
3255+
write={
3256+
"bigquery": "DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)",
3257+
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('week', CAST('2024-01-08' AS DATE)), DATE_TRUNC('week', CAST('2024-01-15' AS DATE))) // 7",
3258+
},
3259+
)
3260+
# Test with DATE literal - Explicit DATE casting
3261+
self.validate_all(
3262+
"DATE_DIFF(DATE '2024-01-15', DATE '2024-01-08', WEEK(MONDAY))",
3263+
write={
3264+
"bigquery": "DATE_DIFF(CAST('2024-01-15' AS DATE), CAST('2024-01-08' AS DATE), ISOWEEK)",
3265+
"duckdb": "DATE_DIFF('DAY', DATE_TRUNC('week', CAST('2024-01-08' AS DATE)), DATE_TRUNC('week', CAST('2024-01-15' AS DATE))) // 7",
3266+
},
3267+
)
3268+
32193269
def test_approx_qunatiles(self):
32203270
self.validate_identity("APPROX_QUANTILES(foo, 2)")
32213271
self.validate_identity("APPROX_QUANTILES(DISTINCT foo, 2 RESPECT NULLS)")

tests/dialects/test_duckdb.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1962,6 +1962,24 @@ def test_extract_date_parts(self):
19621962
# All of these should remain as is, they don't have synonyms
19631963
self.validate_identity(f"EXTRACT({part} FROM foo)")
19641964

1965+
def test_date_diff_week(self):
1966+
# Test Monday-based week (ISO week) - no offset needed
1967+
self.validate_identity(
1968+
"DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE))) // 7"
1969+
)
1970+
# Test Sunday-based week - shift by +1 day to align with ISO Monday
1971+
self.validate_identity(
1972+
"DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY)) // 7"
1973+
)
1974+
# Test Saturday-based week - shift by -5 days to align with ISO Monday
1975+
self.validate_identity(
1976+
"DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '-5' DAY)) // 7"
1977+
)
1978+
# Test zero weeks between dates in same week
1979+
self.validate_identity(
1980+
"DATE_DIFF('DAY', DATE_TRUNC('WEEK', CAST('2024-01-01' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-01' AS DATE))) // 7"
1981+
)
1982+
19651983
def test_set_item(self):
19661984
self.validate_identity("SET memory_limit = '10GB'")
19671985
self.validate_identity("SET SESSION default_collation = 'nocase'")

0 commit comments

Comments
 (0)