From a47beb62b1a2979c820a4611525d25f15f3d5165 Mon Sep 17 00:00:00 2001 From: fivetran-felixhuang Date: Thu, 27 Nov 2025 11:31:58 -0500 Subject: [PATCH 1/7] annotate type for MEDIAN snowflake --- sqlglot/expressions.py | 2 +- sqlglot/typing/snowflake.py | 1 + tests/dialects/test_snowflake.py | 1 + tests/fixtures/optimizer/annotate_functions.sql | 16 ++++++++++++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index ec229e591b..51eb879348 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -7316,7 +7316,7 @@ class MD5NumberUpper64(Func): class Median(AggFunc): - pass + arg_types = {"this": True} class Min(AggFunc): diff --git a/sqlglot/typing/snowflake.py b/sqlglot/typing/snowflake.py index 5ca0d87a53..3a016dbe4f 100644 --- a/sqlglot/typing/snowflake.py +++ b/sqlglot/typing/snowflake.py @@ -117,6 +117,7 @@ def _annotate_within_group(self: TypeAnnotator, expression: exp.WithinGroup) -> exp.Substring, exp.TimeSlice, exp.TimestampTrunc, + exp.Median, } }, **{ diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 514eac85a1..39dc0cc0b1 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -22,6 +22,7 @@ def test_snowflake(self): self.validate_identity("SELECT GREATEST_IGNORE_NULLS(1, 2, 3, NULL)") self.validate_identity("SELECT LEAST_IGNORE_NULLS(5, NULL, 7, 3)") self.validate_identity("SELECT MAX(x)") + self.validate_identity("SELECT MEDIAN(x)") self.validate_identity("SELECT TAN(x)") self.validate_identity("SELECT COS(x)") self.validate_identity("SELECT SINH(1.5)") diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql index 1bf17f91da..9049f5e5d0 100644 --- a/tests/fixtures/optimizer/annotate_functions.sql +++ b/tests/fixtures/optimizer/annotate_functions.sql @@ -2644,6 +2644,22 @@ VARCHAR; MINUTE(CAST('08:50:57' AS TIME)); INT; +# dialect: snowflake +MEDIAN(tbl.double_col); +DOUBLE; + +# dialect: snowflake +MEDIAN(tbl.int_col); +INT; + +# dialect: snowflake +MEDIAN(tbl.bigint_col); +BIGINT; + +# dialect: snowflake +MEDIAN(CAST(100 AS DECIMAL(10,2))); +DECIMAL(10, 2); + # dialect: snowflake MONTHNAME(CAST('2024-05-09' AS DATE)); VARCHAR; From 39e10bb58c3330e12b43cd4889ca2dd82de1ac83 Mon Sep 17 00:00:00 2001 From: fivetran-felixhuang Date: Thu, 27 Nov 2025 15:36:44 -0500 Subject: [PATCH 2/7] fix annotation --- sqlglot/expressions.py | 2 +- sqlglot/typing/snowflake.py | 28 ++++++++++++++++++- tests/dialects/test_snowflake.py | 1 - .../fixtures/optimizer/annotate_functions.sql | 8 ++---- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index 51eb879348..ec229e591b 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -7316,7 +7316,7 @@ class MD5NumberUpper64(Func): class Median(AggFunc): - arg_types = {"this": True} + pass class Min(AggFunc): diff --git a/sqlglot/typing/snowflake.py b/sqlglot/typing/snowflake.py index 3a016dbe4f..1ac8338834 100644 --- a/sqlglot/typing/snowflake.py +++ b/sqlglot/typing/snowflake.py @@ -100,6 +100,32 @@ def _annotate_within_group(self: TypeAnnotator, expression: exp.WithinGroup) -> return expression + +def _annotate_median(self: TypeAnnotator, expression: exp.Median) -> exp.Median: + """Annotate MEDIAN function with correct return type. + + Based on Snowflake documentation: "Returns a FLOAT or DECIMAL (fixed-point) number, depending upon the input." + + MEDIAN returns: + - DECIMAL types preserve their precision (DECIMAL(10,2) -> DECIMAL(10,2)) + - Other numeric types return DOUBLE (INT, BIGINT, FLOAT -> DOUBLE) + """ + # First annotate the argument to get its type + expression = self._annotate_by_args(expression, "this") + + # Get the input type + input_type = expression.this.type + + # If input is DECIMAL, preserve the precision per Snowflake behavior + if input_type and input_type.is_type(exp.DataType.Type.DECIMAL): + self._set_type(expression, input_type) + else: + # For all other types (INT, BIGINT, FLOAT, NULL, etc.), return DOUBLE + self._set_type(expression, exp.DataType.Type.DOUBLE) + + return expression + + EXPRESSION_METADATA = { **EXPRESSION_METADATA, **{ @@ -117,7 +143,6 @@ def _annotate_within_group(self: TypeAnnotator, expression: exp.WithinGroup) -> exp.Substring, exp.TimeSlice, exp.TimestampTrunc, - exp.Median, } }, **{ @@ -311,6 +336,7 @@ def _annotate_within_group(self: TypeAnnotator, expression: exp.WithinGroup) -> "annotator": lambda self, e: self._annotate_by_args(e, "expressions") }, exp.LeastIgnoreNulls: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions")}, + exp.Median: {"annotator": _annotate_median}, exp.Reverse: {"annotator": _annotate_reverse}, exp.TimeAdd: {"annotator": _annotate_date_or_time_add}, exp.TimestampFromParts: {"annotator": _annotate_timestamp_from_parts}, diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 39dc0cc0b1..514eac85a1 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -22,7 +22,6 @@ def test_snowflake(self): self.validate_identity("SELECT GREATEST_IGNORE_NULLS(1, 2, 3, NULL)") self.validate_identity("SELECT LEAST_IGNORE_NULLS(5, NULL, 7, 3)") self.validate_identity("SELECT MAX(x)") - self.validate_identity("SELECT MEDIAN(x)") self.validate_identity("SELECT TAN(x)") self.validate_identity("SELECT COS(x)") self.validate_identity("SELECT SINH(1.5)") diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql index 9049f5e5d0..39371acbfd 100644 --- a/tests/fixtures/optimizer/annotate_functions.sql +++ b/tests/fixtures/optimizer/annotate_functions.sql @@ -2649,12 +2649,8 @@ MEDIAN(tbl.double_col); DOUBLE; # dialect: snowflake -MEDIAN(tbl.int_col); -INT; - -# dialect: snowflake -MEDIAN(tbl.bigint_col); -BIGINT; +MEDIAN(tbl.bigint_col) OVER (PARTITION BY 1); +DOUBLE; # dialect: snowflake MEDIAN(CAST(100 AS DECIMAL(10,2))); From 261d2858b12d1e1b2acb7cc727c98f60cdac4937 Mon Sep 17 00:00:00 2001 From: fivetran-felixhuang Date: Thu, 27 Nov 2025 16:02:19 -0500 Subject: [PATCH 3/7] fix format --- sqlglot/typing/snowflake.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sqlglot/typing/snowflake.py b/sqlglot/typing/snowflake.py index 1ac8338834..c8fce34ada 100644 --- a/sqlglot/typing/snowflake.py +++ b/sqlglot/typing/snowflake.py @@ -100,7 +100,6 @@ def _annotate_within_group(self: TypeAnnotator, expression: exp.WithinGroup) -> return expression - def _annotate_median(self: TypeAnnotator, expression: exp.Median) -> exp.Median: """Annotate MEDIAN function with correct return type. From 1f2aef6424f82f21f2196fd841d8d0d6bd6c401b Mon Sep 17 00:00:00 2001 From: fivetran-felixhuang Date: Mon, 1 Dec 2025 13:45:53 -0500 Subject: [PATCH 4/7] fine grained annotation --- sqlglot/typing/snowflake.py | 38 ++++++++++++++----- .../fixtures/optimizer/annotate_functions.sql | 8 ++-- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/sqlglot/typing/snowflake.py b/sqlglot/typing/snowflake.py index c8fce34ada..7c04127c6f 100644 --- a/sqlglot/typing/snowflake.py +++ b/sqlglot/typing/snowflake.py @@ -103,11 +103,9 @@ def _annotate_within_group(self: TypeAnnotator, expression: exp.WithinGroup) -> def _annotate_median(self: TypeAnnotator, expression: exp.Median) -> exp.Median: """Annotate MEDIAN function with correct return type. - Based on Snowflake documentation: "Returns a FLOAT or DECIMAL (fixed-point) number, depending upon the input." - - MEDIAN returns: - - DECIMAL types preserve their precision (DECIMAL(10,2) -> DECIMAL(10,2)) - - Other numeric types return DOUBLE (INT, BIGINT, FLOAT -> DOUBLE) + Based on Snowflake documentation: + - If the expr is FLOAT -> annotate as FLOAT + - If the expr is NUMBER(p, s) -> annotate as NUMBER(MAX(p+3, 38), MAX(s+3, 37)) """ # First annotate the argument to get its type expression = self._annotate_by_args(expression, "this") @@ -115,12 +113,32 @@ def _annotate_median(self: TypeAnnotator, expression: exp.Median) -> exp.Median: # Get the input type input_type = expression.this.type - # If input is DECIMAL, preserve the precision per Snowflake behavior - if input_type and input_type.is_type(exp.DataType.Type.DECIMAL): - self._set_type(expression, input_type) + if input_type and input_type.is_type(exp.DataType.Type.FLOAT): + # If input is FLOAT, return FLOAT + self._set_type(expression, exp.DataType.Type.FLOAT) else: - # For all other types (INT, BIGINT, FLOAT, NULL, etc.), return DOUBLE - self._set_type(expression, exp.DataType.Type.DOUBLE) + # If input is NUMBER(p, s), return NUMBER(min(p+3, 38), min(s+3, 37)) + precision = input_type.expressions[0].this if input_type.expressions else 38 + scale = input_type.expressions[1].this if len(input_type.expressions) > 1 else 0 + + if hasattr(precision, "this"): + precision = precision.this + if hasattr(scale, "this"): + scale = scale.this + + try: + precision = int(precision) if precision is not None else 38 + scale = int(scale) if scale is not None else 0 + except (ValueError, TypeError): + precision = 38 + scale = 0 + + new_precision = min(precision + 3, 38) + new_scale = min(scale + 3, 37) + + # Build the new NUMBER type + new_type = exp.DataType.build(f"NUMBER({new_precision}, {new_scale})", dialect="snowflake") + self._set_type(expression, new_type) return expression diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql index 39371acbfd..5159efc1b4 100644 --- a/tests/fixtures/optimizer/annotate_functions.sql +++ b/tests/fixtures/optimizer/annotate_functions.sql @@ -2645,16 +2645,16 @@ MINUTE(CAST('08:50:57' AS TIME)); INT; # dialect: snowflake -MEDIAN(tbl.double_col); -DOUBLE; +MEDIAN(2.71::FLOAT); +FLOAT; # dialect: snowflake MEDIAN(tbl.bigint_col) OVER (PARTITION BY 1); -DOUBLE; +DECIMAL(38, 3); # dialect: snowflake MEDIAN(CAST(100 AS DECIMAL(10,2))); -DECIMAL(10, 2); +DECIMAL(13, 5); # dialect: snowflake MONTHNAME(CAST('2024-05-09' AS DATE)); From 8258e9513293e8f857e9adec2f9417335aa08a69 Mon Sep 17 00:00:00 2001 From: fivetran-felixhuang Date: Mon, 1 Dec 2025 13:48:20 -0500 Subject: [PATCH 5/7] fix comment --- sqlglot/typing/snowflake.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlglot/typing/snowflake.py b/sqlglot/typing/snowflake.py index 7c04127c6f..83e59ac36d 100644 --- a/sqlglot/typing/snowflake.py +++ b/sqlglot/typing/snowflake.py @@ -105,7 +105,7 @@ def _annotate_median(self: TypeAnnotator, expression: exp.Median) -> exp.Median: Based on Snowflake documentation: - If the expr is FLOAT -> annotate as FLOAT - - If the expr is NUMBER(p, s) -> annotate as NUMBER(MAX(p+3, 38), MAX(s+3, 37)) + - If the expr is NUMBER(p, s) -> annotate as NUMBER(min(p+3, 38), min(s+3, 37)) """ # First annotate the argument to get its type expression = self._annotate_by_args(expression, "this") From 332c7e9a21f03314dd2917259bcc723d586b2742 Mon Sep 17 00:00:00 2001 From: fivetran-felixhuang Date: Tue, 2 Dec 2025 10:20:26 -0500 Subject: [PATCH 6/7] address comments --- sqlglot/typing/snowflake.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/sqlglot/typing/snowflake.py b/sqlglot/typing/snowflake.py index 83e59ac36d..40c3ee87ca 100644 --- a/sqlglot/typing/snowflake.py +++ b/sqlglot/typing/snowflake.py @@ -3,6 +3,7 @@ import typing as t from sqlglot import exp +from sqlglot.helper import seq_get from sqlglot.typing import EXPRESSION_METADATA if t.TYPE_CHECKING: @@ -10,6 +11,10 @@ DATE_PARTS = {"DAY", "WEEK", "MONTH", "QUARTER", "YEAR"} +MAX_PRECISION = 38 + +MAX_SCALE = 37 + def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse: expression = self._annotate_by_args(expression, "this") @@ -118,23 +123,16 @@ def _annotate_median(self: TypeAnnotator, expression: exp.Median) -> exp.Median: self._set_type(expression, exp.DataType.Type.FLOAT) else: # If input is NUMBER(p, s), return NUMBER(min(p+3, 38), min(s+3, 37)) - precision = input_type.expressions[0].this if input_type.expressions else 38 - scale = input_type.expressions[1].this if len(input_type.expressions) > 1 else 0 - - if hasattr(precision, "this"): - precision = precision.this - if hasattr(scale, "this"): - scale = scale.this - - try: - precision = int(precision) if precision is not None else 38 - scale = int(scale) if scale is not None else 0 - except (ValueError, TypeError): - precision = 38 - scale = 0 - - new_precision = min(precision + 3, 38) - new_scale = min(scale + 3, 37) + exprs = input_type.expressions + + precision_expr = seq_get(exprs, 0) + precision = precision_expr.this.to_py() if precision_expr else MAX_PRECISION + + scale_expr = seq_get(exprs, 1) + scale = scale_expr.this.to_py() if scale_expr else 0 + + new_precision = min(precision + 3, MAX_PRECISION) + new_scale = min(scale + 3, MAX_SCALE) # Build the new NUMBER type new_type = exp.DataType.build(f"NUMBER({new_precision}, {new_scale})", dialect="snowflake") From 2ac7d70c74813011886383a07c44eb2dfdf942c3 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Tue, 2 Dec 2025 17:40:55 +0200 Subject: [PATCH 7/7] Update sqlglot/typing/snowflake.py --- sqlglot/typing/snowflake.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlglot/typing/snowflake.py b/sqlglot/typing/snowflake.py index 40c3ee87ca..7102b8daea 100644 --- a/sqlglot/typing/snowflake.py +++ b/sqlglot/typing/snowflake.py @@ -118,7 +118,7 @@ def _annotate_median(self: TypeAnnotator, expression: exp.Median) -> exp.Median: # Get the input type input_type = expression.this.type - if input_type and input_type.is_type(exp.DataType.Type.FLOAT): + if input_type.is_type(exp.DataType.Type.FLOAT): # If input is FLOAT, return FLOAT self._set_type(expression, exp.DataType.Type.FLOAT) else: