From 626907306eedeed89b54eea3519b55c6466da333 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 8 Oct 2025 15:24:08 -0500 Subject: [PATCH 01/46] start --- datafusion/expr/src/expr.rs | 37 ++++++++++++++++-------------- datafusion/expr/src/expr_fn.rs | 2 +- datafusion/expr/src/expr_schema.rs | 22 ++++++++---------- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 282b3f6a0f55..a12cf268de2b 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1370,13 +1370,16 @@ pub struct Placeholder { /// The identifier of the parameter, including the leading `$` (e.g, `"$1"` or `"$foo"`) pub id: String, /// The type the parameter will be filled in with - pub data_type: Option, + pub field: Option, } impl Placeholder { /// Create a new Placeholder expression pub fn new(id: String, data_type: Option) -> Self { - Self { id, data_type } + Self { + id, + field: data_type.map(|dt| Arc::new(Field::new("", dt, true))), + } } } @@ -2888,17 +2891,17 @@ impl HashNode for Expr { // Modifies expr if it is a placeholder with datatype of right fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> { - if let Expr::Placeholder(Placeholder { id: _, data_type }) = expr { - if data_type.is_none() { - let other_dt = other.get_type(schema); - match other_dt { + if let Expr::Placeholder(Placeholder { id: _, field }) = expr { + if field.is_none() { + let other_field = other.to_field(schema); + match other_field { Err(e) => { Err(e.context(format!( "Can not find type of {other} needed to infer type of {expr}" )))?; } - Ok(dt) => { - *data_type = Some(dt); + Ok((_, other_field)) => { + *field = Some(other_field); } } }; @@ -3730,15 +3733,15 @@ mod test { let param_placeholders = vec![ Expr::Placeholder(Placeholder { id: "$1".to_string(), - data_type: None, + field: None, }), Expr::Placeholder(Placeholder { id: "$2".to_string(), - data_type: None, + field: None, 
}), Expr::Placeholder(Placeholder { id: "$3".to_string(), - data_type: None, + field: None, }), ]; let in_list = Expr::InList(InList { @@ -3764,8 +3767,8 @@ mod test { match expr { Expr::Placeholder(placeholder) => { assert_eq!( - placeholder.data_type, - Some(DataType::Int32), + placeholder.field.unwrap().data_type(), + &DataType::Int32, "Placeholder {} should infer Int32", placeholder.id ); @@ -3789,7 +3792,7 @@ mod test { expr: Box::new(col("name")), pattern: Box::new(Expr::Placeholder(Placeholder { id: "$1".to_string(), - data_type: None, + field: None, })), negated: false, case_insensitive: false, @@ -3802,7 +3805,7 @@ mod test { match inferred_expr { Expr::Like(like) => match *like.pattern { Expr::Placeholder(placeholder) => { - assert_eq!(placeholder.data_type, Some(DataType::Utf8)); + assert_eq!(placeholder.field.unwrap().data_type(), &DataType::Utf8); } _ => panic!("Expected Placeholder"), }, @@ -3817,8 +3820,8 @@ mod test { Expr::SimilarTo(like) => match *like.pattern { Expr::Placeholder(placeholder) => { assert_eq!( - placeholder.data_type, - Some(DataType::Utf8), + placeholder.field.unwrap().data_type(), + &DataType::Utf8, "Placeholder {} should infer Utf8", placeholder.id ); diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4666411dd540..5ff42a31a885 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -125,7 +125,7 @@ pub fn ident(name: impl Into) -> Expr { pub fn placeholder(id: impl Into) -> Expr { Expr::Placeholder(Placeholder { id: id.into(), - data_type: None, + field: None, }) } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index e803e3534130..81af29c2f683 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -433,18 +433,16 @@ impl ExprSchemable for Expr { .. }) => { let field = match &**expr { - Expr::Placeholder(Placeholder { data_type, .. 
}) => { - match &data_type { - None => schema - .data_type_and_nullable(&Column::from_name(name)) - .map(|(d, n)| Field::new(&schema_name, d.clone(), n)), - Some(dt) => Ok(Field::new( - &schema_name, - dt.clone(), - expr.nullable(schema)?, - )), - } - } + Expr::Placeholder(Placeholder { field, .. }) => match &field { + None => schema + .data_type_and_nullable(&Column::from_name(name)) + .map(|(d, n)| Field::new(&schema_name, d.clone(), n)), + Some(field) => Ok(field + .as_ref() + .clone() + .with_name(&schema_name) + .with_nullable(expr.nullable(schema)?)), + }, _ => expr.to_field(schema).map(|(_, f)| f.as_ref().clone()), }?; From 585bba9a147993a8caab8667f2a50f3f075bcfcb Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 8 Oct 2025 17:07:28 -0500 Subject: [PATCH 02/46] builds! --- datafusion/expr/src/expr_schema.rs | 10 +++--- datafusion/expr/src/logical_plan/plan.rs | 35 ++++++++++++++----- datafusion/proto/src/logical_plan/to_proto.rs | 12 +++++-- datafusion/sql/src/statement.rs | 6 ++-- 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 81af29c2f683..00708fd3411d 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -104,9 +104,9 @@ impl ExprSchemable for Expr { fn get_type(&self, schema: &dyn ExprSchema) -> Result { match self { Expr::Alias(Alias { expr, name, .. }) => match &**expr { - Expr::Placeholder(Placeholder { data_type, .. }) => match &data_type { + Expr::Placeholder(Placeholder { field, .. }) => match &field { None => schema.data_type(&Column::from_name(name)).cloned(), - Some(dt) => Ok(dt.clone()), + Some(field) => Ok(field.data_type().clone()), }, _ => expr.get_type(schema), }, @@ -211,9 +211,9 @@ impl ExprSchemable for Expr { ) .get_result_type(), Expr::Like { .. } | Expr::SimilarTo { .. } => Ok(DataType::Boolean), - Expr::Placeholder(Placeholder { data_type, .. 
}) => { - if let Some(dtype) = data_type { - Ok(dtype.clone()) + Expr::Placeholder(Placeholder { field, .. }) => { + if let Some(field) = field { + Ok(field.data_type().clone()) } else { // If the placeholder's type hasn't been specified, treat it as // null (unspecified placeholders generate an error during planning) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index b8200ab8a48c..ebb1df9f1126 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -51,7 +51,7 @@ use crate::{ WindowFunctionDefinition, }; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; use datafusion_common::cse::{NormalizeEq, Normalizeable}; use datafusion_common::format::ExplainFormat; use datafusion_common::tree_node::{ @@ -1494,24 +1494,43 @@ impl LogicalPlan { } /// Walk the logical plan, find any `Placeholder` tokens, and return a map of their IDs and DataTypes + /// + /// Note that this will drop any extension or field metadata attached to parameters. Use + /// [`LogicalPlan::get_parameter_fields`] to keep extension metadata. 
pub fn get_parameter_types( &self, ) -> Result>, DataFusionError> { - let mut param_types: HashMap> = HashMap::new(); + let mut parameter_fields = self.get_parameter_fields()?; + Ok(parameter_fields + .drain() + .map(|(name, maybe_field)| { + (name, maybe_field.map(|field| field.data_type().clone())) + }) + .collect()) + } + + /// Walk the logical plan, find any `Placeholder` tokens, and return a map of their IDs and FieldRefs + pub fn get_parameter_fields( + &self, + ) -> Result>, DataFusionError> { + let mut param_types: HashMap> = HashMap::new(); self.apply_with_subqueries(|plan| { plan.apply_expressions(|expr| { expr.apply(|expr| { - if let Expr::Placeholder(Placeholder { id, data_type }) = expr { + if let Expr::Placeholder(Placeholder { id, field }) = expr { let prev = param_types.get(id); - match (prev, data_type) { - (Some(Some(prev)), Some(dt)) => { - if prev != dt { + match (prev, field) { + (Some(Some(prev)), Some(field)) => { + // This check is possibly too strict (requires nullability and field + // metadata align perfectly, rather than compute true type equality + // when field metadata is representing an extension type) + if prev != field { plan_err!("Conflicting types for {id}")?; } } - (_, Some(dt)) => { - param_types.insert(id.clone(), Some(dt.clone())); + (_, Some(field)) => { + param_types.insert(id.clone(), Some(Arc::clone(field))); } _ => { param_types.insert(id.clone(), None); diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 1be3300008c7..179fd1a62e85 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -600,9 +600,15 @@ pub fn serialize_expr( })), } } - Expr::Placeholder(Placeholder { id, data_type }) => { - let data_type = match data_type { - Some(data_type) => Some(data_type.try_into()?), + Expr::Placeholder(Placeholder { id, field }) => { + let data_type = match field { + Some(field) => { + if 
!field.metadata().is_empty() { + return Err(Error::General(format!("Can't serialize placeholder with metadata ('{id}') to protobuf."))); + } + + Some(field.data_type().try_into()?) + } None => None, }; protobuf::LogicalExprNode { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 0e868e8c2689..d1d64091c8cc 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1998,10 +1998,10 @@ impl SqlToRel<'_, S> { )?; // Update placeholder's datatype to the type of the target column if let Expr::Placeholder(placeholder) = &mut expr { - placeholder.data_type = placeholder - .data_type + placeholder.field = placeholder + .field .take() - .or_else(|| Some(field.data_type().clone())); + .or_else(|| Some(Arc::clone(field))); } // Cast to target column type, if necessary expr.cast_to(field.data_type(), source.schema())? From 16ac02d8efc5c41b03f957963ac21db2754b423e Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Oct 2025 13:20:52 -0500 Subject: [PATCH 03/46] backward compatible proto --- datafusion/proto/proto/datafusion.proto | 2 ++ datafusion/proto/src/generated/pbjson.rs | 36 +++++++++++++++++++ datafusion/proto/src/generated/prost.rs | 7 ++++ datafusion/proto/src/logical_plan/to_proto.rs | 31 +++++++--------- 4 files changed, 58 insertions(+), 18 deletions(-) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index ee9ac0e7902d..b62eae739f40 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -413,6 +413,8 @@ message Wildcard { message PlaceholderNode { string id = 1; datafusion_common.ArrowType data_type = 2; + optional bool nullable = 3; + map metadata = 4; } message LogicalExprList { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 29967d812000..1754a1e77784 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ 
b/datafusion/proto/src/generated/pbjson.rs @@ -18343,6 +18343,12 @@ impl serde::Serialize for PlaceholderNode { if self.data_type.is_some() { len += 1; } + if self.nullable.is_some() { + len += 1; + } + if !self.metadata.is_empty() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.PlaceholderNode", len)?; if !self.id.is_empty() { struct_ser.serialize_field("id", &self.id)?; @@ -18350,6 +18356,12 @@ impl serde::Serialize for PlaceholderNode { if let Some(v) = self.data_type.as_ref() { struct_ser.serialize_field("dataType", v)?; } + if let Some(v) = self.nullable.as_ref() { + struct_ser.serialize_field("nullable", v)?; + } + if !self.metadata.is_empty() { + struct_ser.serialize_field("metadata", &self.metadata)?; + } struct_ser.end() } } @@ -18363,12 +18375,16 @@ impl<'de> serde::Deserialize<'de> for PlaceholderNode { "id", "data_type", "dataType", + "nullable", + "metadata", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { Id, DataType, + Nullable, + Metadata, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -18392,6 +18408,8 @@ impl<'de> serde::Deserialize<'de> for PlaceholderNode { match value { "id" => Ok(GeneratedField::Id), "dataType" | "data_type" => Ok(GeneratedField::DataType), + "nullable" => Ok(GeneratedField::Nullable), + "metadata" => Ok(GeneratedField::Metadata), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -18413,6 +18431,8 @@ impl<'de> serde::Deserialize<'de> for PlaceholderNode { { let mut id__ = None; let mut data_type__ = None; + let mut nullable__ = None; + let mut metadata__ = None; while let Some(k) = map_.next_key()? 
{ match k { GeneratedField::Id => { @@ -18427,11 +18447,27 @@ impl<'de> serde::Deserialize<'de> for PlaceholderNode { } data_type__ = map_.next_value()?; } + GeneratedField::Nullable => { + if nullable__.is_some() { + return Err(serde::de::Error::duplicate_field("nullable")); + } + nullable__ = map_.next_value()?; + } + GeneratedField::Metadata => { + if metadata__.is_some() { + return Err(serde::de::Error::duplicate_field("metadata")); + } + metadata__ = Some( + map_.next_value::>()? + ); + } } } Ok(PlaceholderNode { id: id__.unwrap_or_default(), data_type: data_type__, + nullable: nullable__, + metadata: metadata__.unwrap_or_default(), }) } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index d3b5f566e98b..51ce9b4f8565 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -653,6 +653,13 @@ pub struct PlaceholderNode { pub id: ::prost::alloc::string::String, #[prost(message, optional, tag = "2")] pub data_type: ::core::option::Option, + #[prost(bool, optional, tag = "3")] + pub nullable: ::core::option::Option, + #[prost(map = "string, string", tag = "4")] + pub metadata: ::std::collections::HashMap< + ::prost::alloc::string::String, + ::prost::alloc::string::String, + >, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct LogicalExprList { diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 179fd1a62e85..8b59c97da432 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -600,24 +600,19 @@ pub fn serialize_expr( })), } } - Expr::Placeholder(Placeholder { id, field }) => { - let data_type = match field { - Some(field) => { - if !field.metadata().is_empty() { - return Err(Error::General(format!("Can't serialize placeholder with metadata ('{id}') to protobuf."))); - } - - Some(field.data_type().try_into()?) 
- } - None => None, - }; - protobuf::LogicalExprNode { - expr_type: Some(ExprType::Placeholder(PlaceholderNode { - id: id.clone(), - data_type, - })), - } - } + Expr::Placeholder(Placeholder { id, field }) => protobuf::LogicalExprNode { + expr_type: Some(ExprType::Placeholder(PlaceholderNode { + id: id.clone(), + data_type: match field { + Some(field) => Some(field.data_type().try_into()?), + None => None, + }, + nullable: field.as_ref().map(|f| f.is_nullable()), + metadata: field.as_ref() + .map(|f| f.metadata().clone()) + .unwrap_or(HashMap::new()), + })), + }, }; Ok(expr_node) From a702499dd6a9729a004e2b05f5a0342ed8011890 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Oct 2025 14:00:44 -0500 Subject: [PATCH 04/46] move FieldMetadata --- datafusion/common/src/lib.rs | 1 + datafusion/common/src/metadata.rs | 250 ++++++++++++++++++ datafusion/core/tests/dataframe/mod.rs | 5 +- .../user_defined_scalar_functions.rs | 2 +- datafusion/expr/src/expr.rs | 232 +--------------- datafusion/expr/src/expr_schema.rs | 5 +- datafusion/expr/src/literal.rs | 3 +- datafusion/expr/src/logical_plan/builder.rs | 7 +- .../simplify_expressions/expr_simplifier.rs | 2 +- .../physical-expr/src/expressions/literal.rs | 2 +- datafusion/physical-expr/src/planner.rs | 5 +- datafusion/proto/src/logical_plan/to_proto.rs | 3 +- 12 files changed, 270 insertions(+), 247 deletions(-) create mode 100644 datafusion/common/src/metadata.rs diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 24ec9b7be323..89dc4956abb1 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -47,6 +47,7 @@ pub mod file_options; pub mod format; pub mod hash_utils; pub mod instant; +pub mod metadata; pub mod nested_struct; mod null_equality; pub mod parsers; diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs new file mode 100644 index 000000000000..6f6f4e6adbe2 --- /dev/null +++ b/datafusion/common/src/metadata.rs @@ -0,0 +1,250 
@@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{collections::BTreeMap, sync::Arc}; + +use arrow::datatypes::Field; +use hashbrown::HashMap; + +/// Literal metadata +/// +/// Stores metadata associated with a literal expressions +/// and is designed to be fast to `clone`. +/// +/// This structure is used to store metadata associated with a literal expression, and it +/// corresponds to the `metadata` field on [`Field`]. 
+/// +/// # Example: Create [`FieldMetadata`] from a [`Field`] +/// ``` +/// # use std::collections::HashMap; +/// # use datafusion_expr::expr::FieldMetadata; +/// # use arrow::datatypes::{Field, DataType}; +/// # let field = Field::new("c1", DataType::Int32, true) +/// # .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())])); +/// // Create a new `FieldMetadata` instance from a `Field` +/// let metadata = FieldMetadata::new_from_field(&field); +/// // There is also a `From` impl: +/// let metadata = FieldMetadata::from(&field); +/// ``` +/// +/// # Example: Update a [`Field`] with [`FieldMetadata`] +/// ``` +/// # use datafusion_expr::expr::FieldMetadata; +/// # use arrow::datatypes::{Field, DataType}; +/// # let field = Field::new("c1", DataType::Int32, true); +/// # let metadata = FieldMetadata::new_from_field(&field); +/// // Add any metadata from `FieldMetadata` to `Field` +/// let updated_field = metadata.add_to_field(field); +/// ``` +/// +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +pub struct FieldMetadata { + /// The inner metadata of a literal expression, which is a map of string + /// keys to string values. + /// + /// Note this is not a `HashMap` because `HashMap` does not provide + /// implementations for traits like `Debug` and `Hash`. + inner: Arc>, +} + +impl Default for FieldMetadata { + fn default() -> Self { + Self::new_empty() + } +} + +impl FieldMetadata { + /// Create a new empty metadata instance. + pub fn new_empty() -> Self { + Self { + inner: Arc::new(BTreeMap::new()), + } + } + + /// Merges two optional `FieldMetadata` instances, overwriting any existing + /// keys in `m` with keys from `n` if present. + /// + /// This function is commonly used in alias operations, particularly for literals + /// with metadata. When creating an alias expression, the metadata from the original + /// expression (such as a literal) is combined with any metadata specified on the alias. 
+ /// + /// # Arguments + /// + /// * `m` - The first metadata (typically from the original expression like a literal) + /// * `n` - The second metadata (typically from the alias definition) + /// + /// # Merge Strategy + /// + /// - If both metadata instances exist, they are merged with `n` taking precedence + /// - Keys from `n` will overwrite keys from `m` if they have the same name + /// - If only one metadata instance exists, it is returned unchanged + /// - If neither exists, `None` is returned + /// + /// # Example usage + /// ```rust + /// use datafusion_expr::expr::FieldMetadata; + /// use std::collections::BTreeMap; + /// + /// // Create metadata for a literal expression + /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([ + /// ("source".to_string(), "constant".to_string()), + /// ("type".to_string(), "int".to_string()), + /// ]))); + /// + /// // Create metadata for an alias + /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([ + /// ("description".to_string(), "answer".to_string()), + /// ("source".to_string(), "user".to_string()), // This will override literal's "source" + /// ]))); + /// + /// // Merge the metadata + /// let merged = FieldMetadata::merge_options( + /// literal_metadata.as_ref(), + /// alias_metadata.as_ref(), + /// ); + /// + /// // Result contains: {"source": "user", "type": "int", "description": "answer"} + /// assert!(merged.is_some()); + /// ``` + pub fn merge_options( + m: Option<&FieldMetadata>, + n: Option<&FieldMetadata>, + ) -> Option { + match (m, n) { + (Some(m), Some(n)) => { + let mut merged = m.clone(); + merged.extend(n.clone()); + Some(merged) + } + (Some(m), None) => Some(m.clone()), + (None, Some(n)) => Some(n.clone()), + (None, None) => None, + } + } + + /// Create a new metadata instance from a `Field`'s metadata. 
+ pub fn new_from_field(field: &Field) -> Self { + let inner = field + .metadata() + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + Self { + inner: Arc::new(inner), + } + } + + /// Create a new metadata instance from a map of string keys to string values. + pub fn new(inner: BTreeMap) -> Self { + Self { + inner: Arc::new(inner), + } + } + + /// Get the inner metadata as a reference to a `BTreeMap`. + pub fn inner(&self) -> &BTreeMap { + &self.inner + } + + /// Return the inner metadata + pub fn into_inner(self) -> Arc> { + self.inner + } + + /// Adds metadata from `other` into `self`, overwriting any existing keys. + pub fn extend(&mut self, other: Self) { + if other.is_empty() { + return; + } + let other = Arc::unwrap_or_clone(other.into_inner()); + Arc::make_mut(&mut self.inner).extend(other); + } + + /// Returns true if the metadata is empty. + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + /// Returns the number of key-value pairs in the metadata. + pub fn len(&self) -> usize { + self.inner.len() + } + + /// Convert this `FieldMetadata` into a `HashMap` + pub fn to_hashmap(&self) -> std::collections::HashMap { + self.inner + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect() + } + + /// Updates the metadata on the Field with this metadata, if it is not empty. 
+ pub fn add_to_field(&self, field: Field) -> Field { + if self.inner.is_empty() { + return field; + } + + field.with_metadata(self.to_hashmap()) + } +} + +impl From<&Field> for FieldMetadata { + fn from(field: &Field) -> Self { + Self::new_from_field(field) + } +} + +impl From> for FieldMetadata { + fn from(inner: BTreeMap) -> Self { + Self::new(inner) + } +} + +impl From> for FieldMetadata { + fn from(map: std::collections::HashMap) -> Self { + Self::new(map.into_iter().collect()) + } +} + +/// From reference +impl From<&std::collections::HashMap> for FieldMetadata { + fn from(map: &std::collections::HashMap) -> Self { + let inner = map + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + Self::new(inner) + } +} + +/// From hashbrown map +impl From> for FieldMetadata { + fn from(map: HashMap) -> Self { + let inner = map.into_iter().collect(); + Self::new(inner) + } +} + +impl From<&HashMap> for FieldMetadata { + fn from(map: &HashMap) -> Self { + let inner = map + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + Self::new(inner) + } +} diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index aa538f6dee81..11685b4c17ea 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -33,6 +33,7 @@ use arrow::error::ArrowError; use arrow::util::pretty::pretty_format_batches; use arrow_schema::{SortOptions, TimeUnit}; use datafusion::{assert_batches_eq, dataframe}; +use datafusion_common::metadata::FieldMetadata; use datafusion_functions_aggregate::count::{count_all, count_all_window}; use datafusion_functions_aggregate::expr_fn::{ array_agg, avg, avg_distinct, count, count_distinct, max, median, min, sum, @@ -71,9 +72,7 @@ use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::file_format::format_as_file_type; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; -use 
datafusion_expr::expr::{ - FieldMetadata, GroupingSet, NullTreatment, Sort, WindowFunction, -}; +use datafusion_expr::expr::{GroupingSet, NullTreatment, Sort, WindowFunction}; use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder, diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index f1af66de9b59..fb1371da6ceb 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -34,13 +34,13 @@ use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionS use datafusion::prelude::*; use datafusion::{execution::registry::FunctionRegistry, test_util}; use datafusion_common::cast::{as_float64_array, as_int32_array}; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::utils::take_function_args; use datafusion_common::{ assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_datafusion_err, exec_err, not_impl_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue, }; -use datafusion_expr::expr::FieldMetadata; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ lit_with_metadata, Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index a12cf268de2b..b47f7412b121 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -18,7 +18,7 @@ //! 
Logical Expressions: [`Expr`] use std::cmp::Ordering; -use std::collections::{BTreeMap, HashSet}; +use std::collections::HashSet; use std::fmt::{self, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; use std::mem; @@ -32,6 +32,7 @@ use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF}; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable}; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion, }; @@ -447,235 +448,6 @@ impl<'a> TreeNodeContainer<'a, Self> for Expr { } } -/// Literal metadata -/// -/// Stores metadata associated with a literal expressions -/// and is designed to be fast to `clone`. -/// -/// This structure is used to store metadata associated with a literal expression, and it -/// corresponds to the `metadata` field on [`Field`]. -/// -/// # Example: Create [`FieldMetadata`] from a [`Field`] -/// ``` -/// # use std::collections::HashMap; -/// # use datafusion_expr::expr::FieldMetadata; -/// # use arrow::datatypes::{Field, DataType}; -/// # let field = Field::new("c1", DataType::Int32, true) -/// # .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())])); -/// // Create a new `FieldMetadata` instance from a `Field` -/// let metadata = FieldMetadata::new_from_field(&field); -/// // There is also a `From` impl: -/// let metadata = FieldMetadata::from(&field); -/// ``` -/// -/// # Example: Update a [`Field`] with [`FieldMetadata`] -/// ``` -/// # use datafusion_expr::expr::FieldMetadata; -/// # use arrow::datatypes::{Field, DataType}; -/// # let field = Field::new("c1", DataType::Int32, true); -/// # let metadata = FieldMetadata::new_from_field(&field); -/// // Add any metadata from `FieldMetadata` to `Field` -/// let updated_field = metadata.add_to_field(field); -/// ``` -/// -#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] -pub 
struct FieldMetadata { - /// The inner metadata of a literal expression, which is a map of string - /// keys to string values. - /// - /// Note this is not a `HashMap` because `HashMap` does not provide - /// implementations for traits like `Debug` and `Hash`. - inner: Arc>, -} - -impl Default for FieldMetadata { - fn default() -> Self { - Self::new_empty() - } -} - -impl FieldMetadata { - /// Create a new empty metadata instance. - pub fn new_empty() -> Self { - Self { - inner: Arc::new(BTreeMap::new()), - } - } - - /// Merges two optional `FieldMetadata` instances, overwriting any existing - /// keys in `m` with keys from `n` if present. - /// - /// This function is commonly used in alias operations, particularly for literals - /// with metadata. When creating an alias expression, the metadata from the original - /// expression (such as a literal) is combined with any metadata specified on the alias. - /// - /// # Arguments - /// - /// * `m` - The first metadata (typically from the original expression like a literal) - /// * `n` - The second metadata (typically from the alias definition) - /// - /// # Merge Strategy - /// - /// - If both metadata instances exist, they are merged with `n` taking precedence - /// - Keys from `n` will overwrite keys from `m` if they have the same name - /// - If only one metadata instance exists, it is returned unchanged - /// - If neither exists, `None` is returned - /// - /// # Example usage - /// ```rust - /// use datafusion_expr::expr::FieldMetadata; - /// use std::collections::BTreeMap; - /// - /// // Create metadata for a literal expression - /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([ - /// ("source".to_string(), "constant".to_string()), - /// ("type".to_string(), "int".to_string()), - /// ]))); - /// - /// // Create metadata for an alias - /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([ - /// ("description".to_string(), "answer".to_string()), - /// ("source".to_string(), 
"user".to_string()), // This will override literal's "source" - /// ]))); - /// - /// // Merge the metadata - /// let merged = FieldMetadata::merge_options( - /// literal_metadata.as_ref(), - /// alias_metadata.as_ref(), - /// ); - /// - /// // Result contains: {"source": "user", "type": "int", "description": "answer"} - /// assert!(merged.is_some()); - /// ``` - pub fn merge_options( - m: Option<&FieldMetadata>, - n: Option<&FieldMetadata>, - ) -> Option { - match (m, n) { - (Some(m), Some(n)) => { - let mut merged = m.clone(); - merged.extend(n.clone()); - Some(merged) - } - (Some(m), None) => Some(m.clone()), - (None, Some(n)) => Some(n.clone()), - (None, None) => None, - } - } - - /// Create a new metadata instance from a `Field`'s metadata. - pub fn new_from_field(field: &Field) -> Self { - let inner = field - .metadata() - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - Self { - inner: Arc::new(inner), - } - } - - /// Create a new metadata instance from a map of string keys to string values. - pub fn new(inner: BTreeMap) -> Self { - Self { - inner: Arc::new(inner), - } - } - - /// Get the inner metadata as a reference to a `BTreeMap`. - pub fn inner(&self) -> &BTreeMap { - &self.inner - } - - /// Return the inner metadata - pub fn into_inner(self) -> Arc> { - self.inner - } - - /// Adds metadata from `other` into `self`, overwriting any existing keys. - pub fn extend(&mut self, other: Self) { - if other.is_empty() { - return; - } - let other = Arc::unwrap_or_clone(other.into_inner()); - Arc::make_mut(&mut self.inner).extend(other); - } - - /// Returns true if the metadata is empty. - pub fn is_empty(&self) -> bool { - self.inner.is_empty() - } - - /// Returns the number of key-value pairs in the metadata. 
- pub fn len(&self) -> usize { - self.inner.len() - } - - /// Convert this `FieldMetadata` into a `HashMap` - pub fn to_hashmap(&self) -> std::collections::HashMap { - self.inner - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect() - } - - /// Updates the metadata on the Field with this metadata, if it is not empty. - pub fn add_to_field(&self, field: Field) -> Field { - if self.inner.is_empty() { - return field; - } - - field.with_metadata(self.to_hashmap()) - } -} - -impl From<&Field> for FieldMetadata { - fn from(field: &Field) -> Self { - Self::new_from_field(field) - } -} - -impl From> for FieldMetadata { - fn from(inner: BTreeMap) -> Self { - Self::new(inner) - } -} - -impl From> for FieldMetadata { - fn from(map: std::collections::HashMap) -> Self { - Self::new(map.into_iter().collect()) - } -} - -/// From reference -impl From<&std::collections::HashMap> for FieldMetadata { - fn from(map: &std::collections::HashMap) -> Self { - let inner = map - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - Self::new(inner) - } -} - -/// From hashbrown map -impl From> for FieldMetadata { - fn from(map: HashMap) -> Self { - let inner = map.into_iter().collect(); - Self::new(inner) - } -} - -impl From<&HashMap> for FieldMetadata { - fn from(map: &HashMap) -> Self { - let inner = map - .into_iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - Self::new(inner) - } -} - /// The metadata used in [`Field::metadata`]. /// /// This represents the metadata associated with an Arrow [`Field`]. The metadata consists of key-value pairs. 
diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 00708fd3411d..94b855addcaa 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -17,8 +17,8 @@ use super::{Between, Expr, Like}; use crate::expr::{ - AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, FieldMetadata, - InList, InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction, + AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, InList, + InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction, WindowFunctionParams, }; use crate::type_coercion::functions::{ @@ -28,6 +28,7 @@ use crate::udf::ReturnFieldArgs; use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::{ not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, Result, Spans, TableReference, diff --git a/datafusion/expr/src/literal.rs b/datafusion/expr/src/literal.rs index c4bd43bc0a62..335d7b471f5f 100644 --- a/datafusion/expr/src/literal.rs +++ b/datafusion/expr/src/literal.rs @@ -17,9 +17,8 @@ //! Literal module contains foundational types that are used to represent literals in DataFusion. 
-use crate::expr::FieldMetadata; use crate::Expr; -use datafusion_common::ScalarValue; +use datafusion_common::{metadata::FieldMetadata, ScalarValue}; /// Create a literal expression pub fn lit(n: T) -> Expr { diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 7a283b0420d3..c04b9c015615 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -25,7 +25,7 @@ use std::iter::once; use std::sync::Arc; use crate::dml::CopyTo; -use crate::expr::{Alias, FieldMetadata, PlannedReplaceSelectItem, Sort as SortExpr}; +use crate::expr::{Alias, PlannedReplaceSelectItem, Sort as SortExpr}; use crate::expr_rewriter::{ coerce_plan_expr_for_schema, normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols, normalize_sorts, @@ -53,6 +53,7 @@ use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; use datafusion_common::display::ToStringifiedPlan; use datafusion_common::file_options::file_type::FileType; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::{ exec_err, get_target_functional_dependencies, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, Column, Constraints, DFSchema, DFSchemaRef, @@ -2708,12 +2709,12 @@ mod tests { assert_snapshot!(plan, @r" Union - Cross Join: + Cross Join: SubqueryAlias: left Values: (Int32(1)) SubqueryAlias: right Values: (Int32(1)) - Cross Join: + Cross Join: SubqueryAlias: left Values: (Int32(1)) SubqueryAlias: right diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index c40906239073..204ce14e37d8 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -29,6 +29,7 @@ use std::sync::Arc; use datafusion_common::{ cast::{as_large_list_array, 
as_list_array}, + metadata::FieldMetadata, tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter}, }; use datafusion_common::{ @@ -57,7 +58,6 @@ use crate::simplify_expressions::unwrap_cast::{ unwrap_cast_in_comparison_for_binary, }; use crate::simplify_expressions::SimplifyInfo; -use datafusion_expr::expr::FieldMetadata; use datafusion_expr_common::casts::try_cast_literal_to_type; use indexmap::IndexSet; use regex::Regex; diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs index 6e425ee439d6..94e91d43a1c4 100644 --- a/datafusion/physical-expr/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -28,8 +28,8 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::expr::FieldMetadata; use datafusion_expr::Expr; use datafusion_expr_common::columnar_value::ColumnarValue; use datafusion_expr_common::interval_arithmetic::Interval; diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 73df60c42e96..7790380dffd5 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -25,13 +25,12 @@ use crate::{ use arrow::datatypes::Schema; use datafusion_common::config::ConfigOptions; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::{ exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, ToDFSchema, }; use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::expr::{ - Alias, Cast, FieldMetadata, InList, Placeholder, ScalarFunction, -}; +use datafusion_expr::expr::{Alias, Cast, InList, Placeholder, ScalarFunction}; use datafusion_expr::var_provider::is_system_variables; use datafusion_expr::var_provider::VarType; use datafusion_expr::{ diff --git 
a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 8b59c97da432..c912da66cef7 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -608,7 +608,8 @@ pub fn serialize_expr( None => None, }, nullable: field.as_ref().map(|f| f.is_nullable()), - metadata: field.as_ref() + metadata: field + .as_ref() .map(|f| f.metadata().clone()) .unwrap_or(HashMap::new()), })), From 2c98b28826d83653bd0dd110a8cb7133c8e195ca Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Oct 2025 14:18:53 -0500 Subject: [PATCH 05/46] maybe working from proto --- datafusion/expr/src/expr.rs | 5 +++++ .../proto/src/logical_plan/from_proto.rs | 22 ++++++++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index b47f7412b121..9ee058e1a08b 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1153,6 +1153,11 @@ impl Placeholder { field: data_type.map(|dt| Arc::new(Field::new("", dt, true))), } } + + /// Create a new Placeholder expression from a Field + pub fn new_with_metadata(id: String, field: Option) -> Self { + Self { id, field } + } } /// Grouping sets diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index cbfa15183b5c..ae395afaeb23 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use arrow::datatypes::Field; use datafusion::execution::registry::FunctionRegistry; use datafusion_common::{ exec_datafusion_err, internal_err, plan_datafusion_err, NullEquality, @@ -595,12 +596,23 @@ pub fn parse_expr( ExprType::Rollup(RollupNode { expr }) => Ok(Expr::GroupingSet( GroupingSet::Rollup(parse_exprs(expr, registry, codec)?), )), - ExprType::Placeholder(PlaceholderNode { id, data_type }) => match data_type { + 
ExprType::Placeholder(PlaceholderNode { + id, + data_type, + nullable, + metadata, + }) => match data_type { None => Ok(Expr::Placeholder(Placeholder::new(id.clone(), None))), - Some(data_type) => Ok(Expr::Placeholder(Placeholder::new( - id.clone(), - Some(data_type.try_into()?), - ))), + Some(data_type) => { + // Foofy + let field = + Field::new("", data_type.try_into()?, nullable.unwrap_or(true)) + .with_metadata(metadata.clone()); + Ok(Expr::Placeholder(Placeholder::new_with_metadata( + id.clone(), + Some(field.into()), + ))) + } }, } } From cbf75aabc08a2a6cf4b129ef7d1b31acffb9354b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Oct 2025 15:29:08 -0500 Subject: [PATCH 06/46] oof --- datafusion/common/src/param_value.rs | 45 ++++++++++++++----- datafusion/core/src/execution/context/mod.rs | 11 +++-- .../core/src/execution/session_state.rs | 12 ++--- datafusion/core/tests/dataframe/mod.rs | 4 +- datafusion/expr/src/logical_plan/builder.rs | 4 +- datafusion/expr/src/logical_plan/plan.rs | 2 +- datafusion/expr/src/logical_plan/statement.rs | 4 +- datafusion/sql/src/expr/mod.rs | 11 +++-- datafusion/sql/src/expr/value.rs | 8 ++-- datafusion/sql/src/planner.rs | 45 ++++++++++--------- datafusion/sql/src/statement.rs | 17 +++---- 11 files changed, 97 insertions(+), 66 deletions(-) diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs index 7582cff56f87..8cfdff6d6e1d 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -16,22 +16,23 @@ // under the License. 
use crate::error::{_plan_datafusion_err, _plan_err}; +use crate::metadata::FieldMetadata; use crate::{Result, ScalarValue}; -use arrow::datatypes::DataType; +use arrow::datatypes::FieldRef; use std::collections::HashMap; /// The parameter value corresponding to the placeholder #[derive(Debug, Clone)] pub enum ParamValues { /// For positional query parameters, like `SELECT * FROM test WHERE a > $1 AND b = $2` - List(Vec), + List(Vec<(ScalarValue, Option)>), /// For named query parameters, like `SELECT * FROM test WHERE a > $foo AND b = $goo` - Map(HashMap), + Map(HashMap)>), } impl ParamValues { /// Verify parameter list length and type - pub fn verify(&self, expect: &[DataType]) -> Result<()> { + pub fn verify(&self, expect: &[FieldRef]) -> Result<()> { match self { ParamValues::List(list) => { // Verify if the number of params matches the number of values @@ -45,8 +46,8 @@ impl ParamValues { // Verify if the types of the params matches the types of the values let iter = expect.iter().zip(list.iter()); - for (i, (param_type, value)) in iter.enumerate() { - if *param_type != value.data_type() { + for (i, (param_type, (value, maybe_metadata))) in iter.enumerate() { + if *param_type.data_type() != value.data_type() { return _plan_err!( "Expected parameter of type {}, got {:?} at index {}", param_type, @@ -54,6 +55,19 @@ impl ParamValues { i ); } + + if let Some(expected_metadata) = maybe_metadata { + // Probably too strict of a comparison (this is an example of where + // the concept of type equality would be useful) + if &expected_metadata.to_hashmap() != param_type.metadata() { + return _plan_err!( + "Expected parameter with metadata {:?}, got {:?} at index {}", + expected_metadata, + param_type.metadata(), + i + ); + } + } } Ok(()) } @@ -65,7 +79,10 @@ impl ParamValues { } } - pub fn get_placeholders_with_values(&self, id: &str) -> Result { + pub fn get_placeholders_with_values( + &self, + id: &str, + ) -> Result<(ScalarValue, Option)> { match self { 
ParamValues::List(list) => { if id.is_empty() { @@ -99,7 +116,7 @@ impl ParamValues { impl From> for ParamValues { fn from(value: Vec) -> Self { - Self::List(value) + Self::List(value.into_iter().map(|v| (v, None)).collect()) } } @@ -108,8 +125,10 @@ where K: Into, { fn from(value: Vec<(K, ScalarValue)>) -> Self { - let value: HashMap = - value.into_iter().map(|(k, v)| (k.into(), v)).collect(); + let value: HashMap)> = value + .into_iter() + .map(|(k, v)| (k.into(), (v, None))) + .collect(); Self::Map(value) } } @@ -119,8 +138,10 @@ where K: Into, { fn from(value: HashMap) -> Self { - let value: HashMap = - value.into_iter().map(|(k, v)| (k.into(), v)).collect(); + let value: HashMap)> = value + .into_iter() + .map(|(k, v)| (k.into(), (v, None))) + .collect(); Self::Map(value) } } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index a8148b80495e..f04b9a9ce0bb 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -64,6 +64,7 @@ use datafusion_catalog::{ DynamicFileCatalog, TableFunction, TableFunctionImpl, UrlTableFactory, }; use datafusion_common::config::ConfigOptions; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::{ config::{ConfigExtension, TableOptions}, exec_datafusion_err, exec_err, internal_datafusion_err, not_impl_err, @@ -1238,10 +1239,10 @@ impl SessionContext { })?; // Only allow literals as parameters for now. 
- let mut params: Vec = parameters + let mut params: Vec<(ScalarValue, Option)> = parameters .into_iter() .map(|e| match e { - Expr::Literal(scalar, _) => Ok(scalar), + Expr::Literal(scalar, metadata) => Ok((scalar, metadata)), _ => not_impl_err!("Unsupported parameter type: {}", e), }) .collect::>()?; @@ -1259,7 +1260,11 @@ impl SessionContext { params = params .into_iter() .zip(prepared.data_types.iter()) - .map(|(e, dt)| e.cast_to(dt)) + .map(|(e, dt)| -> Result<_> { + // This is fishy...we're casting storage without checking if an + // extension type supports the destination + Ok((e.0.cast_to(dt.data_type())?, e.1)) + }) .collect::>()?; } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 6749ddd7ab8d..4fe6b57130aa 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -30,7 +30,7 @@ use crate::datasource::provider_as_source; use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::execution::SessionStateDefaults; use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; -use arrow::datatypes::DataType; +use arrow_schema::{DataType, FieldRef}; use datafusion_catalog::information_schema::{ InformationSchemaProvider, INFORMATION_SCHEMA, }; @@ -115,11 +115,11 @@ use uuid::Uuid; /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let state = SessionStateBuilder::new() -/// .with_config(SessionConfig::new()) +/// .with_config(SessionConfig::new()) /// .with_runtime_env(Arc::new(RuntimeEnv::default())) /// .with_default_features() /// .build(); -/// Ok(()) +/// Ok(()) /// # } /// ``` /// @@ -872,7 +872,7 @@ impl SessionState { pub(crate) fn store_prepared( &mut self, name: String, - data_types: Vec, + data_types: Vec, plan: Arc, ) -> datafusion_common::Result<()> { match self.prepared_plans.entry(name) { @@ -1322,7 +1322,7 @@ impl SessionStateBuilder { /// let url = 
Url::try_from("file://").unwrap(); /// let object_store = object_store::local::LocalFileSystem::new(); /// let state = SessionStateBuilder::new() - /// .with_config(SessionConfig::new()) + /// .with_config(SessionConfig::new()) /// .with_object_store(&url, Arc::new(object_store)) /// .with_default_features() /// .build(); @@ -2011,7 +2011,7 @@ impl SimplifyInfo for SessionSimplifyProvider<'_> { #[derive(Debug)] pub(crate) struct PreparedPlan { /// Data types of the parameters - pub(crate) data_types: Vec, + pub(crate) data_types: Vec, /// The prepared logical plan pub(crate) plan: Arc, } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 11685b4c17ea..979ada2bc6bb 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -66,7 +66,7 @@ use datafusion_catalog::TableProvider; use datafusion_common::test_util::{batches_to_sort_string, batches_to_string}; use datafusion_common::{ assert_contains, internal_datafusion_err, Constraint, Constraints, DFSchema, - DataFusionError, ParamValues, ScalarValue, TableReference, UnnestOptions, + DataFusionError, ScalarValue, TableReference, UnnestOptions, }; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::file_format::format_as_file_type; @@ -2464,7 +2464,7 @@ async fn filtered_aggr_with_param_values() -> Result<()> { let df = ctx .sql("select count (c2) filter (where c3 > $1) from table1") .await? 
- .with_param_values(ParamValues::List(vec![ScalarValue::from(10u64)])); + .with_param_values(vec![ScalarValue::from(10u64)]); let df_results = df?.collect().await?; assert_snapshot!( diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index c04b9c015615..e291349c7a66 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -50,7 +50,7 @@ use crate::{ use super::dml::InsertOp; use arrow::compute::can_cast_types; -use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; +use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema, SchemaRef}; use datafusion_common::display::ToStringifiedPlan; use datafusion_common::file_options::file_type::FileType; use datafusion_common::metadata::FieldMetadata; @@ -623,7 +623,7 @@ impl LogicalPlanBuilder { } /// Make a builder for a prepare logical plan from the builder's plan - pub fn prepare(self, name: String, data_types: Vec) -> Result { + pub fn prepare(self, name: String, data_types: Vec) -> Result { Ok(Self::new(LogicalPlan::Statement(Statement::Prepare( Prepare { name, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index ebb1df9f1126..8369f0325d97 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1464,7 +1464,7 @@ impl LogicalPlan { let transformed_expr = e.transform_up(|e| { if let Expr::Placeholder(Placeholder { id, .. 
}) = e { let value = param_values.get_placeholders_with_values(&id)?; - Ok(Transformed::yes(Expr::Literal(value, None))) + Ok(Transformed::yes(Expr::Literal(value.0, value.1))) } else { Ok(Transformed::no(e)) } diff --git a/datafusion/expr/src/logical_plan/statement.rs b/datafusion/expr/src/logical_plan/statement.rs index 6d3fe9fa75ac..6cff6b180423 100644 --- a/datafusion/expr/src/logical_plan/statement.rs +++ b/datafusion/expr/src/logical_plan/statement.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use arrow::datatypes::DataType; +use arrow::datatypes::FieldRef; use datafusion_common::{DFSchema, DFSchemaRef}; use itertools::Itertools as _; use std::fmt::{self, Display}; @@ -192,7 +192,7 @@ pub struct Prepare { /// The name of the statement pub name: String, /// Data types of the parameters ([`Expr::Placeholder`]) - pub data_types: Vec, + pub data_types: Vec, /// The logical plan of the statements pub input: Arc, } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 23426701409e..d74415faccf4 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -287,7 +287,7 @@ impl SqlToRel<'_, S> { schema, planner_context, )?), - self.convert_data_type(&data_type)?, + self.convert_data_type(&data_type)?.data_type().clone(), ))) } @@ -297,7 +297,7 @@ impl SqlToRel<'_, S> { uses_odbc_syntax: _, }) => Ok(Expr::Cast(Cast::new( Box::new(lit(value.into_string().unwrap())), - self.convert_data_type(&data_type)?, + self.convert_data_type(&data_type)?.data_type().clone(), ))), SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new( @@ -974,7 +974,7 @@ impl SqlToRel<'_, S> { // numeric constants are treated as seconds (rather as nanoseconds) // to align with postgres / duckdb semantics - let expr = match &dt { + let expr = match dt.data_type() { DataType::Timestamp(TimeUnit::Nanosecond, tz) if expr.get_type(schema)? 
== DataType::Int64 => { @@ -986,7 +986,10 @@ impl SqlToRel<'_, S> { _ => expr, }; - Ok(Expr::Cast(Cast::new(Box::new(expr), dt))) + Ok(Expr::Cast(Cast::new( + Box::new(expr), + dt.data_type().clone(), + ))) } /// Extracts the root expression and access chain from a compound expression. diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index 7075a1afd9dd..d71f3e3f9ea9 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -20,7 +20,7 @@ use arrow::compute::kernels::cast_utils::{ parse_interval_month_day_nano_config, IntervalParseConfig, IntervalUnit, }; use arrow::datatypes::{ - i256, DataType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, + i256, FieldRef, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, }; use bigdecimal::num_bigint::BigInt; use bigdecimal::{BigDecimal, Signed, ToPrimitive}; @@ -45,7 +45,7 @@ impl SqlToRel<'_, S> { pub(crate) fn parse_value( &self, value: Value, - param_data_types: &[DataType], + param_data_types: &[FieldRef], ) -> Result { match value { Value::Number(n, _) => self.parse_sql_number(&n, false), @@ -108,7 +108,7 @@ impl SqlToRel<'_, S> { /// number 1, 2, ... etc. For example, `$1` is the first placeholder; $2 is the second one and so on. 
fn create_placeholder_expr( param: String, - param_data_types: &[DataType], + param_data_types: &[FieldRef], ) -> Result { // Parse the placeholder as a number because it is the only support from sqlparser and postgres let index = param[1..].parse::(); @@ -133,7 +133,7 @@ impl SqlToRel<'_, S> { // Data type of the parameter debug!("type of param {param} param_data_types[idx]: {param_type:?}"); - Ok(Expr::Placeholder(Placeholder::new( + Ok(Expr::Placeholder(Placeholder::new_with_metadata( param, param_type.cloned(), ))) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index e93c5e066b66..a233e0f1fdb1 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -256,7 +256,7 @@ impl IdentNormalizer { pub struct PlannerContext { /// Data types for numbered parameters ($1, $2, etc), if supplied /// in `PREPARE` statement - prepare_param_data_types: Arc>, + prepare_param_data_types: Arc>, /// Map of CTE name to logical plan of the WITH clause. /// Use `Arc` to allow cheap cloning ctes: HashMap>, @@ -290,7 +290,7 @@ impl PlannerContext { /// Update the PlannerContext with provided prepare_param_data_types pub fn with_prepare_param_data_types( mut self, - prepare_param_data_types: Vec, + prepare_param_data_types: Vec, ) -> Self { self.prepare_param_data_types = prepare_param_data_types.into(); self @@ -347,7 +347,7 @@ impl PlannerContext { } /// Return the types of parameters (`$1`, `$2`, etc) if known - pub fn prepare_param_data_types(&self) -> &[DataType] { + pub fn prepare_param_data_types(&self) -> &[FieldRef] { &self.prepare_param_data_types } @@ -433,11 +433,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .options .iter() .any(|x| x.option == ColumnOption::NotNull); - fields.push(Field::new( - self.ident_normalizer.normalize(column.name), - data_type, - !not_nullable, - )); + fields.push( + data_type + .as_ref() + .clone() + .with_name(self.ident_normalizer.normalize(column.name)) + .with_nullable(!not_nullable), + 
); } Ok(Schema::new(fields)) @@ -587,11 +589,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }) } - pub(crate) fn convert_data_type(&self, sql_type: &SQLDataType) -> Result { + pub(crate) fn convert_data_type(&self, sql_type: &SQLDataType) -> Result { // First check if any of the registered type_planner can handle this type if let Some(type_planner) = self.context_provider.get_type_planner() { if let Some(data_type) = type_planner.plan_type(sql_type)? { - return Ok(data_type); + return Ok(Field::new("", data_type, true).into()); } } @@ -600,7 +602,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) => { // Arrays may be multi-dimensional. let inner_data_type = self.convert_data_type(inner_sql_type)?; - Ok(DataType::new_list(inner_data_type, true)) + Ok(Field::new("", DataType::List(inner_data_type), true).into()) } SQLDataType::Array(ArrayElemTypeDef::SquareBracket( inner_sql_type, @@ -608,19 +610,22 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { )) => { let inner_data_type = self.convert_data_type(inner_sql_type)?; if let Some(array_size) = maybe_array_size { - Ok(DataType::new_fixed_size_list( - inner_data_type, - *array_size as i32, + Ok(Field::new( + "", + DataType::FixedSizeList(inner_data_type, *array_size as i32), true, - )) + ) + .into()) } else { - Ok(DataType::new_list(inner_data_type, true)) + Ok(Field::new("", DataType::List(inner_data_type), true).into()) } } SQLDataType::Array(ArrayElemTypeDef::None) => { not_impl_err!("Arrays with unspecified type is not supported") } - other => self.convert_simple_data_type(other), + other => { + Ok(Field::new("", self.convert_simple_data_type(other)?, true).into()) + } } } @@ -739,11 +744,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Some(ident) => ident.clone(), None => Ident::new(format!("c{idx}")), }; - Ok(Arc::new(Field::new( - self.ident_normalizer.normalize(field_name), - data_type, - true, - ))) + 
Ok(data_type.as_ref().clone().with_name(self.ident_normalizer.normalize(field_name))) }) .collect::>>()?; Ok(DataType::Struct(Fields::from(fields))) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index d1d64091c8cc..87076950d91a 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -29,7 +29,7 @@ use crate::planner::{ }; use crate::utils::normalize_ident; -use arrow::datatypes::{DataType, Fields}; +use arrow::datatypes::{Field, FieldRef, Fields}; use datafusion_common::error::_plan_err; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ @@ -730,7 +730,7 @@ impl SqlToRel<'_, S> { statement, } => { // Convert parser data types to DataFusion data types - let mut data_types: Vec = data_types + let mut data_types: Vec = data_types .into_iter() .map(|t| self.convert_data_type(&t)) .collect::>()?; @@ -746,7 +746,7 @@ impl SqlToRel<'_, S> { )?; if data_types.is_empty() { - let map_types = plan.get_parameter_types()?; + let map_types = plan.get_parameter_fields()?; let param_types: Vec<_> = (1..=map_types.len()) .filter_map(|i| { let key = format!("${i}"); @@ -1203,7 +1203,7 @@ impl SqlToRel<'_, S> { Ok(OperateFunctionArg { name: arg.name, default_expr, - data_type, + data_type: data_type.data_type().clone(), }) }) .collect::>>(); @@ -1221,7 +1221,9 @@ impl SqlToRel<'_, S> { // Convert resulting expression to data fusion expression // let arg_types = args.as_ref().map(|arg| { - arg.iter().map(|t| t.data_type.clone()).collect::>() + arg.iter() + .map(|t| Arc::new(Field::new("", t.data_type.clone(), true))) + .collect::>() }); let mut planner_context = PlannerContext::new() .with_prepare_param_data_types(arg_types.unwrap_or_default()); @@ -1264,7 +1266,7 @@ impl SqlToRel<'_, S> { or_replace, temporary, name, - return_type, + return_type: return_type.map(|f| f.data_type().clone()), args, params, schema: DFSchemaRef::new(DFSchema::empty()), @@ -2105,8 +2107,7 @@ impl SqlToRel<'_, S> { 
idx + 1 ) })?; - let dt = field.data_type().clone(); - let _ = prepare_param_data_types.insert(name, dt); + let _ = prepare_param_data_types.insert(name, Arc::clone(field)); } } } From 9982d32f6ac669385c9398f35a54523d356c6aa6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Oct 2025 15:58:37 -0500 Subject: [PATCH 07/46] maybe fix build --- datafusion/proto/proto/datafusion.proto | 1 + datafusion/proto/src/generated/pbjson.rs | 17 ++++++++++++ datafusion/proto/src/generated/prost.rs | 2 ++ datafusion/proto/src/logical_plan/mod.rs | 35 ++++++++++++++++++++---- 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index b62eae739f40..1e6e4a0397c3 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -181,6 +181,7 @@ message PrepareNode { string name = 1; repeated datafusion_common.ArrowType data_types = 2; LogicalPlanNode input = 3; + repeated datafusion_common.Field fields = 4; } message CreateCatalogSchemaNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 1754a1e77784..f8cce7afc2ea 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -18834,6 +18834,9 @@ impl serde::Serialize for PrepareNode { if self.input.is_some() { len += 1; } + if !self.fields.is_empty() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.PrepareNode", len)?; if !self.name.is_empty() { struct_ser.serialize_field("name", &self.name)?; @@ -18844,6 +18847,9 @@ impl serde::Serialize for PrepareNode { if let Some(v) = self.input.as_ref() { struct_ser.serialize_field("input", v)?; } + if !self.fields.is_empty() { + struct_ser.serialize_field("fields", &self.fields)?; + } struct_ser.end() } } @@ -18858,6 +18864,7 @@ impl<'de> serde::Deserialize<'de> for PrepareNode { "data_types", "dataTypes", "input", + "fields", ]; 
#[allow(clippy::enum_variant_names)] @@ -18865,6 +18872,7 @@ impl<'de> serde::Deserialize<'de> for PrepareNode { Name, DataTypes, Input, + Fields, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -18889,6 +18897,7 @@ impl<'de> serde::Deserialize<'de> for PrepareNode { "name" => Ok(GeneratedField::Name), "dataTypes" | "data_types" => Ok(GeneratedField::DataTypes), "input" => Ok(GeneratedField::Input), + "fields" => Ok(GeneratedField::Fields), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -18911,6 +18920,7 @@ impl<'de> serde::Deserialize<'de> for PrepareNode { let mut name__ = None; let mut data_types__ = None; let mut input__ = None; + let mut fields__ = None; while let Some(k) = map_.next_key()? { match k { GeneratedField::Name => { @@ -18931,12 +18941,19 @@ impl<'de> serde::Deserialize<'de> for PrepareNode { } input__ = map_.next_value()?; } + GeneratedField::Fields => { + if fields__.is_some() { + return Err(serde::de::Error::duplicate_field("fields")); + } + fields__ = Some(map_.next_value()?); + } } } Ok(PrepareNode { name: name__.unwrap_or_default(), data_types: data_types__.unwrap_or_default(), input: input__, + fields: fields__.unwrap_or_default(), }) } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 51ce9b4f8565..101b18f5ea5d 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -282,6 +282,8 @@ pub struct PrepareNode { pub data_types: ::prost::alloc::vec::Vec, #[prost(message, optional, boxed, tag = "3")] pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag = "4")] + pub fields: ::prost::alloc::vec::Vec, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct CreateCatalogSchemaNode { diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index fd9e07914b07..55e778b53155 100644 --- 
a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -33,7 +33,7 @@ use crate::{ }; use crate::protobuf::{proto_error, ToProtoError}; -use arrow::datatypes::{DataType, Schema, SchemaBuilder, SchemaRef}; +use arrow::datatypes::{DataType, Field, Schema, SchemaBuilder, SchemaRef}; use datafusion::datasource::cte_worktable::CteWorkTable; use datafusion::datasource::file_format::arrow::ArrowFormat; #[cfg(feature = "avro")] @@ -888,9 +888,30 @@ impl AsLogicalPlan for LogicalPlanNode { .iter() .map(DataType::try_from) .collect::>()?; - LogicalPlanBuilder::from(input) - .prepare(prepare.name.clone(), data_types)? - .build() + let fields: Vec = prepare + .fields + .iter() + .map(Field::try_from) + .collect::>()?; + + if fields.is_empty() { + LogicalPlanBuilder::from(input) + .prepare( + prepare.name.clone(), + data_types + .into_iter() + .map(|dt| Field::new("", dt, true).into()) + .collect(), + )? + .build() + } else { + LogicalPlanBuilder::from(input) + .prepare( + prepare.name.clone(), + fields.into_iter().map(|f| f.into()).collect(), + )? 
+ .build() + } } LogicalPlanType::DropView(dropview) => { Ok(LogicalPlan::Ddl(DdlStatement::DropView(DropView { @@ -1632,9 +1653,13 @@ impl AsLogicalPlan for LogicalPlanNode { name: name.clone(), data_types: data_types .iter() - .map(|t| t.try_into()) + .map(|t| t.data_type().try_into()) .collect::, _>>()?, input: Some(Box::new(input)), + fields: data_types + .iter() + .map(|f| f.as_ref().try_into()) + .collect::, _>>()?, }, ))), }) From 0091bbbbd3b32f3ce58b07f1e0ce92ac7e80ede5 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Oct 2025 20:19:31 -0500 Subject: [PATCH 08/46] fix doc --- datafusion/common/src/metadata.rs | 6 +++--- datafusion/expr/src/expr.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index 6f6f4e6adbe2..7026b49cb6fe 100644 --- a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -31,7 +31,7 @@ use hashbrown::HashMap; /// # Example: Create [`FieldMetadata`] from a [`Field`] /// ``` /// # use std::collections::HashMap; -/// # use datafusion_expr::expr::FieldMetadata; +/// # use datafusion_common::metadata::FieldMetadata; /// # use arrow::datatypes::{Field, DataType}; /// # let field = Field::new("c1", DataType::Int32, true) /// # .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())])); @@ -43,7 +43,7 @@ use hashbrown::HashMap; /// /// # Example: Update a [`Field`] with [`FieldMetadata`] /// ``` -/// # use datafusion_expr::expr::FieldMetadata; +/// # use datafusion_common::metadata::FieldMetadata; /// # use arrow::datatypes::{Field, DataType}; /// # let field = Field::new("c1", DataType::Int32, true); /// # let metadata = FieldMetadata::new_from_field(&field); @@ -96,7 +96,7 @@ impl FieldMetadata { /// /// # Example usage /// ```rust - /// use datafusion_expr::expr::FieldMetadata; + /// use datafusion_common::metadata::FieldMetadata; /// use std::collections::BTreeMap; /// /// // Create metadata for a 
literal expression diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 9ee058e1a08b..2bcdee2cc419 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1623,7 +1623,7 @@ impl Expr { /// ``` /// # use datafusion_expr::col; /// # use std::collections::HashMap; - /// # use datafusion_expr::expr::FieldMetadata; + /// # use datafusion_common::metadata::FieldMetadata; /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]); /// let metadata = FieldMetadata::from(metadata); /// let expr = col("foo").alias_with_metadata("bar", Some(metadata)); @@ -1655,7 +1655,7 @@ impl Expr { /// ``` /// # use datafusion_expr::col; /// # use std::collections::HashMap; - /// # use datafusion_expr::expr::FieldMetadata; + /// # use datafusion_common::metadata::FieldMetadata; /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]); /// let metadata = FieldMetadata::from(metadata); /// let expr = col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata)); From 4f018007b1ccd5ac2a3f4619ba063c7d50d04240 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Oct 2025 20:33:43 -0500 Subject: [PATCH 09/46] fix the name of the list --- datafusion/sql/src/planner.rs | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index a233e0f1fdb1..108e5ad52536 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -602,7 +602,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) => { // Arrays may be multi-dimensional. 
let inner_data_type = self.convert_data_type(inner_sql_type)?; - Ok(Field::new("", DataType::List(inner_data_type), true).into()) + Ok(Field::new( + "", + DataType::List( + inner_data_type.as_ref().clone().with_name("item").into(), + ), + true, + ) + .into()) } SQLDataType::Array(ArrayElemTypeDef::SquareBracket( inner_sql_type, @@ -612,12 +619,22 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if let Some(array_size) = maybe_array_size { Ok(Field::new( "", - DataType::FixedSizeList(inner_data_type, *array_size as i32), + DataType::FixedSizeList( + inner_data_type.as_ref().clone().with_name("item").into(), + *array_size as i32, + ), true, ) .into()) } else { - Ok(Field::new("", DataType::List(inner_data_type), true).into()) + Ok(Field::new( + "", + DataType::List( + inner_data_type.as_ref().clone().with_name("item").into(), + ), + true, + ) + .into()) } } SQLDataType::Array(ArrayElemTypeDef::None) => { From 9b3a45a3a71cc29a6ebf34c680e57a3071cccbe4 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Oct 2025 20:37:01 -0500 Subject: [PATCH 10/46] undo trailing whitespace change --- datafusion/expr/src/logical_plan/builder.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index e291349c7a66..a85de130e948 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -2709,12 +2709,12 @@ mod tests { assert_snapshot!(plan, @r" Union - Cross Join: + Cross Join: SubqueryAlias: left Values: (Int32(1)) SubqueryAlias: right Values: (Int32(1)) - Cross Join: + Cross Join: SubqueryAlias: left Values: (Int32(1)) SubqueryAlias: right From 901a01146c4696ce1a7ca37dde50b248beda04f4 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Oct 2025 21:12:26 -0500 Subject: [PATCH 11/46] maybe fix failing tests --- datafusion/sql/tests/cases/params.rs | 50 +++++++++---------- .../sqllogictest/test_files/prepare.slt 
| 2 +- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index 343a90af3efb..555bc51a7bdc 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -155,13 +155,13 @@ fn test_prepare_statement_to_plan_no_param() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Int32] + Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: person.id, person.age Filter: person.age = Int64(10) TableScan: person "# ); - assert_snapshot!(dt, @r#"Int32"#); + assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); /////////////////// // replace params with values @@ -235,7 +235,7 @@ fn test_prepare_statement_to_plan_one_param_one_value_different_type_panic() { .unwrap_err() .strip_backtrace(), @r###" - Error during planning: Expected parameter of type Int32, got Float64 at index 0 + Error during planning: Expected parameter of type Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, got Float64 at index 0 "### ); } @@ -265,12 +265,12 @@ fn test_prepare_statement_to_plan_params_as_constants() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Int32] + Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: $1 EmptyRelation: rows=1 "# ); - assert_snapshot!(dt, @r#"Int32"#); + assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); /////////////////// // replace params with values @@ -290,12 +290,12 @@ fn test_prepare_statement_to_plan_params_as_constants() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Int32] + Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }] Projection: Int64(1) + $1 EmptyRelation: rows=1 "# ); - assert_snapshot!(dt, @r#"Int32"#); + assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); /////////////////// // replace params with values @@ -315,12 +315,12 @@ fn test_prepare_statement_to_plan_params_as_constants() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Int32, Float64] + Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: Int64(1) + $1 + $2 EmptyRelation: rows=1 "# ); - assert_snapshot!(dt, @r#"Int32, Float64"#); + assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); /////////////////// // replace params with values @@ -376,7 +376,7 @@ fn test_prepare_statement_infer_types_from_join() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Int32] + Prepare: "my_plan" [Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: person.id, orders.order_id Inner Join: Filter: person.id = orders.customer_id AND person.age = $1 TableScan: person @@ -424,7 +424,7 @@ fn test_prepare_statement_infer_types_from_predicate() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Int32] + Prepare: "my_plan" [Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: person.id, person.age Filter: person.age = $1 TableScan: person @@ -476,7 +476,7 @@ fn test_prepare_statement_infer_types_from_between_predicate() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Int32, Int32] + Prepare: 
"my_plan" [Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: person.id, person.age Filter: person.age BETWEEN $1 AND $2 TableScan: person @@ -533,7 +533,7 @@ fn test_prepare_statement_infer_types_subquery() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [UInt32] + Prepare: "my_plan" [Field { name: "id", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: person.id, person.age Filter: person.age = () Subquery: @@ -598,7 +598,7 @@ fn test_prepare_statement_update_infer() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Int32, UInt32] + Prepare: "my_plan" [Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "id", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] Dml: op=[Update] table=[person] Projection: person.id AS id, person.first_name AS first_name, person.last_name AS last_name, $1 AS age, person.state AS state, person.salary AS salary, person.birth_date AS birth_date, person.😀 AS 😀 Filter: person.id = $2 @@ -662,7 +662,7 @@ fn test_prepare_statement_insert_infer() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [UInt32, Utf8, Utf8] + Prepare: "my_plan" [Field { name: "id", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "first_name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "last_name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] Dml: op=[Insert Into] table=[person] Projection: column1 AS id, column2 AS first_name, column3 AS last_name, CAST(NULL AS Int32) AS age, CAST(NULL AS Utf8) AS state, CAST(NULL AS Float64) AS salary, CAST(NULL AS 
Timestamp(Nanosecond, None)) AS birth_date, CAST(NULL AS Int32) AS 😀 Values: ($1, $2, $3) @@ -681,13 +681,13 @@ fn test_prepare_statement_to_plan_one_param() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Int32] + Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: person.id, person.age Filter: person.age = $1 TableScan: person "# ); - assert_snapshot!(dt, @r#"Int32"#); + assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); /////////////////// // replace params with values @@ -714,13 +714,13 @@ fn test_prepare_statement_to_plan_data_type() { // age is defined as Int32 but prepare statement declares it as DOUBLE/Float64 // Prepare statement and its logical plan should be created successfully @r#" - Prepare: "my_plan" [Float64] + Prepare: "my_plan" [Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: person.id, person.age Filter: person.age = $1 TableScan: person "# ); - assert_snapshot!(dt, @r#"Float64"#); + assert_snapshot!(dt, @r#"Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); /////////////////// // replace params with values still succeed and use Float64 @@ -747,13 +747,13 @@ fn test_prepare_statement_to_plan_multi_params() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Int32, Utf8View, Float64, Int32, Float64, Utf8View] + Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { 
name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: person.id, person.age, $6 Filter: person.age IN ([$1, $4]) AND person.salary > $3 AND person.salary < $5 OR person.first_name < $2 TableScan: person "# ); - assert_snapshot!(dt, @r#"Int32, Utf8View, Float64, Int32, Float64, Utf8View"#); + assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); /////////////////// // replace params with values @@ -790,7 +790,7 @@ fn test_prepare_statement_to_plan_having() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Int32, Float64, Float64, Float64] + Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] Projection: person.id, sum(person.age) Filter: sum(person.age) < $1 AND sum(person.age) > Int64(10) OR sum(person.age) IN ([$3, $4]) Aggregate: groupBy=[[person.id]], aggr=[[sum(person.age)]] @@ -798,7 +798,7 @@ fn 
test_prepare_statement_to_plan_having() { TableScan: person "# ); - assert_snapshot!(dt, @r#"Int32, Float64, Float64, Float64"#); + assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); /////////////////// // replace params with values @@ -831,13 +831,13 @@ fn test_prepare_statement_to_plan_limit() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Int64, Int64] + Prepare: "my_plan" [Field { name: "", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] Limit: skip=$1, fetch=$2 Projection: person.id TableScan: person "# ); - assert_snapshot!(dt, @r#"Int64, Int64"#); + assert_snapshot!(dt, @r#"Field { name: "", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); // replace params with values let param_values = vec![ScalarValue::Int64(Some(10)), ScalarValue::Int64(Some(200))]; diff --git a/datafusion/sqllogictest/test_files/prepare.slt b/datafusion/sqllogictest/test_files/prepare.slt index d61603ae6558..084e717dc2d7 100644 --- a/datafusion/sqllogictest/test_files/prepare.slt +++ b/datafusion/sqllogictest/test_files/prepare.slt @@ -289,7 +289,7 @@ query TT EXPLAIN PREPARE my_plan(INT) AS SELECT id + $1 FROM person; ---- logical_plan -01)Prepare: "my_plan" [Int32] +01)Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] 02)--Projection: 
person.id + $1 03)----TableScan: person projection=[id] From 21af946e6b1aa413132a5073916c2161110e351c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 14 Oct 2025 13:32:33 -0500 Subject: [PATCH 12/46] possibly reusable type equality --- datafusion/common/src/metadata.rs | 67 +++++++++++++++++++++++- datafusion/common/src/param_value.rs | 50 +++++++++--------- datafusion/expr/src/logical_plan/plan.rs | 2 +- datafusion/sql/tests/cases/params.rs | 2 +- 4 files changed, 94 insertions(+), 27 deletions(-) diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index 7026b49cb6fe..d1a99642357e 100644 --- a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -17,9 +17,74 @@ use std::{collections::BTreeMap, sync::Arc}; -use arrow::datatypes::Field; +use arrow::datatypes::{DataType, Field}; use hashbrown::HashMap; +use crate::{error::_plan_err, DataFusionError}; + +/// Assert equality of data types where one or both sides may have field metadata +/// +/// This currently compares absent metadata (e.g., one side was a DataType) and +/// empty metadata (e.g., one side was a field where the field had no metadata) +/// as equal and uses byte-for-byte comparison for the keys and values of the +/// fields, even though this is potentially too strict for some cases (e.g., +/// extension types where extension metadata is represented by JSON, or cases +/// where field metadata is orthogonal to the interpretation of the data type). +/// +/// Returns a planning error with suitably formatted type representations if +/// actual and expected do not compare to equal. 
+pub fn check_metadata_with_storage_equal( + actual: ( + &DataType, + Option<&std::collections::HashMap>, + ), + expected: ( + &DataType, + Option<&std::collections::HashMap>, + ), + what: &str, + context: &str, +) -> Result<(), DataFusionError> { + if actual.0 != expected.0 { + return _plan_err!( + "Expected {what} of type {}, got {}{context}", + format_type_and_metadata(expected.0, expected.1), + format_type_and_metadata(actual.0, actual.1) + ); + } + + let metdata_equal = match (actual.1, expected.1) { + (None, None) => true, + (None, Some(expected_metadata)) => expected_metadata.is_empty(), + (Some(actual_metadata), None) => actual_metadata.is_empty(), + (Some(actual_metadata), Some(expected_metadata)) => { + actual_metadata == expected_metadata + } + }; + + if !metdata_equal { + return _plan_err!( + "Expected {what} of type {}, got {}{context}", + format_type_and_metadata(expected.0, expected.1), + format_type_and_metadata(actual.0, actual.1) + ); + } + + Ok(()) +} + +fn format_type_and_metadata( + data_type: &DataType, + metadata: Option<&std::collections::HashMap>, +) -> String { + match metadata { + Some(metadata) if !metadata.is_empty() => { + format!("{data_type}<{metadata:?}>") + } + _ => data_type.to_string(), + } +} + /// Literal metadata /// /// Stores metadata associated with a literal expressions diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs index 8cfdff6d6e1d..bb132013e94b 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -16,9 +16,9 @@ // under the License. 
use crate::error::{_plan_datafusion_err, _plan_err}; -use crate::metadata::FieldMetadata; +use crate::metadata::{check_metadata_with_storage_equal, FieldMetadata}; use crate::{Result, ScalarValue}; -use arrow::datatypes::FieldRef; +use arrow::datatypes::{DataType, Field, FieldRef}; use std::collections::HashMap; /// The parameter value corresponding to the placeholder @@ -31,8 +31,22 @@ pub enum ParamValues { } impl ParamValues { + /// Verify parameter list length and DataType + /// + /// Use [`ParamValues::verify_fields`] to ensure field metadata is considered when + /// computing type equality. + #[deprecated] + pub fn verify(&self, expect: &[DataType]) -> Result<()> { + // make dummy Fields + let expect = expect + .iter() + .map(|dt| Field::new("", dt.clone(), true).into()) + .collect::>(); + self.verify_fields(&expect) + } + /// Verify parameter list length and type - pub fn verify(&self, expect: &[FieldRef]) -> Result<()> { + pub fn verify_fields(&self, expect: &[FieldRef]) -> Result<()> { match self { ParamValues::List(list) => { // Verify if the number of params matches the number of values @@ -47,27 +61,15 @@ impl ParamValues { // Verify if the types of the params matches the types of the values let iter = expect.iter().zip(list.iter()); for (i, (param_type, (value, maybe_metadata))) in iter.enumerate() { - if *param_type.data_type() != value.data_type() { - return _plan_err!( - "Expected parameter of type {}, got {:?} at index {}", - param_type, - value.data_type(), - i - ); - } - - if let Some(expected_metadata) = maybe_metadata { - // Probably too strict of a comparison (this is an example of where - // the concept of type equality would be useful) - if &expected_metadata.to_hashmap() != param_type.metadata() { - return _plan_err!( - "Expected parameter with metadata {:?}, got {:?} at index {}", - expected_metadata, - param_type.metadata(), - i - ); - } - } + check_metadata_with_storage_equal( + ( + &value.data_type(), + maybe_metadata.as_ref().map(|m| 
m.to_hashmap()).as_ref(), + ), + (param_type.data_type(), Some(param_type.metadata())), + "parameter", + &format!(" at index {i}"), + )?; } Ok(()) } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 8369f0325d97..cfa77ce1063f 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1282,7 +1282,7 @@ impl LogicalPlan { if let LogicalPlan::Statement(Statement::Prepare(prepare_lp)) = plan_with_values { - param_values.verify(&prepare_lp.data_types)?; + param_values.verify_fields(&prepare_lp.data_types)?; // try and take ownership of the input if is not shared, clone otherwise Arc::unwrap_or_clone(prepare_lp.input) } else { diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index 555bc51a7bdc..7c7b93d1aa5f 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -235,7 +235,7 @@ fn test_prepare_statement_to_plan_one_param_one_value_different_type_panic() { .unwrap_err() .strip_backtrace(), @r###" - Error during planning: Expected parameter of type Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, got Float64 at index 0 + Error during planning: Expected parameter of type Int32, got Float64 at index 0 "### ); } From b7404c0304b74eb53d3db2554e63425baee15ee8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 14 Oct 2025 14:05:39 -0500 Subject: [PATCH 13/46] literal class --- datafusion/common/src/metadata.rs | 40 +++++++++++++++++++- datafusion/common/src/param_value.rs | 27 ++++++------- datafusion/core/src/execution/context/mod.rs | 10 ++--- datafusion/expr/src/logical_plan/plan.rs | 6 ++- 4 files changed, 60 insertions(+), 23 deletions(-) diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index d1a99642357e..91fbecc29d1b 100644 --- a/datafusion/common/src/metadata.rs +++ 
b/datafusion/common/src/metadata.rs @@ -20,7 +20,45 @@ use std::{collections::BTreeMap, sync::Arc}; use arrow::datatypes::{DataType, Field}; use hashbrown::HashMap; -use crate::{error::_plan_err, DataFusionError}; +use crate::{error::_plan_err, DataFusionError, ScalarValue}; + +/// A [`ScalarValue`] with optional [`FieldMetadata`] +#[derive(Debug, Clone)] +pub struct Literal { + pub value: ScalarValue, + pub metadata: Option, +} + +impl Literal { + /// Create a new Literal from a scalar value with optional [`FieldMetadata`] + pub fn new(value: ScalarValue, metadata: Option) -> Self { + Self { value, metadata } + } + + /// Access the underlying [ScalarValue] storage + pub fn value(&self) -> &ScalarValue { + &self.value + } + + /// Access the [FieldMetadata] attached to this value, if any + pub fn metadata(&self) -> Option<&FieldMetadata> { + self.metadata.as_ref() + } + + /// Consume self and return components + pub fn into_inner(self) -> (ScalarValue, Option) { + (self.value, self.metadata) + } + + /// Cast this literal's storage type + pub fn cast_storage_to( + &self, + target_type: &DataType, + ) -> Result { + let new_value = self.value().cast_to(target_type)?; + Ok(Literal::new(new_value, self.metadata.clone())) + } +} /// Assert equality of data types where one or both sides may have field metadata /// diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs index bb132013e94b..5b831c9f3047 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -16,7 +16,7 @@ // under the License. 
 use crate::error::{_plan_datafusion_err, _plan_err};
-use crate::metadata::{check_metadata_with_storage_equal, FieldMetadata};
+use crate::metadata::{check_metadata_with_storage_equal, Literal};
 use crate::{Result, ScalarValue};
 use arrow::datatypes::{DataType, Field, FieldRef};
 use std::collections::HashMap;
@@ -25,9 +25,9 @@
 #[derive(Debug, Clone)]
 pub enum ParamValues {
     /// For positional query parameters, like `SELECT * FROM test WHERE a > $1 AND b = $2`
-    List(Vec<(ScalarValue, Option<FieldMetadata>)>),
+    List(Vec<Literal>),
     /// For named query parameters, like `SELECT * FROM test WHERE a > $foo AND b = $goo`
-    Map(HashMap<String, (ScalarValue, Option<FieldMetadata>)>),
+    Map(HashMap<String, Literal>),
 }
 
 impl ParamValues {
@@ -60,11 +60,11 @@
 
         // Verify if the types of the params matches the types of the values
         let iter = expect.iter().zip(list.iter());
-        for (i, (param_type, (value, maybe_metadata))) in iter.enumerate() {
+        for (i, (param_type, lit)) in iter.enumerate() {
             check_metadata_with_storage_equal(
                 (
-                    &value.data_type(),
-                    maybe_metadata.as_ref().map(|m| m.to_hashmap()).as_ref(),
+                    &lit.value.data_type(),
+                    lit.metadata.as_ref().map(|m| m.to_hashmap()).as_ref(),
                 ),
                 (param_type.data_type(), Some(param_type.metadata())),
                 "parameter",
@@ -81,10 +81,7 @@
         }
     }
 
-    pub fn get_placeholders_with_values(
-        &self,
-        id: &str,
-    ) -> Result<(ScalarValue, Option<FieldMetadata>)> {
+    pub fn get_placeholders_with_values(&self, id: &str) -> Result<Literal> {
         match self {
             ParamValues::List(list) => {
                 if id.is_empty() {
@@ -118,7 +115,7 @@
 
 impl From<Vec<ScalarValue>> for ParamValues {
     fn from(value: Vec<ScalarValue>) -> Self {
-        Self::List(value.into_iter().map(|v| (v, None)).collect())
+        Self::List(value.into_iter().map(|v| Literal::new(v, None)).collect())
     }
 }
 
@@ -127,9 +124,9 @@ where
     K: Into<String>,
 {
     fn from(value: Vec<(K, ScalarValue)>) -> Self {
-        let value: HashMap<String, (ScalarValue, Option<FieldMetadata>)> = value
+        let value: HashMap<String, Literal> = value
             .into_iter()
-            .map(|(k, v)| (k.into(), (v, None)))
+            .map(|(k, v)| (k.into(), Literal::new(v, None)))
             .collect();
Self::Map(value) } @@ -140,9 +137,9 @@ where K: Into, { fn from(value: HashMap) -> Self { - let value: HashMap)> = value + let value: HashMap = value .into_iter() - .map(|(k, v)| (k.into(), (v, None))) + .map(|(k, v)| (k.into(), Literal::new(v, None))) .collect(); Self::Map(value) } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index f04b9a9ce0bb..5b16d9002786 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -64,13 +64,13 @@ use datafusion_catalog::{ DynamicFileCatalog, TableFunction, TableFunctionImpl, UrlTableFactory, }; use datafusion_common::config::ConfigOptions; -use datafusion_common::metadata::FieldMetadata; +use datafusion_common::metadata::Literal; use datafusion_common::{ config::{ConfigExtension, TableOptions}, exec_datafusion_err, exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, tree_node::{TreeNodeRecursion, TreeNodeVisitor}, - DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaReference, TableReference, + DFSchema, DataFusionError, ParamValues, SchemaReference, TableReference, }; pub use datafusion_execution::config::SessionConfig; use datafusion_execution::registry::SerializerRegistry; @@ -1239,10 +1239,10 @@ impl SessionContext { })?; // Only allow literals as parameters for now. 
-        let mut params: Vec<(ScalarValue, Option<FieldMetadata>)> = parameters
+        let mut params: Vec<Literal> = parameters
             .into_iter()
             .map(|e| match e {
-                Expr::Literal(scalar, metadata) => Ok((scalar, metadata)),
+                Expr::Literal(scalar, metadata) => Ok(Literal::new(scalar, metadata)),
                 _ => not_impl_err!("Unsupported parameter type: {}", e),
             })
             .collect::<Result<_>>()?;
@@ -1263,7 +1263,7 @@
                 .map(|(e, dt)| -> Result<_> {
                     // This is fishy...we're casting storage without checking if an
                     // extension type supports the destination
-                    Ok((e.0.cast_to(dt.data_type())?, e.1))
+                    e.cast_storage_to(dt.data_type())
                 })
                 .collect::<Result<_>>()?;
         }
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
index cfa77ce1063f..9e7af9520ce7 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -1463,8 +1463,10 @@
             let original_name = name_preserver.save(&e);
             let transformed_expr = e.transform_up(|e| {
                 if let Expr::Placeholder(Placeholder { id, .. }) = e {
-                    let value = param_values.get_placeholders_with_values(&id)?;
-                    Ok(Transformed::yes(Expr::Literal(value.0, value.1)))
+                    let (value, metadata) = param_values
+                        .get_placeholders_with_values(&id)?
+ .into_inner(); + Ok(Transformed::yes(Expr::Literal(value, metadata))) } else { Ok(Transformed::no(e)) } From 94cb418f301b29dd75dbae7bb0fa8da87895942f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 14 Oct 2025 14:09:39 -0500 Subject: [PATCH 14/46] doc --- datafusion/common/src/metadata.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index 91fbecc29d1b..d7b015c7d751 100644 --- a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -51,6 +51,10 @@ impl Literal { } /// Cast this literal's storage type + /// + /// This operation assumes that if the underlying [ScalarValue] can be casted + /// to a given type that any extension type represented by the metadata is also + /// valid. pub fn cast_storage_to( &self, target_type: &DataType, From b982d4cd25030d0a73c402da553f08f0cc9d25c1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 14 Oct 2025 14:47:06 -0500 Subject: [PATCH 15/46] one more pass on documenting things --- datafusion/common/src/metadata.rs | 4 ++-- datafusion/proto/src/logical_plan/from_proto.rs | 1 - datafusion/proto/src/logical_plan/mod.rs | 9 +++++++-- datafusion/sql/src/expr/mod.rs | 2 ++ datafusion/sql/src/planner.rs | 7 +++++++ 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index d7b015c7d751..1850a9441a88 100644 --- a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -95,7 +95,7 @@ pub fn check_metadata_with_storage_equal( ); } - let metdata_equal = match (actual.1, expected.1) { + let metadata_equal = match (actual.1, expected.1) { (None, None) => true, (None, Some(expected_metadata)) => expected_metadata.is_empty(), (Some(actual_metadata), None) => actual_metadata.is_empty(), @@ -104,7 +104,7 @@ pub fn check_metadata_with_storage_equal( } }; - if !metdata_equal { + if !metadata_equal { return _plan_err!( "Expected 
{what} of type {}, got {}{context}", format_type_and_metadata(expected.0, expected.1), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index ae395afaeb23..57831b8dd612 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -604,7 +604,6 @@ pub fn parse_expr( }) => match data_type { None => Ok(Expr::Placeholder(Placeholder::new(id.clone(), None))), Some(data_type) => { - // Foofy let field = Field::new("", data_type.try_into()?, nullable.unwrap_or(true)) .with_metadata(metadata.clone()); diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 55e778b53155..5af3a0791e07 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -894,6 +894,9 @@ impl AsLogicalPlan for LogicalPlanNode { .map(Field::try_from) .collect::>()?; + // If the fields are empty this may have been generated by an + // earlier version of DataFusion, in which case the DataTypes + // can be used to construct the plan. 
if fields.is_empty() { LogicalPlanBuilder::from(input) .prepare( @@ -1651,11 +1654,13 @@ impl AsLogicalPlan for LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::Prepare(Box::new( protobuf::PrepareNode { name: name.clone(), + input: Some(Box::new(input)), + // Store the DataTypes for reading by older DataFusion data_types: data_types .iter() - .map(|t| t.data_type().try_into()) + .map(|f| f.data_type().try_into()) .collect::, _>>()?, - input: Some(Box::new(input)), + // Store the Fields for current and future DataFusion fields: data_types .iter() .map(|f| f.as_ref().try_into()) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index d74415faccf4..5cff1e349b18 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -986,6 +986,8 @@ impl SqlToRel<'_, S> { _ => expr, }; + // Currently drops metadata attached to the type + // https://github.com/apache/datafusion/issues/18060 Ok(Expr::Cast(Cast::new( Box::new(expr), dt.data_type().clone(), diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 108e5ad52536..05bb1121131b 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -602,6 +602,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) => { // Arrays may be multi-dimensional. let inner_data_type = self.convert_data_type(inner_sql_type)?; + + // Lists are allowed to have an arbitrarily named field; + // however, a name other than 'item' will cause it to fail an + // == check against a more idiomatically created list in + // arrow-rs which causes issues. We use the low-level + // constructor here to preserve extension metadata from the + // child type. 
Ok(Field::new( "", DataType::List( From 4f4820b84338d8b5ab736d877c414de9e7903d6c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 14 Oct 2025 14:59:07 -0500 Subject: [PATCH 16/46] use the equality checker in one more place --- datafusion/expr/src/logical_plan/plan.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 9e7af9520ce7..55b2f19a76b8 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -54,6 +54,7 @@ use crate::{ use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; use datafusion_common::cse::{NormalizeEq, Normalizeable}; use datafusion_common::format::ExplainFormat; +use datafusion_common::metadata::check_metadata_with_storage_equal; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion, }; @@ -1499,6 +1500,7 @@ impl LogicalPlan { /// /// Note that this will drop any extension or field metadata attached to parameters. Use /// [`LogicalPlan::get_parameter_fields`] to keep extension metadata. 
+ #[deprecated] pub fn get_parameter_types( &self, ) -> Result>, DataFusionError> { @@ -1524,12 +1526,12 @@ impl LogicalPlan { let prev = param_types.get(id); match (prev, field) { (Some(Some(prev)), Some(field)) => { - // This check is possibly too strict (requires nullability and field - // metadata align perfectly, rather than compute true type equality - // when field metadata is representing an extension type) - if prev != field { - plan_err!("Conflicting types for {id}")?; - } + check_metadata_with_storage_equal( + (field.data_type(), Some(field.metadata())), + (prev.data_type(), Some(prev.metadata())), + "parameter", + &format!(": Conflicting types for id {id}"), + )?; } (_, Some(field)) => { param_types.insert(id.clone(), Some(Arc::clone(field))); @@ -5163,7 +5165,7 @@ mod tests { .unwrap(); // Check that the placeholder parameters have not received a DataType. - let params = plan.get_parameter_types().unwrap(); + let params = plan.get_parameter_fields().unwrap(); assert_eq!(params.len(), 1); let parameter_type = params.clone().get(placeholder_value).unwrap().clone(); From 63ae6fbbeb2ae508868dda7cd2f6cb47b75dd229 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 14 Oct 2025 15:27:24 -0500 Subject: [PATCH 17/46] update data_types variables that were actually fields --- datafusion/common/src/metadata.rs | 8 +++++++- datafusion/core/src/execution/context/mod.rs | 18 +++++++++--------- .../core/src/execution/session_state.rs | 6 +++--- datafusion/expr/src/logical_plan/builder.rs | 4 ++-- datafusion/expr/src/logical_plan/plan.rs | 9 +++------ datafusion/expr/src/logical_plan/statement.rs | 19 ++++++++++++++----- datafusion/proto/src/logical_plan/mod.rs | 6 +++--- datafusion/sql/src/statement.rs | 12 ++++++------ datafusion/sql/tests/cases/params.rs | 4 ++-- 9 files changed, 49 insertions(+), 37 deletions(-) diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index 1850a9441a88..9520ed016f7c 100644 --- 
a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -115,7 +115,13 @@ pub fn check_metadata_with_storage_equal( Ok(()) } -fn format_type_and_metadata( +/// Given a data type represented by storage and optional metadata, generate +/// a user-facing string +/// +/// This function exists to reduce the number of Field debug strings that are +/// used to communicate type information in error messages and plan explain +/// renderings. +pub fn format_type_and_metadata( data_type: &DataType, metadata: Option<&std::collections::HashMap>, ) -> String { diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 5b16d9002786..c84c20f03b66 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -709,15 +709,15 @@ impl SessionContext { LogicalPlan::Statement(Statement::Prepare(Prepare { name, input, - data_types, + fields, })) => { // The number of parameters must match the specified data types length. - if !data_types.is_empty() { + if !fields.is_empty() { let param_names = input.get_parameter_names()?; - if param_names.len() != data_types.len() { + if param_names.len() != fields.len() { return plan_err!( "Prepare specifies {} data types but query has {} parameters", - data_types.len(), + fields.len(), param_names.len() ); } @@ -727,7 +727,7 @@ impl SessionContext { // not currently feasible. This is because `now()` would be optimized to a // constant value, causing each EXECUTE to yield the same result, which is // incorrect behavior. - self.state.write().store_prepared(name, data_types, input)?; + self.state.write().store_prepared(name, fields, input)?; self.return_empty_dataframe() } LogicalPlan::Statement(Statement::Execute(execute)) => { @@ -1248,18 +1248,18 @@ impl SessionContext { .collect::>()?; // If the prepared statement provides data types, cast the params to those types. 
- if !prepared.data_types.is_empty() { - if params.len() != prepared.data_types.len() { + if !prepared.fields.is_empty() { + if params.len() != prepared.fields.len() { return exec_err!( "Prepared statement '{}' expects {} parameters, but {} provided", name, - prepared.data_types.len(), + prepared.fields.len(), params.len() ); } params = params .into_iter() - .zip(prepared.data_types.iter()) + .zip(prepared.fields.iter()) .map(|(e, dt)| -> Result<_> { // This is fishy...we're casting storage without checking if an // extension type supports the destination diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 4fe6b57130aa..44eeb2df3f46 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -872,12 +872,12 @@ impl SessionState { pub(crate) fn store_prepared( &mut self, name: String, - data_types: Vec, + fields: Vec, plan: Arc, ) -> datafusion_common::Result<()> { match self.prepared_plans.entry(name) { Entry::Vacant(e) => { - e.insert(Arc::new(PreparedPlan { data_types, plan })); + e.insert(Arc::new(PreparedPlan { fields, plan })); Ok(()) } Entry::Occupied(e) => { @@ -2011,7 +2011,7 @@ impl SimplifyInfo for SessionSimplifyProvider<'_> { #[derive(Debug)] pub(crate) struct PreparedPlan { /// Data types of the parameters - pub(crate) data_types: Vec, + pub(crate) fields: Vec, /// The prepared logical plan pub(crate) plan: Arc, } diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index a85de130e948..a430add3f786 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -623,11 +623,11 @@ impl LogicalPlanBuilder { } /// Make a builder for a prepare logical plan from the builder's plan - pub fn prepare(self, name: String, data_types: Vec) -> Result { + pub fn prepare(self, name: String, fields: Vec) -> Result { 
Ok(Self::new(LogicalPlan::Statement(Statement::Prepare( Prepare { name, - data_types, + fields, input: self.plan, }, )))) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 55b2f19a76b8..3cde927d9c08 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1099,15 +1099,13 @@ impl LogicalPlan { })) } LogicalPlan::Statement(Statement::Prepare(Prepare { - name, - data_types, - .. + name, fields, .. })) => { self.assert_no_expressions(expr)?; let input = self.only_input(inputs)?; Ok(LogicalPlan::Statement(Statement::Prepare(Prepare { name: name.clone(), - data_types: data_types.clone(), + fields: fields.clone(), input: Arc::new(input), }))) } @@ -1283,7 +1281,7 @@ impl LogicalPlan { if let LogicalPlan::Statement(Statement::Prepare(prepare_lp)) = plan_with_values { - param_values.verify_fields(&prepare_lp.data_types)?; + param_values.verify_fields(&prepare_lp.fields)?; // try and take ownership of the input if is not shared, clone otherwise Arc::unwrap_or_clone(prepare_lp.input) } else { @@ -1500,7 +1498,6 @@ impl LogicalPlan { /// /// Note that this will drop any extension or field metadata attached to parameters. Use /// [`LogicalPlan::get_parameter_fields`] to keep extension metadata. - #[deprecated] pub fn get_parameter_types( &self, ) -> Result>, DataFusionError> { diff --git a/datafusion/expr/src/logical_plan/statement.rs b/datafusion/expr/src/logical_plan/statement.rs index 6cff6b180423..bfc6b53d1136 100644 --- a/datafusion/expr/src/logical_plan/statement.rs +++ b/datafusion/expr/src/logical_plan/statement.rs @@ -16,6 +16,7 @@ // under the License. 
use arrow::datatypes::FieldRef; +use datafusion_common::metadata::format_type_and_metadata; use datafusion_common::{DFSchema, DFSchemaRef}; use itertools::Itertools as _; use std::fmt::{self, Display}; @@ -108,10 +109,18 @@ impl Statement { }) => { write!(f, "SetVariable: set {variable:?} to {value:?}") } - Statement::Prepare(Prepare { - name, data_types, .. - }) => { - write!(f, "Prepare: {name:?} [{}]", data_types.iter().join(", ")) + Statement::Prepare(Prepare { name, fields, .. }) => { + write!( + f, + "Prepare: {name:?} [{}]", + fields + .iter() + .map(|f| format_type_and_metadata( + f.data_type(), + Some(f.metadata()) + )) + .join(", ") + ) } Statement::Execute(Execute { name, parameters, .. @@ -192,7 +201,7 @@ pub struct Prepare { /// The name of the statement pub name: String, /// Data types of the parameters ([`Expr::Placeholder`]) - pub data_types: Vec, + pub fields: Vec, /// The logical plan of the statements pub input: Arc, } diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 5af3a0791e07..9f250c86dcf8 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -1645,7 +1645,7 @@ impl AsLogicalPlan for LogicalPlanNode { } LogicalPlan::Statement(Statement::Prepare(Prepare { name, - data_types, + fields, input, })) => { let input = @@ -1656,12 +1656,12 @@ impl AsLogicalPlan for LogicalPlanNode { name: name.clone(), input: Some(Box::new(input)), // Store the DataTypes for reading by older DataFusion - data_types: data_types + data_types: fields .iter() .map(|f| f.data_type().try_into()) .collect::, _>>()?, // Store the Fields for current and future DataFusion - fields: data_types + fields: fields .iter() .map(|f| f.as_ref().try_into()) .collect::, _>>()?, diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 87076950d91a..4338285d678b 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -730,14 
+730,14 @@ impl SqlToRel<'_, S> { statement, } => { // Convert parser data types to DataFusion data types - let mut data_types: Vec = data_types + let mut fields: Vec = data_types .into_iter() .map(|t| self.convert_data_type(&t)) .collect::>()?; // Create planner context with parameters - let mut planner_context = PlannerContext::new() - .with_prepare_param_data_types(data_types.clone()); + let mut planner_context = + PlannerContext::new().with_prepare_param_data_types(fields.clone()); // Build logical plan for inner statement of the prepare statement let plan = self.sql_statement_to_plan_with_context_impl( @@ -745,7 +745,7 @@ impl SqlToRel<'_, S> { &mut planner_context, )?; - if data_types.is_empty() { + if fields.is_empty() { let map_types = plan.get_parameter_fields()?; let param_types: Vec<_> = (1..=map_types.len()) .filter_map(|i| { @@ -753,13 +753,13 @@ impl SqlToRel<'_, S> { map_types.get(&key).and_then(|opt| opt.clone()) }) .collect(); - data_types.extend(param_types.iter().cloned()); + fields.extend(param_types.iter().cloned()); planner_context.with_prepare_param_data_types(param_types); } Ok(LogicalPlan::Statement(PlanStatement::Prepare(Prepare { name: ident_to_string(&name), - data_types, + fields, input: Arc::new(plan), }))) } diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index 7c7b93d1aa5f..f60188b85185 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -54,8 +54,8 @@ impl ParameterTest<'_> { fn generate_prepare_stmt_and_data_types(sql: &str) -> (LogicalPlan, String) { let plan = logical_plan(sql).unwrap(); let data_types = match &plan { - LogicalPlan::Statement(Statement::Prepare(Prepare { data_types, .. })) => { - data_types.iter().join(", ").to_string() + LogicalPlan::Statement(Statement::Prepare(Prepare { fields, .. 
})) => { + fields.iter().map(|f| f.data_type()).join(", ").to_string() } _ => panic!("Expected a Prepare statement"), }; From a085cf18ea7119e628c167ad513c34ff7af00cef Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 14 Oct 2025 15:45:40 -0500 Subject: [PATCH 18/46] revert tests --- datafusion/sql/tests/cases/params.rs | 48 ++++++++++++++-------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index f60188b85185..0a355b4aab75 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -155,13 +155,13 @@ fn test_prepare_statement_to_plan_no_param() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32] Projection: person.id, person.age Filter: person.age = Int64(10) TableScan: person "# ); - assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + assert_snapshot!(dt, @r#"Int32"#); /////////////////// // replace params with values @@ -265,12 +265,12 @@ fn test_prepare_statement_to_plan_params_as_constants() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32] Projection: $1 EmptyRelation: rows=1 "# ); - assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + assert_snapshot!(dt, @r#"Int32"#); /////////////////// // replace params with values @@ -290,12 +290,12 @@ fn test_prepare_statement_to_plan_params_as_constants() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32] Projection: 
Int64(1) + $1 EmptyRelation: rows=1 "# ); - assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + assert_snapshot!(dt, @r#"Int32"#); /////////////////// // replace params with values @@ -315,12 +315,12 @@ fn test_prepare_statement_to_plan_params_as_constants() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32, Float64] Projection: Int64(1) + $1 + $2 EmptyRelation: rows=1 "# ); - assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + assert_snapshot!(dt, @r#"Int32, Float64"#); /////////////////// // replace params with values @@ -376,7 +376,7 @@ fn test_prepare_statement_infer_types_from_join() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32] Projection: person.id, orders.order_id Inner Join: Filter: person.id = orders.customer_id AND person.age = $1 TableScan: person @@ -424,7 +424,7 @@ fn test_prepare_statement_infer_types_from_predicate() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32] Projection: person.id, person.age Filter: person.age = $1 TableScan: person @@ -476,7 +476,7 @@ fn test_prepare_statement_infer_types_from_between_predicate() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {} }, Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32, Int32] Projection: person.id, person.age Filter: person.age BETWEEN $1 AND $2 TableScan: person @@ -533,7 +533,7 @@ fn test_prepare_statement_infer_types_subquery() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Field { name: "id", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [UInt32] Projection: person.id, person.age Filter: person.age = () Subquery: @@ -598,7 +598,7 @@ fn test_prepare_statement_update_infer() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Field { name: "age", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "id", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32, UInt32] Dml: op=[Update] table=[person] Projection: person.id AS id, person.first_name AS first_name, person.last_name AS last_name, $1 AS age, person.state AS state, person.salary AS salary, person.birth_date AS birth_date, person.😀 AS 😀 Filter: person.id = $2 @@ -662,7 +662,7 @@ fn test_prepare_statement_insert_infer() { test.run(), @r#" ** Initial Plan: - Prepare: "my_plan" [Field { name: "id", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "first_name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "last_name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [UInt32, Utf8, Utf8] Dml: op=[Insert Into] table=[person] Projection: column1 AS id, column2 AS first_name, column3 AS last_name, CAST(NULL AS Int32) AS age, CAST(NULL AS Utf8) AS state, CAST(NULL AS Float64) AS salary, CAST(NULL AS Timestamp(Nanosecond, None)) AS birth_date, CAST(NULL AS 
Int32) AS 😀 Values: ($1, $2, $3) @@ -681,13 +681,13 @@ fn test_prepare_statement_to_plan_one_param() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32] Projection: person.id, person.age Filter: person.age = $1 TableScan: person "# ); - assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + assert_snapshot!(dt, @r#"Int32"#); /////////////////// // replace params with values @@ -714,13 +714,13 @@ fn test_prepare_statement_to_plan_data_type() { // age is defined as Int32 but prepare statement declares it as DOUBLE/Float64 // Prepare statement and its logical plan should be created successfully @r#" - Prepare: "my_plan" [Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Float64] Projection: person.id, person.age Filter: person.age = $1 TableScan: person "# ); - assert_snapshot!(dt, @r#"Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + assert_snapshot!(dt, @r#"Float64"#); /////////////////// // replace params with values still succeed and use Float64 @@ -747,13 +747,13 @@ fn test_prepare_statement_to_plan_multi_params() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: 
Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32, Utf8View, Float64, Int32, Float64, Utf8View] Projection: person.id, person.age, $6 Filter: person.age IN ([$1, $4]) AND person.salary > $3 AND person.salary < $5 OR person.first_name < $2 TableScan: person "# ); - assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + assert_snapshot!(dt, @r#"Int32, Utf8View, Float64, Int32, Float64, Utf8View"#); /////////////////// // replace params with values @@ -790,7 +790,7 @@ fn test_prepare_statement_to_plan_having() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int32, Float64, Float64, Float64] Projection: person.id, sum(person.age) Filter: sum(person.age) < $1 AND sum(person.age) > Int64(10) OR sum(person.age) IN ([$3, $4]) Aggregate: groupBy=[[person.id]], aggr=[[sum(person.age)]] @@ -798,7 +798,7 @@ fn test_prepare_statement_to_plan_having() { TableScan: person "# ); - 
assert_snapshot!(dt, @r#"Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + assert_snapshot!(dt, @r#"Int32, Float64, Float64, Float64"#); /////////////////// // replace params with values @@ -831,13 +831,13 @@ fn test_prepare_statement_to_plan_limit() { assert_snapshot!( plan, @r#" - Prepare: "my_plan" [Field { name: "", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] + Prepare: "my_plan" [Int64, Int64] Limit: skip=$1, fetch=$2 Projection: person.id TableScan: person "# ); - assert_snapshot!(dt, @r#"Field { name: "", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + assert_snapshot!(dt, @r#"Int64, Int64"#); // replace params with values let param_values = vec![ScalarValue::Int64(Some(10)), ScalarValue::Int64(Some(200))]; From 54759372d66e39cc795bc3234888507b02bf7952 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 14 Oct 2025 16:28:29 -0500 Subject: [PATCH 19/46] fix slt --- datafusion/sqllogictest/test_files/prepare.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/prepare.slt b/datafusion/sqllogictest/test_files/prepare.slt index 084e717dc2d7..d61603ae6558 100644 --- a/datafusion/sqllogictest/test_files/prepare.slt +++ b/datafusion/sqllogictest/test_files/prepare.slt @@ -289,7 +289,7 @@ query TT EXPLAIN PREPARE my_plan(INT) AS SELECT id + $1 FROM 
person; ---- logical_plan -01)Prepare: "my_plan" [Field { name: "", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }] +01)Prepare: "my_plan" [Int32] 02)--Projection: person.id + $1 03)----TableScan: person projection=[id] From 05431a2ce7b49ca1b8ef52fec2f616ca7a10a2e6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 14 Oct 2025 17:14:28 -0500 Subject: [PATCH 20/46] add a test that fails --- datafusion/core/tests/sql/select.rs | 39 ++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 98c3e3ccee8a..0751f1f79bea 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -15,8 +15,10 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; + use super::*; -use datafusion_common::ScalarValue; +use datafusion_common::{metadata::Literal, ParamValues, ScalarValue}; use insta::assert_snapshot; #[tokio::test] @@ -317,6 +319,41 @@ async fn test_named_parameter_not_bound() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_query_parameters_with_metadata() -> Result<()> { + let tmp_dir = TempDir::new()?; + let partition_count = 4; + let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; + + let metadata0 = HashMap::from([( + "some_key".to_string(), + "some_value".to_string(), + )]); + let metadata1 = HashMap::from([( + "some_other_key".to_string(), + "some_other_value".to_string(), + )]); + + // sql to statement then to logical plan with parameters + let df = ctx.sql("SELECT $1, $2").await?; + + let df_with_params_replaced = df.with_param_values(ParamValues::List(vec![ + Literal::new(ScalarValue::UInt32(Some(3)), Some(metadata0.clone().into())), + Literal::new( + ScalarValue::Utf8(Some("bar_value".to_string())), + Some(metadata1.clone().into()), + ), + ]))?; + + let schema = 
df_with_params_replaced.schema(); + assert_eq!(schema.field(0).data_type(), &DataType::UInt32); + assert_eq!(schema.field(0).metadata(), &metadata0); + assert_eq!(schema.field(1).data_type(), &DataType::Utf8); + assert_eq!(schema.field(1).metadata(), &metadata1); + + Ok(()) +} + #[tokio::test] async fn test_version_function() { let expected_version = format!( From 45f15f4cc438dc7e782f6c2cdf9e37b3a7c224ff Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 13:48:30 -0500 Subject: [PATCH 21/46] fix test --- datafusion/core/tests/sql/select.rs | 56 +++++++++++++++++------------ 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 0751f1f79bea..133e73538285 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -18,6 +18,7 @@ use std::collections::HashMap; use super::*; +use datafusion::assert_batches_eq; use datafusion_common::{metadata::Literal, ParamValues, ScalarValue}; use insta::assert_snapshot; @@ -321,35 +322,44 @@ async fn test_named_parameter_not_bound() -> Result<()> { #[tokio::test] async fn test_query_parameters_with_metadata() -> Result<()> { - let tmp_dir = TempDir::new()?; - let partition_count = 4; - let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; + let ctx = SessionContext::new(); - let metadata0 = HashMap::from([( - "some_key".to_string(), - "some_value".to_string(), - )]); - let metadata1 = HashMap::from([( - "some_other_key".to_string(), - "some_other_value".to_string(), - )]); + let df = ctx.sql("SELECT $1, $2").await.unwrap(); - // sql to statement then to logical plan with parameters - let df = ctx.sql("SELECT $1, $2").await?; + let metadata1 = HashMap::from([("some_key".to_string(), "some_value".to_string())]); + let metadata2 = + HashMap::from([("some_other_key".to_string(), "some_other_value".to_string())]); + + let df_with_params_replaced = df + 
.with_param_values(ParamValues::List(vec![ + Literal::new(ScalarValue::UInt32(Some(1)), Some(metadata1.clone().into())), + Literal::new( + ScalarValue::Utf8(Some("two".to_string())), + Some(metadata2.clone().into()), + ), + ])) + .unwrap(); - let df_with_params_replaced = df.with_param_values(ParamValues::List(vec![ - Literal::new(ScalarValue::UInt32(Some(3)), Some(metadata0.clone().into())), - Literal::new( - ScalarValue::Utf8(Some("bar_value".to_string())), - Some(metadata1.clone().into()), - ), - ]))?; + // df_with_params_replaced.schema() is not correct here + // https://github.com/apache/datafusion/issues/18102 + let batches = df_with_params_replaced.clone().collect().await.unwrap(); + let schema = batches[0].schema(); - let schema = df_with_params_replaced.schema(); assert_eq!(schema.field(0).data_type(), &DataType::UInt32); - assert_eq!(schema.field(0).metadata(), &metadata0); + assert_eq!(schema.field(0).metadata(), &metadata1); assert_eq!(schema.field(1).data_type(), &DataType::Utf8); - assert_eq!(schema.field(1).metadata(), &metadata1); + assert_eq!(schema.field(1).metadata(), &metadata2); + + assert_batches_eq!( + vec![ + "+----+-----+", + "| $1 | $2 |", + "+----+-----+", + "| 1 | two |", + "+----+-----+", + ], + &batches + ); Ok(()) } From 4386a85707aa1cf47d0e5612d4d431292c890026 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 15:10:25 -0500 Subject: [PATCH 22/46] add tests for expression schema for typed parameters --- datafusion/expr/src/expr_schema.rs | 73 ++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 8 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 94b855addcaa..586151e70039 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -310,10 +310,12 @@ impl ExprSchemable for Expr { window_function, ) .map(|(_, nullable)| nullable), - Expr::ScalarVariable(_, _) - | Expr::TryCast { .. 
} - | Expr::Unnest(_) - | Expr::Placeholder(_) => Ok(true), + Expr::Placeholder(Placeholder { id: _, field }) => { + Ok(field.as_ref().map(|f| f.is_nullable()).unwrap_or(true)) + } + Expr::ScalarVariable(_, _) | Expr::TryCast { .. } | Expr::Unnest(_) => { + Ok(true) + } Expr::IsNull(_) | Expr::IsNotNull(_) | Expr::IsTrue(_) @@ -435,9 +437,11 @@ impl ExprSchemable for Expr { }) => { let field = match &**expr { Expr::Placeholder(Placeholder { field, .. }) => match &field { + // TODO: This seems to be pulling metadata/types from the schema + // based on the Alias destination. name? Is this correct? None => schema - .data_type_and_nullable(&Column::from_name(name)) - .map(|(d, n)| Field::new(&schema_name, d.clone(), n)), + .field_from_column(&Column::from_name(name)) + .map(|f| f.clone().with_name(&schema_name)), Some(field) => Ok(field .as_ref() .clone() @@ -593,6 +597,10 @@ impl ExprSchemable for Expr { .to_field(schema) .map(|(_, f)| f.as_ref().clone().with_data_type(data_type.clone())) .map(Arc::new), + Expr::Placeholder(Placeholder { + id: _, + field: Some(field), + }) => Ok(field.as_ref().clone().with_name(&schema_name).into()), Expr::Like(_) | Expr::SimilarTo(_) | Expr::Not(_) @@ -775,10 +783,12 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result {{ @@ -904,7 +914,7 @@ mod tests { let schema = DFSchema::from_unqualified_fields( vec![meta.add_to_field(Field::new("foo", DataType::Int32, true))].into(), - std::collections::HashMap::new(), + HashMap::new(), ) .unwrap(); @@ -920,6 +930,53 @@ mod tests { assert_eq!(meta, outer_ref.metadata(&schema).unwrap()); } + #[test] + fn test_expr_placeholder_metadata() { + let schema = MockExprSchema::new(); + + let mut placeholder_meta = HashMap::new(); + placeholder_meta.insert("bar".to_string(), "buzz".to_string()); + let placeholder_meta = FieldMetadata::from(placeholder_meta); + + let expr = Expr::Placeholder(Placeholder::new_with_metadata( + "".to_string(), + Some( + Field::new("", 
DataType::Utf8, true) + .with_metadata(placeholder_meta.to_hashmap()) + .into(), + ), + )); + + assert!(expr.nullable(&schema).unwrap()); + assert_eq!( + expr.data_type_and_nullable(&schema).unwrap(), + (DataType::Utf8, true) + ); + assert_eq!(placeholder_meta, expr.metadata(&schema).unwrap()); + + let expr_alias = expr.alias("a placeholder by any other name"); + assert_eq!( + expr_alias.data_type_and_nullable(&schema).unwrap(), + (DataType::Utf8, true) + ); + assert_eq!(placeholder_meta, expr_alias.metadata(&schema).unwrap()); + + // Non-nullable placeholder field should remain non-nullable + let expr = Expr::Placeholder(Placeholder::new_with_metadata( + "".to_string(), + Some(Field::new("", DataType::Utf8, false).into()), + )); + assert_eq!( + expr.data_type_and_nullable(&schema).unwrap(), + (DataType::Utf8, false) + ); + let expr_alias = expr.alias("a placeholder by any other name"); + assert_eq!( + expr_alias.data_type_and_nullable(&schema).unwrap(), + (DataType::Utf8, false) + ); + } + #[derive(Debug)] struct MockExprSchema { field: Field, From 8b5622f38dbea5b399236de7176f96db09b95574 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 16:19:08 -0500 Subject: [PATCH 23/46] more tests --- datafusion/expr/src/expr_fn.rs | 4 +-- datafusion/expr/src/expr_schema.rs | 2 +- datafusion/expr/src/logical_plan/plan.rs | 37 ++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 5ff42a31a885..c777c4978f99 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -119,8 +119,8 @@ pub fn ident(name: impl Into) -> Expr { /// /// ```rust /// # use datafusion_expr::{placeholder}; -/// let p = placeholder("$0"); // $0, refers to parameter 1 -/// assert_eq!(p.to_string(), "$0") +/// let p = placeholder("$1"); // $1, refers to parameter 1 +/// assert_eq!(p.to_string(), "$1") /// ``` pub fn placeholder(id: impl Into) -> Expr { 
Expr::Placeholder(Placeholder { diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 586151e70039..4f1da2c442a3 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -931,7 +931,7 @@ mod tests { } #[test] - fn test_expr_placeholder_metadata() { + fn test_expr_placeholder() { let schema = MockExprSchema::new(); let mut placeholder_meta = HashMap::new(); diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 3cde927d9c08..09449be8fbce 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -4250,6 +4250,7 @@ mod tests { binary_expr, col, exists, in_subquery, lit, placeholder, scalar_subquery, GroupingSet, }; + use datafusion_common::metadata::Literal; use datafusion_common::tree_node::{ TransformedResult, TreeNodeRewriter, TreeNodeVisitor, }; @@ -4790,6 +4791,42 @@ mod tests { .expect_err("unexpectedly succeeded to replace an invalid placeholder"); } + #[test] + fn test_replace_placeholder_mismatched_metadata() { + let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]); + + // Create a prepared statement with explicit fields that do not have metadata + let plan = table_scan(TableReference::none(), &schema, None) + .unwrap() + .filter(col("id").eq(placeholder("$1"))) + .unwrap() + .build() + .unwrap(); + let prepared_builder = LogicalPlanBuilder::new(plan) + .prepare( + "".to_string(), + vec![Field::new("", DataType::Int32, true).into()], + ) + .unwrap(); + + // Attempt to bind a parameter with metadata + let mut scalar_meta = HashMap::new(); + scalar_meta.insert("some_key".to_string(), "some_value".to_string()); + let param_values = ParamValues::List(vec![Literal::new( + ScalarValue::Int32(Some(42)), + Some(scalar_meta.into()), + )]); + let err = prepared_builder + .plan() + .clone() + .with_param_values(param_values) + .unwrap_err(); + assert_eq!( + err.message(), + "Expected 
parameter of type Int32, got Int32<{\"some_key\": \"some_value\"}> at index 0" + ); + } + #[test] fn test_nullable_schema_after_grouping_set() { let schema = Schema::new(vec![ From df5aba8ef4639185653d5038431fe02cdb097d0c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 16:42:32 -0500 Subject: [PATCH 24/46] add placeholder inference test --- datafusion/expr/src/expr.rs | 43 +++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 2bcdee2cc419..6538a87aa527 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2666,7 +2666,8 @@ impl HashNode for Expr { } } -// Modifies expr if it is a placeholder with datatype of right +// Modifies expr to match the DataType, metadata, and nullability of other if it is +// a placeholder with previously unspecified type information (i.e., most placeholders) fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> { if let Expr::Placeholder(Placeholder { id: _, field }) = expr { if field.is_none() { @@ -2678,7 +2679,10 @@ fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Resu )))?; } Ok((_, other_field)) => { - *field = Some(other_field); + // We can't infer the nullability of the future parameter that might + // be bound, so ensure this is set to true + *field = + Some(other_field.as_ref().clone().with_nullable(true).into()); } } }; @@ -3495,8 +3499,8 @@ pub fn physical_name(expr: &Expr) -> Result { mod test { use crate::expr_fn::col; use crate::{ - case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue, - ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility, + case, lit, placeholder, qualified_wildcard, wildcard, wildcard_with_options, + ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility, }; use arrow::datatypes::{Field, Schema}; use sqlparser::ast; @@ -3609,6 +3613,37 @@ mod 
test { } } + #[test] + fn infer_placeholder_with_metadata() { + // name == $1 + let schema = + Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true) + .with_metadata( + [("some_key".to_string(), "some_value".to_string())].into(), + )])); + let df_schema = DFSchema::try_from(schema).unwrap(); + + let expr = binary_expr(col("name"), Operator::Eq, placeholder("$1")); + + let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap(); + match inferred_expr { + Expr::BinaryExpr(BinaryExpr { right, .. }) => match *right { + Expr::Placeholder(placeholder) => { + assert_eq!( + placeholder.field.as_ref().unwrap().data_type(), + &DataType::Utf8 + ); + assert_eq!( + placeholder.field.unwrap().metadata(), + df_schema.field(0).metadata() + ); + } + _ => panic!("Expected Placeholder"), + }, + _ => panic!("Expected BinaryExpr"), + } + } + #[test] fn format_case_when() -> Result<()> { let expr = case(col("a")) From 08d0e7abadd451dd83b5d3693a74d4c98fada5d3 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 16:49:28 -0500 Subject: [PATCH 25/46] add nullability to test --- datafusion/expr/src/expr.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 6538a87aa527..acdd2cffafe1 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -3615,9 +3615,9 @@ mod test { #[test] fn infer_placeholder_with_metadata() { - // name == $1 + // name == $1, where name is a non-nullable string let schema = - Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true) + Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, false) .with_metadata( [("some_key".to_string(), "some_value".to_string())].into(), )])); @@ -3634,9 +3634,11 @@ mod test { &DataType::Utf8 ); assert_eq!( - placeholder.field.unwrap().metadata(), + placeholder.field.as_ref().unwrap().metadata(), df_schema.field(0).metadata() ); + // Inferred placeholder should still be 
nullable + assert!(placeholder.field.as_ref().unwrap().is_nullable()); } _ => panic!("Expected Placeholder"), }, From 2b4b8e78f477c51fae668e54be795b6e92d08514 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 17:16:25 -0500 Subject: [PATCH 26/46] tests for proto --- .../tests/cases/roundtrip_logical_plan.rs | 31 +++++++++++++++++++ datafusion/proto/tests/cases/serialize.rs | 18 ++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index c5d4b49092d9..b8e14671fc53 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -1071,6 +1071,37 @@ async fn roundtrip_logical_plan_with_view_scan() -> Result<()> { Ok(()) } +#[tokio::test] +async fn roundtrip_logical_plan_prepared_statement_with_metadata() -> Result<()> { + let ctx = SessionContext::new(); + + let plan = ctx + .sql("SELECT $1") + .await + .unwrap() + .into_optimized_plan() + .unwrap(); + let prepared = LogicalPlanBuilder::new(plan) + .prepare( + "".to_string(), + vec![Field::new("", DataType::Int32, true) + .with_metadata( + [("some_key".to_string(), "some_value".to_string())].into(), + ) + .into()], + ) + .unwrap() + .plan() + .clone(); + + let bytes = logical_plan_to_bytes(&prepared)?; + let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; + assert_eq!(format!("{prepared}"), format!("{logical_round_trip}")); + // Also check exact equality in case metadata isn't reflected in the display string + assert_eq!(&prepared, &logical_round_trip); + Ok(()) +} + pub mod proto { #[derive(Clone, PartialEq, ::prost::Message)] pub struct TopKPlanProto { diff --git a/datafusion/proto/tests/cases/serialize.rs b/datafusion/proto/tests/cases/serialize.rs index c9ef4377d43b..3040bdba3b64 100644 --- a/datafusion/proto/tests/cases/serialize.rs +++ b/datafusion/proto/tests/cases/serialize.rs @@ 
-18,10 +18,11 @@ use std::sync::Arc; use arrow::array::ArrayRef; -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, Field}; use datafusion::execution::FunctionRegistry; use datafusion::prelude::SessionContext; +use datafusion_expr::expr::Placeholder; use datafusion_expr::{col, create_udf, lit, ColumnarValue}; use datafusion_expr::{Expr, Volatility}; use datafusion_functions::string; @@ -136,6 +137,21 @@ fn roundtrip_qualified_alias() { assert_eq!(qual_alias, roundtrip_expr(&qual_alias)); } +#[test] +fn roundtrip_placeholder_with_metadata() { + let expr = Expr::Placeholder(Placeholder::new_with_metadata( + "placeholder_id".to_string(), + Some( + Field::new("", DataType::Utf8, false) + .with_metadata( + [("some_key".to_string(), "some_value".to_string())].into(), + ) + .into(), + ), + )); + assert_eq!(expr, roundtrip_expr(&expr)); +} + #[test] fn roundtrip_deeply_nested_binary_expr() { // We need more stack space so this doesn't overflow in dev builds From 57437b934d0256eb403634372f92057fba2e0807 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 17:35:55 -0500 Subject: [PATCH 27/46] make sure I don't forget these --- datafusion/sql/tests/cases/params.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index 0a355b4aab75..f4ccf2825034 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -704,6 +704,16 @@ fn test_prepare_statement_to_plan_one_param() { ); } +#[test] +fn test_update_infer_with_metadata() { + // TODO +} + +#[test] +fn test_insert_infer_with_metadata() { + // TOOD +} + #[test] fn test_prepare_statement_to_plan_data_type() { let sql = "PREPARE my_plan(DOUBLE) AS SELECT id, age FROM person WHERE age = $1"; From 70f36639f5123184c9c24c33d0f6dec31902a70b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 17:37:59 -0500 Subject: [PATCH 28/46] fix typo --- 
datafusion/sql/tests/cases/params.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index f4ccf2825034..0dcb4ff056b0 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -711,7 +711,7 @@ fn test_update_infer_with_metadata() { #[test] fn test_insert_infer_with_metadata() { - // TOOD + // TODO } #[test] From 58807fd34475630992b73f825991c0be5c657c00 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 17:58:45 -0500 Subject: [PATCH 29/46] remove comment --- datafusion/core/src/execution/context/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index c84c20f03b66..6d5acfb7f476 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1261,8 +1261,6 @@ impl SessionContext { .into_iter() .zip(prepared.fields.iter()) .map(|(e, dt)| -> Result<_> { - // This is fishy...we're casting storage without checking if an - // extension type supports the destination e.cast_storage_to(dt.data_type()) }) .collect::>()?; From 4bfc5cd9b173188da84d45c334fa44ef85c2cc2b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 18:02:28 -0500 Subject: [PATCH 30/46] fix --- datafusion/core/tests/sql/select.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 133e73538285..a8485cd8fa9e 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -351,7 +351,7 @@ async fn test_query_parameters_with_metadata() -> Result<()> { assert_eq!(schema.field(1).metadata(), &metadata2); assert_batches_eq!( - vec![ + [ "+----+-----+", "| $1 | $2 |", "+----+-----+", From 81c3baff30d1462f5315d2b76985062deb106d5e Mon Sep 17 00:00:00 2001 From: Dewey Dunnington 
Date: Thu, 16 Oct 2025 21:18:01 -0500 Subject: [PATCH 31/46] fmt --- datafusion/core/src/execution/context/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 6d5acfb7f476..6f27975c171c 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1260,9 +1260,7 @@ impl SessionContext { params = params .into_iter() .zip(prepared.fields.iter()) - .map(|(e, dt)| -> Result<_> { - e.cast_storage_to(dt.data_type()) - }) + .map(|(e, dt)| -> Result<_> { e.cast_storage_to(dt.data_type()) }) .collect::>()?; } From 860e0139e67166111454261c23bf70d5da817383 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 21:40:15 -0500 Subject: [PATCH 32/46] fix error string comparison --- datafusion/expr/src/logical_plan/plan.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 09449be8fbce..58b4553807dc 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -4816,15 +4816,11 @@ mod tests { ScalarValue::Int32(Some(42)), Some(scalar_meta.into()), )]); - let err = prepared_builder + prepared_builder .plan() .clone() .with_param_values(param_values) - .unwrap_err(); - assert_eq!( - err.message(), - "Expected parameter of type Int32, got Int32<{\"some_key\": \"some_value\"}> at index 0" - ); + .expect_err("prepared field metadata mismatch unexpectedly succeeded"); } #[test] From b6c71b31bbdaa89599133a557208117a0d5aa396 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 16 Oct 2025 23:14:17 -0500 Subject: [PATCH 33/46] add update/insert test --- datafusion/sql/tests/cases/params.rs | 167 ++++++++++++++++++++++++++- datafusion/sql/tests/common/mod.rs | 8 ++ 2 files changed, 171 insertions(+), 4 deletions(-) diff --git 
a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index 0dcb4ff056b0..4e8758762af0 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -16,8 +16,8 @@ // under the License. use crate::logical_plan; -use arrow::datatypes::DataType; -use datafusion_common::{assert_contains, ParamValues, ScalarValue}; +use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::{assert_contains, metadata::Literal, ParamValues, ScalarValue}; use datafusion_expr::{LogicalPlan, Prepare, Statement}; use insta::assert_snapshot; use itertools::Itertools as _; @@ -51,6 +51,34 @@ impl ParameterTest<'_> { } } +pub struct ParameterTestWithMetadata<'a> { + pub sql: &'a str, + pub expected_types: Vec<(&'a str, Option)>, + pub param_values: Vec, +} + +impl ParameterTestWithMetadata<'_> { + pub fn run(&self) -> String { + let plan = logical_plan(self.sql).unwrap(); + + let actual_types = plan.get_parameter_fields().unwrap(); + let expected_types: HashMap> = self + .expected_types + .iter() + .map(|(k, v)| ((*k).to_string(), v.clone())) + .collect(); + + assert_eq!(actual_types, expected_types); + + let plan_with_params = plan + .clone() + .with_param_values(ParamValues::List(self.param_values.clone())) + .unwrap(); + + format!("** Initial Plan:\n{plan}\n** Final Plan:\n{plan_with_params}") + } +} + fn generate_prepare_stmt_and_data_types(sql: &str) -> (LogicalPlan, String) { let plan = logical_plan(sql).unwrap(); let data_types = match &plan { @@ -706,12 +734,143 @@ fn test_prepare_statement_to_plan_one_param() { #[test] fn test_update_infer_with_metadata() { - // TODO + // Here the uuid field is inferred as nullable because it appears in the filter + // (and not in the update values, where its nullability would be inferred) + let uuid_field = Field::new("", DataType::FixedSizeBinary(16), true).with_metadata( + [("ARROW:extension:name".to_string(), "arrow.uuid".to_string())].into(), + ); + let uuid_bytes = vec![1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let expected_types = vec![ + ( + "$1", + Some(Field::new("last_name", DataType::Utf8, false).into()), + ), + ("$2", Some(uuid_field.clone().with_name("id").into())), + ]; + let param_values = vec![ + Literal::new(ScalarValue::from("Turing"), None), + Literal::new( + ScalarValue::FixedSizeBinary(16, Some(uuid_bytes)), + Some(uuid_field.metadata().into()), + ), + ]; + + // Check a normal update + let test = ParameterTestWithMetadata { + sql: "update person_with_uuid_extension set last_name=$1 where id=$2", + expected_types: expected_types.clone(), + param_values: param_values.clone(), + }; + + assert_snapshot!( + test.run(), + @r#" + ** Initial Plan: + Dml: op=[Update] table=[person_with_uuid_extension] + Projection: person_with_uuid_extension.id AS id, person_with_uuid_extension.first_name AS first_name, $1 AS last_name + Filter: person_with_uuid_extension.id = $2 + TableScan: person_with_uuid_extension + ** Final Plan: + Dml: op=[Update] table=[person_with_uuid_extension] + Projection: person_with_uuid_extension.id AS id, person_with_uuid_extension.first_name AS first_name, Utf8("Turing") AS last_name + Filter: person_with_uuid_extension.id = FixedSizeBinary(16, "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16") FieldMetadata { inner: {"ARROW:extension:name": "arrow.uuid"} } + TableScan: person_with_uuid_extension + "# + ); + + // Check a prepared update + let test = ParameterTestWithMetadata { + sql: "PREPARE my_plan AS update person_with_uuid_extension set last_name=$1 where id=$2", + expected_types, + param_values + }; + + assert_snapshot!( + test.run(), + @r#" + ** Initial Plan: + Prepare: "my_plan" [Utf8, FixedSizeBinary(16)<{"ARROW:extension:name": "arrow.uuid"}>] + Dml: op=[Update] table=[person_with_uuid_extension] + Projection: person_with_uuid_extension.id AS id, person_with_uuid_extension.first_name AS first_name, $1 AS last_name + Filter: person_with_uuid_extension.id = $2 + TableScan: 
person_with_uuid_extension + ** Final Plan: + Dml: op=[Update] table=[person_with_uuid_extension] + Projection: person_with_uuid_extension.id AS id, person_with_uuid_extension.first_name AS first_name, Utf8("Turing") AS last_name + Filter: person_with_uuid_extension.id = FixedSizeBinary(16, "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16") FieldMetadata { inner: {"ARROW:extension:name": "arrow.uuid"} } + TableScan: person_with_uuid_extension + "# + ); } #[test] fn test_insert_infer_with_metadata() { - // TODO + let uuid_field = Field::new("", DataType::FixedSizeBinary(16), false).with_metadata( + [("ARROW:extension:name".to_string(), "arrow.uuid".to_string())].into(), + ); + let uuid_bytes = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let expected_types = vec![ + ("$1", Some(uuid_field.clone().with_name("id").into())), + ( + "$2", + Some(Field::new("first_name", DataType::Utf8, false).into()), + ), + ( + "$3", + Some(Field::new("last_name", DataType::Utf8, false).into()), + ), + ]; + let param_values = vec![ + Literal::new( + ScalarValue::FixedSizeBinary(16, Some(uuid_bytes)), + Some(uuid_field.metadata().into()), + ), + Literal::new(ScalarValue::from("Alan"), None), + Literal::new(ScalarValue::from("Turing"), None), + ]; + + // Check a normal insert + let test = ParameterTestWithMetadata { + sql: "insert into person_with_uuid_extension (id, first_name, last_name) values ($1, $2, $3)", + expected_types: expected_types.clone(), + param_values: param_values.clone() + }; + + assert_snapshot!( + test.run(), + @r#" + ** Initial Plan: + Dml: op=[Insert Into] table=[person_with_uuid_extension] + Projection: column1 AS id, column2 AS first_name, column3 AS last_name + Values: ($1, $2, $3) + ** Final Plan: + Dml: op=[Insert Into] table=[person_with_uuid_extension] + Projection: column1 AS id, column2 AS first_name, column3 AS last_name + Values: (FixedSizeBinary(16, "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16") FieldMetadata { inner: {"ARROW:extension:name": 
"arrow.uuid"} } AS $1, Utf8("Alan") AS $2, Utf8("Turing") AS $3) + "# + ); + + // Check a prepared insert + let test = ParameterTestWithMetadata { + sql: "PREPARE my_plan AS insert into person_with_uuid_extension (id, first_name, last_name) values ($1, $2, $3)", + expected_types, + param_values + }; + + assert_snapshot!( + test.run(), + @r#" + ** Initial Plan: + Prepare: "my_plan" [FixedSizeBinary(16)<{"ARROW:extension:name": "arrow.uuid"}>, Utf8, Utf8] + Dml: op=[Insert Into] table=[person_with_uuid_extension] + Projection: column1 AS id, column2 AS first_name, column3 AS last_name + Values: ($1, $2, $3) + ** Final Plan: + Dml: op=[Insert Into] table=[person_with_uuid_extension] + Projection: column1 AS id, column2 AS first_name, column3 AS last_name + Values: (FixedSizeBinary(16, "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16") FieldMetadata { inner: {"ARROW:extension:name": "arrow.uuid"} } AS $1, Utf8("Alan") AS $2, Utf8("Turing") AS $3) + "# + ); } #[test] diff --git a/datafusion/sql/tests/common/mod.rs b/datafusion/sql/tests/common/mod.rs index ee1b761970de..5d9fd9f2c374 100644 --- a/datafusion/sql/tests/common/mod.rs +++ b/datafusion/sql/tests/common/mod.rs @@ -151,6 +151,14 @@ impl ContextProvider for MockContextProvider { ), Field::new("😀", DataType::Int32, false), ])), + "person_with_uuid_extension" => Ok(Schema::new(vec![ + Field::new("id", DataType::FixedSizeBinary(16), false).with_metadata( + [("ARROW:extension:name".to_string(), "arrow.uuid".to_string())] + .into(), + ), + Field::new("first_name", DataType::Utf8, false), + Field::new("last_name", DataType::Utf8, false), + ])), "orders" => Ok(Schema::new(vec![ Field::new("order_id", DataType::UInt32, false), Field::new("customer_id", DataType::UInt32, false), From 8752fd85609948a8ab86d779926132c71de23182 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 21 Oct 2025 15:48:59 -0500 Subject: [PATCH 34/46] fix for merge --- .../proto/src/logical_plan/from_proto.rs | 1 - 
datafusion/proto/src/logical_plan/mod.rs | 21 ------------------- .../tests/cases/roundtrip_logical_plan.rs | 2 +- 3 files changed, 1 insertion(+), 23 deletions(-) diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index e96536bdcd47..f57818b571f9 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -18,7 +18,6 @@ use std::sync::Arc; use arrow::datatypes::Field; -use datafusion::execution::registry::FunctionRegistry; use datafusion_common::{ exec_datafusion_err, internal_err, plan_datafusion_err, NullEquality, RecursionUnnestOption, Result, ScalarValue, TableReference, UnnestOptions, diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 8dd9f851284c..9644c9f69fea 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -34,27 +34,6 @@ use crate::{ use crate::protobuf::{proto_error, ToProtoError}; use arrow::datatypes::{DataType, Field, Schema, SchemaBuilder, SchemaRef}; -use datafusion::datasource::cte_worktable::CteWorkTable; -use datafusion::datasource::file_format::arrow::ArrowFormat; -#[cfg(feature = "avro")] -use datafusion::datasource::file_format::avro::AvroFormat; -#[cfg(feature = "parquet")] -use datafusion::datasource::file_format::parquet::ParquetFormat; -use datafusion::datasource::file_format::{ - file_type_to_format, format_as_file_type, FileFormatFactory, -}; -use datafusion::{ - datasource::{ - file_format::{ - csv::CsvFormat, json::JsonFormat as OtherNdJsonFormat, FileFormat, - }, - listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, - view::ViewTable, - TableProvider, - }, - datasource::{provider_as_source, source_as_provider}, - prelude::SessionContext, -}; use datafusion_catalog::cte_worktable::CteWorkTable; use datafusion_common::file_options::file_type::FileType; use datafusion_common::{ diff --git 
a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 9a420f577e38..16d01139d880 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -1099,7 +1099,7 @@ async fn roundtrip_logical_plan_prepared_statement_with_metadata() -> Result<()> .clone(); let bytes = logical_plan_to_bytes(&prepared)?; - let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; + let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?; assert_eq!(format!("{prepared}"), format!("{logical_round_trip}")); // Also check exact equality in case metadata isn't reflected in the display string assert_eq!(&prepared, &logical_round_trip); From 46d4e1307524adfac6840dba85a889332e43505e Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 21 Oct 2025 16:36:02 -0500 Subject: [PATCH 35/46] review comments --- datafusion/expr/src/expr_schema.rs | 1 - datafusion/sql/src/expr/mod.rs | 10 +++++++--- datafusion/sql/src/planner.rs | 13 ++++++++----- datafusion/sql/src/statement.rs | 7 ++++--- datafusion/sql/tests/cases/params.rs | 14 ++++++++++---- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 4f1da2c442a3..318c4aba1693 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -947,7 +947,6 @@ mod tests { ), )); - assert!(expr.nullable(&schema).unwrap()); assert_eq!( expr.data_type_and_nullable(&schema).unwrap(), (DataType::Utf8, true) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 5cff1e349b18..fef0505e993f 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -287,7 +287,9 @@ impl SqlToRel<'_, S> { schema, planner_context, )?), - self.convert_data_type(&data_type)?.data_type().clone(), + self.convert_data_type_to_field(&data_type)? 
+ .data_type() + .clone(), ))) } @@ -297,7 +299,9 @@ impl SqlToRel<'_, S> { uses_odbc_syntax: _, }) => Ok(Expr::Cast(Cast::new( Box::new(lit(value.into_string().unwrap())), - self.convert_data_type(&data_type)?.data_type().clone(), + self.convert_data_type_to_field(&data_type)? + .data_type() + .clone(), ))), SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new( @@ -969,7 +973,7 @@ impl SqlToRel<'_, S> { return not_impl_err!("CAST with format is not supported: {format}"); } - let dt = self.convert_data_type(&data_type)?; + let dt = self.convert_data_type_to_field(&data_type)?; let expr = self.sql_expr_to_logical_expr(expr, schema, planner_context)?; // numeric constants are treated as seconds (rather as nanoseconds) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 05bb1121131b..f62965c6e293 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -428,7 +428,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut fields = Vec::with_capacity(columns.len()); for column in columns { - let data_type = self.convert_data_type(&column.data_type)?; + let data_type = self.convert_data_type_to_field(&column.data_type)?; let not_nullable = column .options .iter() @@ -589,7 +589,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }) } - pub(crate) fn convert_data_type(&self, sql_type: &SQLDataType) -> Result { + pub(crate) fn convert_data_type_to_field( + &self, + sql_type: &SQLDataType, + ) -> Result { // First check if any of the registered type_planner can handle this type if let Some(type_planner) = self.context_provider.get_type_planner() { if let Some(data_type) = type_planner.plan_type(sql_type)? { @@ -601,7 +604,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match sql_type { SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) => { // Arrays may be multi-dimensional. 
- let inner_data_type = self.convert_data_type(inner_sql_type)?; + let inner_data_type = self.convert_data_type_to_field(inner_sql_type)?; // Lists are allowed to have an arbitrarily named field; // however, a name other than 'item' will cause it to fail an @@ -622,7 +625,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { inner_sql_type, maybe_array_size, )) => { - let inner_data_type = self.convert_data_type(inner_sql_type)?; + let inner_data_type = self.convert_data_type_to_field(inner_sql_type)?; if let Some(array_size) = maybe_array_size { Ok(Field::new( "", @@ -763,7 +766,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .iter() .enumerate() .map(|(idx, field)| { - let data_type = self.convert_data_type(&field.field_type)?; + let data_type = self.convert_data_type_to_field(&field.field_type)?; let field_name = match &field.field_name { Some(ident) => ident.clone(), None => Ident::new(format!("c{idx}")), diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 4338285d678b..45b20bbb0526 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -732,7 +732,7 @@ impl SqlToRel<'_, S> { // Convert parser data types to DataFusion data types let mut fields: Vec = data_types .into_iter() - .map(|t| self.convert_data_type(&t)) + .map(|t| self.convert_data_type_to_field(&t)) .collect::>()?; // Create planner context with parameters @@ -1179,7 +1179,7 @@ impl SqlToRel<'_, S> { .. 
}) => { let return_type = match return_type { - Some(t) => Some(self.convert_data_type(&t)?), + Some(t) => Some(self.convert_data_type_to_field(&t)?), None => None, }; let mut planner_context = PlannerContext::new(); @@ -1190,7 +1190,8 @@ impl SqlToRel<'_, S> { let function_args = function_args .into_iter() .map(|arg| { - let data_type = self.convert_data_type(&arg.data_type)?; + let data_type = + self.convert_data_type_to_field(&arg.data_type)?; let default_expr = match arg.default_expr { Some(expr) => Some(self.sql_to_expr( diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index 4e8758762af0..4ac90195e382 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -17,7 +17,11 @@ use crate::logical_plan; use arrow::datatypes::{DataType, Field, FieldRef}; -use datafusion_common::{assert_contains, metadata::Literal, ParamValues, ScalarValue}; +use datafusion_common::{ + assert_contains, + metadata::{format_type_and_metadata, Literal}, + ParamValues, ScalarValue, +}; use datafusion_expr::{LogicalPlan, Prepare, Statement}; use insta::assert_snapshot; use itertools::Itertools as _; @@ -82,9 +86,11 @@ impl ParameterTestWithMetadata<'_> { fn generate_prepare_stmt_and_data_types(sql: &str) -> (LogicalPlan, String) { let plan = logical_plan(sql).unwrap(); let data_types = match &plan { - LogicalPlan::Statement(Statement::Prepare(Prepare { fields, .. })) => { - fields.iter().map(|f| f.data_type()).join(", ").to_string() - } + LogicalPlan::Statement(Statement::Prepare(Prepare { fields, .. 
})) => fields + .iter() + .map(|f| format_type_and_metadata(f.data_type(), Some(f.metadata()))) + .join(", ") + .to_string(), _ => panic!("Expected a Prepare statement"), }; (plan, data_types) From 91d9549bc3e0875ebf22a3a213a91b60db35ccf3 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 21 Oct 2025 16:50:25 -0500 Subject: [PATCH 36/46] rename literal --- datafusion/common/src/metadata.rs | 6 ++--- datafusion/common/src/param_value.rs | 23 ++++++++++++-------- datafusion/core/src/execution/context/mod.rs | 8 ++++--- datafusion/core/tests/sql/select.rs | 9 +++++--- datafusion/expr/src/logical_plan/plan.rs | 4 ++-- datafusion/sql/tests/cases/params.rs | 14 ++++++------ 6 files changed, 37 insertions(+), 27 deletions(-) diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index 9520ed016f7c..61dc285de603 100644 --- a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -24,12 +24,12 @@ use crate::{error::_plan_err, DataFusionError, ScalarValue}; /// A [`ScalarValue`] with optional [`FieldMetadata`] #[derive(Debug, Clone)] -pub struct Literal { +pub struct ScalarAndMetadata { pub value: ScalarValue, pub metadata: Option, } -impl Literal { +impl ScalarAndMetadata { /// Create a new Literal from a scalar value with optional [`FieldMetadata`] pub fn new(value: ScalarValue, metadata: Option) -> Self { Self { value, metadata } @@ -60,7 +60,7 @@ impl Literal { target_type: &DataType, ) -> Result { let new_value = self.value().cast_to(target_type)?; - Ok(Literal::new(new_value, self.metadata.clone())) + Ok(ScalarAndMetadata::new(new_value, self.metadata.clone())) } } diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs index 5b831c9f3047..5abd21564656 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -16,7 +16,7 @@ // under the License. 
use crate::error::{_plan_datafusion_err, _plan_err}; -use crate::metadata::{check_metadata_with_storage_equal, Literal}; +use crate::metadata::{check_metadata_with_storage_equal, ScalarAndMetadata}; use crate::{Result, ScalarValue}; use arrow::datatypes::{DataType, Field, FieldRef}; use std::collections::HashMap; @@ -25,9 +25,9 @@ use std::collections::HashMap; #[derive(Debug, Clone)] pub enum ParamValues { /// For positional query parameters, like `SELECT * FROM test WHERE a > $1 AND b = $2` - List(Vec), + List(Vec), /// For named query parameters, like `SELECT * FROM test WHERE a > $foo AND b = $goo` - Map(HashMap), + Map(HashMap), } impl ParamValues { @@ -81,7 +81,7 @@ impl ParamValues { } } - pub fn get_placeholders_with_values(&self, id: &str) -> Result { + pub fn get_placeholders_with_values(&self, id: &str) -> Result { match self { ParamValues::List(list) => { if id.is_empty() { @@ -115,7 +115,12 @@ impl ParamValues { impl From> for ParamValues { fn from(value: Vec) -> Self { - Self::List(value.into_iter().map(|v| Literal::new(v, None)).collect()) + Self::List( + value + .into_iter() + .map(|v| ScalarAndMetadata::new(v, None)) + .collect(), + ) } } @@ -124,9 +129,9 @@ where K: Into, { fn from(value: Vec<(K, ScalarValue)>) -> Self { - let value: HashMap = value + let value: HashMap = value .into_iter() - .map(|(k, v)| (k.into(), Literal::new(v, None))) + .map(|(k, v)| (k.into(), ScalarAndMetadata::new(v, None))) .collect(); Self::Map(value) } @@ -137,9 +142,9 @@ where K: Into, { fn from(value: HashMap) -> Self { - let value: HashMap = value + let value: HashMap = value .into_iter() - .map(|(k, v)| (k.into(), Literal::new(v, None))) + .map(|(k, v)| (k.into(), ScalarAndMetadata::new(v, None))) .collect(); Self::Map(value) } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 6f27975c171c..7d1ed2ec9e9a 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ 
b/datafusion/core/src/execution/context/mod.rs @@ -64,7 +64,7 @@ use datafusion_catalog::{ DynamicFileCatalog, TableFunction, TableFunctionImpl, UrlTableFactory, }; use datafusion_common::config::ConfigOptions; -use datafusion_common::metadata::Literal; +use datafusion_common::metadata::ScalarAndMetadata; use datafusion_common::{ config::{ConfigExtension, TableOptions}, exec_datafusion_err, exec_err, internal_datafusion_err, not_impl_err, @@ -1239,10 +1239,12 @@ impl SessionContext { })?; // Only allow literals as parameters for now. - let mut params: Vec = parameters + let mut params: Vec = parameters .into_iter() .map(|e| match e { - Expr::Literal(scalar, metadata) => Ok(Literal::new(scalar, metadata)), + Expr::Literal(scalar, metadata) => { + Ok(ScalarAndMetadata::new(scalar, metadata)) + } _ => not_impl_err!("Unsupported parameter type: {}", e), }) .collect::>()?; diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index a8485cd8fa9e..2eb3ba36dd90 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -19,7 +19,7 @@ use std::collections::HashMap; use super::*; use datafusion::assert_batches_eq; -use datafusion_common::{metadata::Literal, ParamValues, ScalarValue}; +use datafusion_common::{metadata::ScalarAndMetadata, ParamValues, ScalarValue}; use insta::assert_snapshot; #[tokio::test] @@ -332,8 +332,11 @@ async fn test_query_parameters_with_metadata() -> Result<()> { let df_with_params_replaced = df .with_param_values(ParamValues::List(vec![ - Literal::new(ScalarValue::UInt32(Some(1)), Some(metadata1.clone().into())), - Literal::new( + ScalarAndMetadata::new( + ScalarValue::UInt32(Some(1)), + Some(metadata1.clone().into()), + ), + ScalarAndMetadata::new( ScalarValue::Utf8(Some("two".to_string())), Some(metadata2.clone().into()), ), diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 5e70cbf1ed91..9541f35e3062 100644 --- 
a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -4251,7 +4251,7 @@ mod tests { binary_expr, col, exists, in_subquery, lit, placeholder, scalar_subquery, GroupingSet, }; - use datafusion_common::metadata::Literal; + use datafusion_common::metadata::ScalarAndMetadata; use datafusion_common::tree_node::{ TransformedResult, TreeNodeRewriter, TreeNodeVisitor, }; @@ -4813,7 +4813,7 @@ mod tests { // Attempt to bind a parameter with metadata let mut scalar_meta = HashMap::new(); scalar_meta.insert("some_key".to_string(), "some_value".to_string()); - let param_values = ParamValues::List(vec![Literal::new( + let param_values = ParamValues::List(vec![ScalarAndMetadata::new( ScalarValue::Int32(Some(42)), Some(scalar_meta.into()), )]); diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index 4ac90195e382..e1075da5f999 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -19,7 +19,7 @@ use crate::logical_plan; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{ assert_contains, - metadata::{format_type_and_metadata, Literal}, + metadata::{format_type_and_metadata, ScalarAndMetadata}, ParamValues, ScalarValue, }; use datafusion_expr::{LogicalPlan, Prepare, Statement}; @@ -58,7 +58,7 @@ impl ParameterTest<'_> { pub struct ParameterTestWithMetadata<'a> { pub sql: &'a str, pub expected_types: Vec<(&'a str, Option)>, - pub param_values: Vec, + pub param_values: Vec, } impl ParameterTestWithMetadata<'_> { @@ -754,8 +754,8 @@ fn test_update_infer_with_metadata() { ("$2", Some(uuid_field.clone().with_name("id").into())), ]; let param_values = vec![ - Literal::new(ScalarValue::from("Turing"), None), - Literal::new( + ScalarAndMetadata::new(ScalarValue::from("Turing"), None), + ScalarAndMetadata::new( ScalarValue::FixedSizeBinary(16, Some(uuid_bytes)), Some(uuid_field.metadata().into()), ), @@ -827,12 +827,12 @@ fn 
test_insert_infer_with_metadata() { ), ]; let param_values = vec![ - Literal::new( + ScalarAndMetadata::new( ScalarValue::FixedSizeBinary(16, Some(uuid_bytes)), Some(uuid_field.metadata().into()), ), - Literal::new(ScalarValue::from("Alan"), None), - Literal::new(ScalarValue::from("Turing"), None), + ScalarAndMetadata::new(ScalarValue::from("Alan"), None), + ScalarAndMetadata::new(ScalarValue::from("Turing"), None), ]; // Check a normal insert From d665a6e18f01a9b052a2e4d3a9136e6bb2413ecc Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 21 Oct 2025 17:00:53 -0500 Subject: [PATCH 37/46] maybe LiteralValue? --- datafusion/common/src/metadata.rs | 6 +++--- datafusion/common/src/param_value.rs | 18 +++++++++--------- datafusion/core/src/execution/context/mod.rs | 6 +++--- datafusion/core/tests/sql/select.rs | 6 +++--- datafusion/expr/src/logical_plan/plan.rs | 4 ++-- datafusion/sql/tests/cases/params.rs | 14 +++++++------- 6 files changed, 27 insertions(+), 27 deletions(-) diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index 61dc285de603..30e9dbec7d3f 100644 --- a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -24,12 +24,12 @@ use crate::{error::_plan_err, DataFusionError, ScalarValue}; /// A [`ScalarValue`] with optional [`FieldMetadata`] #[derive(Debug, Clone)] -pub struct ScalarAndMetadata { +pub struct LiteralValue { pub value: ScalarValue, pub metadata: Option, } -impl ScalarAndMetadata { +impl LiteralValue { /// Create a new Literal from a scalar value with optional [`FieldMetadata`] pub fn new(value: ScalarValue, metadata: Option) -> Self { Self { value, metadata } @@ -60,7 +60,7 @@ impl ScalarAndMetadata { target_type: &DataType, ) -> Result { let new_value = self.value().cast_to(target_type)?; - Ok(ScalarAndMetadata::new(new_value, self.metadata.clone())) + Ok(LiteralValue::new(new_value, self.metadata.clone())) } } diff --git a/datafusion/common/src/param_value.rs 
b/datafusion/common/src/param_value.rs index 5abd21564656..9793582cb113 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -16,7 +16,7 @@ // under the License. use crate::error::{_plan_datafusion_err, _plan_err}; -use crate::metadata::{check_metadata_with_storage_equal, ScalarAndMetadata}; +use crate::metadata::{check_metadata_with_storage_equal, LiteralValue}; use crate::{Result, ScalarValue}; use arrow::datatypes::{DataType, Field, FieldRef}; use std::collections::HashMap; @@ -25,9 +25,9 @@ use std::collections::HashMap; #[derive(Debug, Clone)] pub enum ParamValues { /// For positional query parameters, like `SELECT * FROM test WHERE a > $1 AND b = $2` - List(Vec), + List(Vec), /// For named query parameters, like `SELECT * FROM test WHERE a > $foo AND b = $goo` - Map(HashMap), + Map(HashMap), } impl ParamValues { @@ -81,7 +81,7 @@ impl ParamValues { } } - pub fn get_placeholders_with_values(&self, id: &str) -> Result { + pub fn get_placeholders_with_values(&self, id: &str) -> Result { match self { ParamValues::List(list) => { if id.is_empty() { @@ -118,7 +118,7 @@ impl From> for ParamValues { Self::List( value .into_iter() - .map(|v| ScalarAndMetadata::new(v, None)) + .map(|v| LiteralValue::new(v, None)) .collect(), ) } @@ -129,9 +129,9 @@ where K: Into, { fn from(value: Vec<(K, ScalarValue)>) -> Self { - let value: HashMap = value + let value: HashMap = value .into_iter() - .map(|(k, v)| (k.into(), ScalarAndMetadata::new(v, None))) + .map(|(k, v)| (k.into(), LiteralValue::new(v, None))) .collect(); Self::Map(value) } @@ -142,9 +142,9 @@ where K: Into, { fn from(value: HashMap) -> Self { - let value: HashMap = value + let value: HashMap = value .into_iter() - .map(|(k, v)| (k.into(), ScalarAndMetadata::new(v, None))) + .map(|(k, v)| (k.into(), LiteralValue::new(v, None))) .collect(); Self::Map(value) } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 
7d1ed2ec9e9a..2a00114c6226 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -64,7 +64,7 @@ use datafusion_catalog::{ DynamicFileCatalog, TableFunction, TableFunctionImpl, UrlTableFactory, }; use datafusion_common::config::ConfigOptions; -use datafusion_common::metadata::ScalarAndMetadata; +use datafusion_common::metadata::LiteralValue; use datafusion_common::{ config::{ConfigExtension, TableOptions}, exec_datafusion_err, exec_err, internal_datafusion_err, not_impl_err, @@ -1239,11 +1239,11 @@ impl SessionContext { })?; // Only allow literals as parameters for now. - let mut params: Vec = parameters + let mut params: Vec = parameters .into_iter() .map(|e| match e { Expr::Literal(scalar, metadata) => { - Ok(ScalarAndMetadata::new(scalar, metadata)) + Ok(LiteralValue::new(scalar, metadata)) } _ => not_impl_err!("Unsupported parameter type: {}", e), }) diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 2eb3ba36dd90..e0a23fde6e8b 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -19,7 +19,7 @@ use std::collections::HashMap; use super::*; use datafusion::assert_batches_eq; -use datafusion_common::{metadata::ScalarAndMetadata, ParamValues, ScalarValue}; +use datafusion_common::{metadata::LiteralValue, ParamValues, ScalarValue}; use insta::assert_snapshot; #[tokio::test] @@ -332,11 +332,11 @@ async fn test_query_parameters_with_metadata() -> Result<()> { let df_with_params_replaced = df .with_param_values(ParamValues::List(vec![ - ScalarAndMetadata::new( + LiteralValue::new( ScalarValue::UInt32(Some(1)), Some(metadata1.clone().into()), ), - ScalarAndMetadata::new( + LiteralValue::new( ScalarValue::Utf8(Some("two".to_string())), Some(metadata2.clone().into()), ), diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 9541f35e3062..8d03676e7aae 100644 --- 
a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -4251,7 +4251,7 @@ mod tests { binary_expr, col, exists, in_subquery, lit, placeholder, scalar_subquery, GroupingSet, }; - use datafusion_common::metadata::ScalarAndMetadata; + use datafusion_common::metadata::LiteralValue; use datafusion_common::tree_node::{ TransformedResult, TreeNodeRewriter, TreeNodeVisitor, }; @@ -4813,7 +4813,7 @@ mod tests { // Attempt to bind a parameter with metadata let mut scalar_meta = HashMap::new(); scalar_meta.insert("some_key".to_string(), "some_value".to_string()); - let param_values = ParamValues::List(vec![ScalarAndMetadata::new( + let param_values = ParamValues::List(vec![LiteralValue::new( ScalarValue::Int32(Some(42)), Some(scalar_meta.into()), )]); diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index e1075da5f999..f4d6cec45f3b 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -19,7 +19,7 @@ use crate::logical_plan; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{ assert_contains, - metadata::{format_type_and_metadata, ScalarAndMetadata}, + metadata::{format_type_and_metadata, LiteralValue}, ParamValues, ScalarValue, }; use datafusion_expr::{LogicalPlan, Prepare, Statement}; @@ -58,7 +58,7 @@ impl ParameterTest<'_> { pub struct ParameterTestWithMetadata<'a> { pub sql: &'a str, pub expected_types: Vec<(&'a str, Option)>, - pub param_values: Vec, + pub param_values: Vec, } impl ParameterTestWithMetadata<'_> { @@ -754,8 +754,8 @@ fn test_update_infer_with_metadata() { ("$2", Some(uuid_field.clone().with_name("id").into())), ]; let param_values = vec![ - ScalarAndMetadata::new(ScalarValue::from("Turing"), None), - ScalarAndMetadata::new( + LiteralValue::new(ScalarValue::from("Turing"), None), + LiteralValue::new( ScalarValue::FixedSizeBinary(16, Some(uuid_bytes)), Some(uuid_field.metadata().into()), ), @@ -827,12 +827,12 @@ fn 
test_insert_infer_with_metadata() { ), ]; let param_values = vec![ - ScalarAndMetadata::new( + LiteralValue::new( ScalarValue::FixedSizeBinary(16, Some(uuid_bytes)), Some(uuid_field.metadata().into()), ), - ScalarAndMetadata::new(ScalarValue::from("Alan"), None), - ScalarAndMetadata::new(ScalarValue::from("Turing"), None), + LiteralValue::new(ScalarValue::from("Alan"), None), + LiteralValue::new(ScalarValue::from("Turing"), None), ]; // Check a normal insert From 8c2f549c376bd89fd7ab87d7ab8e3cc3de6ed275 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 21 Oct 2025 17:14:08 -0500 Subject: [PATCH 38/46] remove custom field logic --- datafusion/expr/src/expr_schema.rs | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 318c4aba1693..b5dd1dd67a02 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -431,25 +431,11 @@ impl ExprSchemable for Expr { let field = match self { Expr::Alias(Alias { expr, - name, + name: _, metadata, .. }) => { - let field = match &**expr { - Expr::Placeholder(Placeholder { field, .. }) => match &field { - // TODO: This seems to be pulling metadata/types from the schema - // based on the Alias destination. name? Is this correct? 
- None => schema - .field_from_column(&Column::from_name(name)) - .map(|f| f.clone().with_name(&schema_name)), - Some(field) => Ok(field - .as_ref() - .clone() - .with_name(&schema_name) - .with_nullable(expr.nullable(schema)?)), - }, - _ => expr.to_field(schema).map(|(_, f)| f.as_ref().clone()), - }?; + let field = expr.to_field(schema).map(|(_, f)| f.as_ref().clone())?; let mut combined_metadata = expr.metadata(schema)?; if let Some(metadata) = metadata { From f3fdf4a98aeb25394bbe3fd22d3e18f94b79e183 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 22 Oct 2025 14:52:23 -0500 Subject: [PATCH 39/46] use named constant --- datafusion/sql/src/planner.rs | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index f62965c6e293..b0010aabcf33 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -605,17 +605,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) => { // Arrays may be multi-dimensional. let inner_data_type = self.convert_data_type_to_field(inner_sql_type)?; - - // Lists are allowed to have an arbitrarily named field; - // however, a name other than 'item' will cause it to fail an - // == check against a more idiomatically created list in - // arrow-rs which causes issues. We use the low-level - // constructor here to preserve extension metadata from the - // child type. 
Ok(Field::new( "", DataType::List( - inner_data_type.as_ref().clone().with_name("item").into(), + inner_data_type + .as_ref() + .clone() + .with_name(Field::LIST_FIELD_DEFAULT_NAME) + .into(), ), true, ) @@ -630,7 +627,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(Field::new( "", DataType::FixedSizeList( - inner_data_type.as_ref().clone().with_name("item").into(), + inner_data_type + .as_ref() + .clone() + .with_name(Field::LIST_FIELD_DEFAULT_NAME) + .into(), *array_size as i32, ), true, @@ -640,7 +641,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(Field::new( "", DataType::List( - inner_data_type.as_ref().clone().with_name("item").into(), + inner_data_type + .as_ref() + .clone() + .with_name(Field::LIST_FIELD_DEFAULT_NAME) + .into(), ), true, ) From aecbfa92068e5eb93ddb35e87a22db28443f422b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 22 Oct 2025 16:25:57 -0400 Subject: [PATCH 40/46] Add DataTypeExt and FieldExt to assist converting to/from Fields and DataTypes --- datafusion/common/src/datatype.rs | 96 +++++++++++++++++++++++++++++++ datafusion/common/src/lib.rs | 1 + datafusion/sql/src/planner.rs | 51 +++++----------- 3 files changed, 110 insertions(+), 38 deletions(-) create mode 100644 datafusion/common/src/datatype.rs diff --git a/datafusion/common/src/datatype.rs b/datafusion/common/src/datatype.rs new file mode 100644 index 000000000000..2ab2ffa3408f --- /dev/null +++ b/datafusion/common/src/datatype.rs @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [DataTypeExt] extension trait for converting DataTypes to Fields + +use crate::arrow::datatypes::{DataType, Field}; +use std::sync::Arc; + +/// DataFusion extension methods for Arrow [`DataType`] +pub trait DataTypeExt { + /// convert the type to field with nullable type and "" name + fn into_nullable_field(self) -> Field; + + /// convert the type to field ref with nullable type and "" name + fn into_nullable_field_ref(self) -> Arc; + + // +} + +impl DataTypeExt for DataType { + fn into_nullable_field(self) -> Field { + Field::new("", self, true) + } + + fn into_nullable_field_ref(self) -> Arc { + Arc::new(Field::new("", self, true)) + } +} + +/// DataFusion extension methods for Arrow [`Field`] +pub trait FieldExt { + /// Returns a new Field representing a List of this Field's DataType. + fn into_list(self) -> Self; + + /// Return a new Field representing this Field as the item type of a FixedSizeList + fn into_fixed_size_list(self, list_size: i32) -> Self; + + /// Note that lists are allowed to have an arbitrarily named field; + /// however, a name other than 'item' will cause it to fail an + /// == check against a more idiomatically created list in + /// arrow-rs which causes issues. 
+ fn into_list_item(self) -> Self; +} + +impl FieldExt for Field { + fn into_list(self) -> Self { + DataType::List(Arc::new(self.into_list_item())).into_nullable_field() + } + + fn into_fixed_size_list(self, list_size: i32) -> Self { + DataType::FixedSizeList(self.into_list_item().into(), list_size) + .into_nullable_field() + } + fn into_list_item(self) -> Self { + if self.name() != "item" { + self.with_name("item") + } else { + self + } + } +} + +impl FieldExt for Arc { + fn into_list(self) -> Self { + DataType::List(self.into_list_item()) + .into_nullable_field() + .into() + } + + fn into_fixed_size_list(self, list_size: i32) -> Self { + DataType::FixedSizeList(self.into_list_item(), list_size) + .into_nullable_field() + .into() + } + fn into_list_item(self) -> Self { + if self.name() != "item" { + Arc::unwrap_or_clone(self).with_name("item").into() + } else { + self + } + } +} diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 7b612df90d83..76c7b46e3273 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -39,6 +39,7 @@ pub mod alias; pub mod cast; pub mod config; pub mod cse; +pub mod datatype; pub mod diagnostic; pub mod display; pub mod encryption; diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index f62965c6e293..ea5d96e35770 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -21,8 +21,10 @@ use std::str::FromStr; use std::sync::Arc; use std::vec; +use crate::utils::make_decimal_type; use arrow::datatypes::*; use datafusion_common::config::SqlParserOptions; +use datafusion_common::datatype::{DataTypeExt, FieldExt}; use datafusion_common::error::add_possible_columns_to_diag; use datafusion_common::TableReference; use datafusion_common::{ @@ -31,15 +33,13 @@ use datafusion_common::{ }; use datafusion_common::{not_impl_err, plan_err, DFSchema, DataFusionError, Result}; use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder}; +pub use 
datafusion_expr::planner::ContextProvider; use datafusion_expr::utils::find_column_exprs; use datafusion_expr::{col, Expr}; use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo, TimezoneInfo}; use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; -use crate::utils::make_decimal_type; -pub use datafusion_expr::planner::ContextProvider; - /// SQL parser options #[derive(Debug, Clone, Copy)] pub struct ParserOptions { @@ -596,7 +596,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // First check if any of the registered type_planner can handle this type if let Some(type_planner) = self.context_provider.get_type_planner() { if let Some(data_type) = type_planner.plan_type(sql_type)? { - return Ok(Field::new("", data_type, true).into()); + return Ok(data_type.into_nullable_field_ref()); } } @@ -604,47 +604,22 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match sql_type { SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) => { // Arrays may be multi-dimensional. - let inner_data_type = self.convert_data_type_to_field(inner_sql_type)?; - - // Lists are allowed to have an arbitrarily named field; - // however, a name other than 'item' will cause it to fail an - // == check against a more idiomatically created list in - // arrow-rs which causes issues. We use the low-level - // constructor here to preserve extension metadata from the - // child type. 
- Ok(Field::new( - "", - DataType::List( - inner_data_type.as_ref().clone().with_name("item").into(), - ), - true, - ) - .into()) + Ok(self.convert_data_type_to_field(inner_sql_type)?.into_list()) } SQLDataType::Array(ArrayElemTypeDef::SquareBracket( inner_sql_type, maybe_array_size, )) => { - let inner_data_type = self.convert_data_type_to_field(inner_sql_type)?; + let inner_field = self.convert_data_type_to_field(inner_sql_type)?; if let Some(array_size) = maybe_array_size { - Ok(Field::new( - "", - DataType::FixedSizeList( - inner_data_type.as_ref().clone().with_name("item").into(), - *array_size as i32, - ), - true, - ) - .into()) + let array_size: i32 = (*array_size).try_into().map_err(|_| { + plan_datafusion_err!( + "Array size must be a positive 32 bit integer, got {array_size}" + ) + })?; + Ok(inner_field.into_fixed_size_list(array_size)) } else { - Ok(Field::new( - "", - DataType::List( - inner_data_type.as_ref().clone().with_name("item").into(), - ), - true, - ) - .into()) + Ok(inner_field.into_list()) } } SQLDataType::Array(ArrayElemTypeDef::None) => { From d41616d214d8896a1aef88f17709c686507ab138 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 22 Oct 2025 16:23:22 -0500 Subject: [PATCH 41/46] rename literalvalue --- datafusion/common/src/metadata.rs | 6 +++--- datafusion/common/src/param_value.rs | 18 +++++++++--------- datafusion/core/src/execution/context/mod.rs | 6 +++--- datafusion/core/tests/sql/select.rs | 6 +++--- datafusion/expr/src/logical_plan/plan.rs | 4 ++-- datafusion/sql/tests/cases/params.rs | 14 +++++++------- 6 files changed, 27 insertions(+), 27 deletions(-) diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index 30e9dbec7d3f..61dc285de603 100644 --- a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -24,12 +24,12 @@ use crate::{error::_plan_err, DataFusionError, ScalarValue}; /// A [`ScalarValue`] with optional [`FieldMetadata`] #[derive(Debug, Clone)] -pub struct 
LiteralValue { +pub struct ScalarAndMetadata { pub value: ScalarValue, pub metadata: Option, } -impl LiteralValue { +impl ScalarAndMetadata { /// Create a new Literal from a scalar value with optional [`FieldMetadata`] pub fn new(value: ScalarValue, metadata: Option) -> Self { Self { value, metadata } @@ -60,7 +60,7 @@ impl LiteralValue { target_type: &DataType, ) -> Result { let new_value = self.value().cast_to(target_type)?; - Ok(LiteralValue::new(new_value, self.metadata.clone())) + Ok(ScalarAndMetadata::new(new_value, self.metadata.clone())) } } diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs index 9793582cb113..5abd21564656 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -16,7 +16,7 @@ // under the License. use crate::error::{_plan_datafusion_err, _plan_err}; -use crate::metadata::{check_metadata_with_storage_equal, LiteralValue}; +use crate::metadata::{check_metadata_with_storage_equal, ScalarAndMetadata}; use crate::{Result, ScalarValue}; use arrow::datatypes::{DataType, Field, FieldRef}; use std::collections::HashMap; @@ -25,9 +25,9 @@ use std::collections::HashMap; #[derive(Debug, Clone)] pub enum ParamValues { /// For positional query parameters, like `SELECT * FROM test WHERE a > $1 AND b = $2` - List(Vec), + List(Vec), /// For named query parameters, like `SELECT * FROM test WHERE a > $foo AND b = $goo` - Map(HashMap), + Map(HashMap), } impl ParamValues { @@ -81,7 +81,7 @@ impl ParamValues { } } - pub fn get_placeholders_with_values(&self, id: &str) -> Result { + pub fn get_placeholders_with_values(&self, id: &str) -> Result { match self { ParamValues::List(list) => { if id.is_empty() { @@ -118,7 +118,7 @@ impl From> for ParamValues { Self::List( value .into_iter() - .map(|v| LiteralValue::new(v, None)) + .map(|v| ScalarAndMetadata::new(v, None)) .collect(), ) } @@ -129,9 +129,9 @@ where K: Into, { fn from(value: Vec<(K, ScalarValue)>) -> Self { - let value: HashMap 
= value + let value: HashMap = value .into_iter() - .map(|(k, v)| (k.into(), LiteralValue::new(v, None))) + .map(|(k, v)| (k.into(), ScalarAndMetadata::new(v, None))) .collect(); Self::Map(value) } @@ -142,9 +142,9 @@ where K: Into, { fn from(value: HashMap) -> Self { - let value: HashMap = value + let value: HashMap = value .into_iter() - .map(|(k, v)| (k.into(), LiteralValue::new(v, None))) + .map(|(k, v)| (k.into(), ScalarAndMetadata::new(v, None))) .collect(); Self::Map(value) } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 27d5735a7346..448ee5264afd 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -64,7 +64,7 @@ use datafusion_catalog::{ DynamicFileCatalog, TableFunction, TableFunctionImpl, UrlTableFactory, }; use datafusion_common::config::ConfigOptions; -use datafusion_common::metadata::LiteralValue; +use datafusion_common::metadata::ScalarAndMetadata; use datafusion_common::{ config::{ConfigExtension, TableOptions}, exec_datafusion_err, exec_err, internal_datafusion_err, not_impl_err, @@ -1266,11 +1266,11 @@ impl SessionContext { })?; // Only allow literals as parameters for now. 
- let mut params: Vec = parameters + let mut params: Vec = parameters .into_iter() .map(|e| match e { Expr::Literal(scalar, metadata) => { - Ok(LiteralValue::new(scalar, metadata)) + Ok(ScalarAndMetadata::new(scalar, metadata)) } _ => not_impl_err!("Unsupported parameter type: {}", e), }) diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index e0a23fde6e8b..2eb3ba36dd90 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -19,7 +19,7 @@ use std::collections::HashMap; use super::*; use datafusion::assert_batches_eq; -use datafusion_common::{metadata::LiteralValue, ParamValues, ScalarValue}; +use datafusion_common::{metadata::ScalarAndMetadata, ParamValues, ScalarValue}; use insta::assert_snapshot; #[tokio::test] @@ -332,11 +332,11 @@ async fn test_query_parameters_with_metadata() -> Result<()> { let df_with_params_replaced = df .with_param_values(ParamValues::List(vec![ - LiteralValue::new( + ScalarAndMetadata::new( ScalarValue::UInt32(Some(1)), Some(metadata1.clone().into()), ), - LiteralValue::new( + ScalarAndMetadata::new( ScalarValue::Utf8(Some("two".to_string())), Some(metadata2.clone().into()), ), diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 8d03676e7aae..9541f35e3062 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -4251,7 +4251,7 @@ mod tests { binary_expr, col, exists, in_subquery, lit, placeholder, scalar_subquery, GroupingSet, }; - use datafusion_common::metadata::LiteralValue; + use datafusion_common::metadata::ScalarAndMetadata; use datafusion_common::tree_node::{ TransformedResult, TreeNodeRewriter, TreeNodeVisitor, }; @@ -4813,7 +4813,7 @@ mod tests { // Attempt to bind a parameter with metadata let mut scalar_meta = HashMap::new(); scalar_meta.insert("some_key".to_string(), "some_value".to_string()); - let param_values = ParamValues::List(vec![LiteralValue::new( 
+ let param_values = ParamValues::List(vec![ScalarAndMetadata::new( ScalarValue::Int32(Some(42)), Some(scalar_meta.into()), )]); diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index f4d6cec45f3b..e1075da5f999 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -19,7 +19,7 @@ use crate::logical_plan; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{ assert_contains, - metadata::{format_type_and_metadata, LiteralValue}, + metadata::{format_type_and_metadata, ScalarAndMetadata}, ParamValues, ScalarValue, }; use datafusion_expr::{LogicalPlan, Prepare, Statement}; @@ -58,7 +58,7 @@ impl ParameterTest<'_> { pub struct ParameterTestWithMetadata<'a> { pub sql: &'a str, pub expected_types: Vec<(&'a str, Option)>, - pub param_values: Vec, + pub param_values: Vec, } impl ParameterTestWithMetadata<'_> { @@ -754,8 +754,8 @@ fn test_update_infer_with_metadata() { ("$2", Some(uuid_field.clone().with_name("id").into())), ]; let param_values = vec![ - LiteralValue::new(ScalarValue::from("Turing"), None), - LiteralValue::new( + ScalarAndMetadata::new(ScalarValue::from("Turing"), None), + ScalarAndMetadata::new( ScalarValue::FixedSizeBinary(16, Some(uuid_bytes)), Some(uuid_field.metadata().into()), ), @@ -827,12 +827,12 @@ fn test_insert_infer_with_metadata() { ), ]; let param_values = vec![ - LiteralValue::new( + ScalarAndMetadata::new( ScalarValue::FixedSizeBinary(16, Some(uuid_bytes)), Some(uuid_field.metadata().into()), ), - LiteralValue::new(ScalarValue::from("Alan"), None), - LiteralValue::new(ScalarValue::from("Turing"), None), + ScalarAndMetadata::new(ScalarValue::from("Alan"), None), + ScalarAndMetadata::new(ScalarValue::from("Turing"), None), ]; // Check a normal insert From d8d07a652a6da8e0029326fedd4e9e78045a0747 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 22 Oct 2025 17:29:59 -0500 Subject: [PATCH 42/46] more comments --- 
datafusion/common/src/param_value.rs | 2 +- datafusion/expr/src/expr.rs | 7 +++++-- datafusion/expr/src/expr_schema.rs | 4 ++-- datafusion/proto/proto/datafusion.proto | 4 ++++ datafusion/proto/src/logical_plan/from_proto.rs | 7 +++++-- datafusion/proto/tests/cases/roundtrip_logical_plan.rs | 2 -- datafusion/proto/tests/cases/serialize.rs | 2 +- datafusion/sql/src/expr/value.rs | 2 +- datafusion/sql/src/planner.rs | 6 +++--- 9 files changed, 22 insertions(+), 14 deletions(-) diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs index 5abd21564656..5e6a12908ae1 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -35,7 +35,7 @@ impl ParamValues { /// /// Use [`ParamValues::verify_fields`] to ensure field metadata is considered when /// computing type equality. - #[deprecated] + #[deprecated(since = "51.0.0", note = "Use verifiy_fields instead")] pub fn verify(&self, expect: &[DataType]) -> Result<()> { // make dummy Fields let expect = expect diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index acdd2cffafe1..e1115b714053 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -32,7 +32,6 @@ use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF}; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable}; -use datafusion_common::metadata::FieldMetadata; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion, }; @@ -46,6 +45,9 @@ use sqlparser::ast::{ RenameSelectItem, ReplaceSelectElement, }; +// Moved in 51.0.0 to datafusion_common +pub use datafusion_common::metadata::FieldMetadata; + // This mirrors sqlparser::ast::NullTreatment but we need our own variant // for when the sql feature is disabled. 
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] @@ -1147,6 +1149,7 @@ pub struct Placeholder { impl Placeholder { /// Create a new Placeholder expression + #[deprecated(since = "51.0.0", note = "Use new_with_field instead")] pub fn new(id: String, data_type: Option) -> Self { Self { id, @@ -1155,7 +1158,7 @@ impl Placeholder { } /// Create a new Placeholder expression from a Field - pub fn new_with_metadata(id: String, field: Option) -> Self { + pub fn new_with_field(id: String, field: Option) -> Self { Self { id, field } } } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index b5dd1dd67a02..8c557a5630f0 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -924,7 +924,7 @@ mod tests { placeholder_meta.insert("bar".to_string(), "buzz".to_string()); let placeholder_meta = FieldMetadata::from(placeholder_meta); - let expr = Expr::Placeholder(Placeholder::new_with_metadata( + let expr = Expr::Placeholder(Placeholder::new_with_field( "".to_string(), Some( Field::new("", DataType::Utf8, true) @@ -947,7 +947,7 @@ mod tests { assert_eq!(placeholder_meta, expr_alias.metadata(&schema).unwrap()); // Non-nullable placeholder field should remain non-nullable - let expr = Expr::Placeholder(Placeholder::new_with_metadata( + let expr = Expr::Placeholder(Placeholder::new_with_field( "".to_string(), Some(Field::new("", DataType::Utf8, false).into()), )); diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 8dd35188d0f4..f9400d14a59c 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -179,6 +179,8 @@ message CreateExternalTableNode { message PrepareNode { string name = 1; + // We serialize both the data types and the fields for compatibility with + // older versions (newer versions populate both). 
repeated datafusion_common.ArrowType data_types = 2; LogicalPlanNode input = 3; repeated datafusion_common.Field fields = 4; @@ -413,6 +415,8 @@ message Wildcard { message PlaceholderNode { string id = 1; + // We serialize the data type, metadata, and nullability separately to maintain + // compatibility with older versions datafusion_common.ArrowType data_type = 2; optional bool nullable = 3; map metadata = 4; diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index f57818b571f9..75981325da46 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -635,12 +635,15 @@ pub fn parse_expr( nullable, metadata, }) => match data_type { - None => Ok(Expr::Placeholder(Placeholder::new(id.clone(), None))), + None => Ok(Expr::Placeholder(Placeholder::new_with_field( + id.clone(), + None, + ))), Some(data_type) => { let field = Field::new("", data_type.try_into()?, nullable.unwrap_or(true)) .with_metadata(metadata.clone()); - Ok(Expr::Placeholder(Placeholder::new_with_metadata( + Ok(Expr::Placeholder(Placeholder::new_with_field( id.clone(), Some(field.into()), ))) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 16d01139d880..4c1a8d57b376 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -1101,8 +1101,6 @@ async fn roundtrip_logical_plan_prepared_statement_with_metadata() -> Result<()> let bytes = logical_plan_to_bytes(&prepared)?; let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?; assert_eq!(format!("{prepared}"), format!("{logical_round_trip}")); - // Also check exact equality in case metadata isn't reflected in the display string - assert_eq!(&prepared, &logical_round_trip); Ok(()) } diff --git a/datafusion/proto/tests/cases/serialize.rs 
b/datafusion/proto/tests/cases/serialize.rs index 7a0c867072b4..f45a62e94874 100644 --- a/datafusion/proto/tests/cases/serialize.rs +++ b/datafusion/proto/tests/cases/serialize.rs @@ -139,7 +139,7 @@ fn roundtrip_qualified_alias() { #[test] fn roundtrip_placeholder_with_metadata() { - let expr = Expr::Placeholder(Placeholder::new_with_metadata( + let expr = Expr::Placeholder(Placeholder::new_with_field( "placeholder_id".to_string(), Some( Field::new("", DataType::Utf8, false) diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index d71f3e3f9ea9..f987382ecbdb 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -133,7 +133,7 @@ impl SqlToRel<'_, S> { // Data type of the parameter debug!("type of param {param} param_data_types[idx]: {param_type:?}"); - Ok(Expr::Placeholder(Placeholder::new_with_metadata( + Ok(Expr::Placeholder(Placeholder::new_with_field( param, param_type.cloned(), ))) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index ea5d96e35770..f462ee445a21 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -625,9 +625,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Array(ArrayElemTypeDef::None) => { not_impl_err!("Arrays with unspecified type is not supported") } - other => { - Ok(Field::new("", self.convert_simple_data_type(other)?, true).into()) - } + other => Ok(self + .convert_simple_data_type(other)? 
+ .into_nullable_field_ref()), } } From f83ec4cc7a01f39fdbd903a1e3b47cc6cb8b74f2 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 22 Oct 2025 17:30:49 -0500 Subject: [PATCH 43/46] one more deprecation --- datafusion/sql/src/expr/value.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index f987382ecbdb..3abb2752988f 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -121,7 +121,7 @@ impl SqlToRel<'_, S> { Ok(index) => index - 1, Err(_) => { return if param_data_types.is_empty() { - Ok(Expr::Placeholder(Placeholder::new(param, None))) + Ok(Expr::Placeholder(Placeholder::new_with_field(param, None))) } else { // when PREPARE Statement, param_data_types length is always 0 plan_err!("Invalid placeholder, not a number: {param}") From e9df620e816c8e4eb56791ee513d9d09e9ebf6d3 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 22 Oct 2025 17:37:59 -0500 Subject: [PATCH 44/46] clean up ext traits --- datafusion/common/src/datatype.rs | 33 ++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/datafusion/common/src/datatype.rs b/datafusion/common/src/datatype.rs index 2ab2ffa3408f..4e791b97f354 100644 --- a/datafusion/common/src/datatype.rs +++ b/datafusion/common/src/datatype.rs @@ -17,18 +17,23 @@ //! [DataTypeExt] extension trait for converting DataTypes to Fields -use crate::arrow::datatypes::{DataType, Field}; +use crate::arrow::datatypes::{DataType, Field, FieldRef}; use std::sync::Arc; /// DataFusion extension methods for Arrow [`DataType`] pub trait DataTypeExt { - /// convert the type to field with nullable type and "" name + /// Convert the type to field with nullable type and "" name + /// + /// This is used to track the places where we convert a [`DataType`] + /// into a nameless field to interact with an API that is + /// capable of representing an extension type and/or nullability. 
fn into_nullable_field(self) -> Field; - /// convert the type to field ref with nullable type and "" name - fn into_nullable_field_ref(self) -> Arc; - - // + /// Convert the type to field ref with nullable type and "" name + /// + /// Concise wrapper around [`DataTypeExt::into_nullable_field`] that + /// constructs a [`FieldRef`] + fn into_nullable_field_ref(self) -> FieldRef; } impl DataTypeExt for DataType { @@ -36,7 +41,7 @@ impl DataTypeExt for DataType { Field::new("", self, true) } - fn into_nullable_field_ref(self) -> Arc { + fn into_nullable_field_ref(self) -> FieldRef { Arc::new(Field::new("", self, true)) } } @@ -49,6 +54,8 @@ pub trait FieldExt { /// Return a new Field representing this Field as the item type of a FixedSizeList fn into_fixed_size_list(self, list_size: i32) -> Self; + /// Create a field with the default list field name ("item") + /// /// Note that lists are allowed to have an arbitrarily named field; /// however, a name other than 'item' will cause it to fail an /// == check against a more idiomatically created list in @@ -65,9 +72,10 @@ impl FieldExt for Field { DataType::FixedSizeList(self.into_list_item().into(), list_size) .into_nullable_field() } + fn into_list_item(self) -> Self { - if self.name() != "item" { - self.with_name("item") + if self.name() != Field::LIST_FIELD_DEFAULT_NAME { + self.with_name(Field::LIST_FIELD_DEFAULT_NAME) } else { self } @@ -86,9 +94,12 @@ impl FieldExt for Arc { .into_nullable_field() .into() } + fn into_list_item(self) -> Self { - if self.name() != "item" { - Arc::unwrap_or_clone(self).with_name("item").into() + if self.name() != Field::LIST_FIELD_DEFAULT_NAME { + Arc::unwrap_or_clone(self) + .with_name(Field::LIST_FIELD_DEFAULT_NAME) + .into() } else { self } From 640a6f2351c009c7263a46c7e2e5548f4d359c87 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 23 Oct 2025 09:05:40 -0500 Subject: [PATCH 45/46] typos --- datafusion/common/src/datatype.rs | 2 +- datafusion/common/src/metadata.rs | 2 +- 
 datafusion/common/src/param_value.rs    | 2 +-
 datafusion/proto/src/generated/prost.rs | 4 ++++
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/datafusion/common/src/datatype.rs b/datafusion/common/src/datatype.rs
index 4e791b97f354..85ffcf689c3f 100644
--- a/datafusion/common/src/datatype.rs
+++ b/datafusion/common/src/datatype.rs
@@ -32,7 +32,7 @@ pub trait DataTypeExt {
     /// Convert the type to field ref with nullable type and "" name
     ///
     /// Concise wrapper around [`DataTypeExt::into_nullable_field`] that
-    /// constructs a [`FieldRef`]
+    /// constructs a [`FieldRef`].
     fn into_nullable_field_ref(self) -> FieldRef;
 }
 
diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs
index 61dc285de603..ec0b3bc81467 100644
--- a/datafusion/common/src/metadata.rs
+++ b/datafusion/common/src/metadata.rs
@@ -50,7 +50,7 @@ impl ScalarAndMetadata {
         (self.value, self.metadata)
     }
 
-    /// Cast this literal's storage type
+    /// Cast this value's storage type
     ///
     /// This operation assumes that if the underlying [ScalarValue] can be casted
     /// to a given type that any extension type represented by the metadata is also
diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs
index 5e6a12908ae1..5ab58239e66c 100644
--- a/datafusion/common/src/param_value.rs
+++ b/datafusion/common/src/param_value.rs
@@ -35,7 +35,7 @@ impl ParamValues {
     ///
     /// Use [`ParamValues::verify_fields`] to ensure field metadata is considered when
     /// computing type equality.
- #[deprecated(since = "51.0.0", note = "Use verifiy_fields instead")] + #[deprecated(since = "51.0.0", note = "Use verify_fields instead")] pub fn verify(&self, expect: &[DataType]) -> Result<()> { // make dummy Fields let expect = expect diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 6bdbd46476fb..12b417627411 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -278,6 +278,8 @@ pub struct CreateExternalTableNode { pub struct PrepareNode { #[prost(string, tag = "1")] pub name: ::prost::alloc::string::String, + /// We serialize both the data types and the fields for compatibility with + /// older versions (newer versions populate both). #[prost(message, repeated, tag = "2")] pub data_types: ::prost::alloc::vec::Vec, #[prost(message, optional, boxed, tag = "3")] @@ -653,6 +655,8 @@ pub struct Wildcard { pub struct PlaceholderNode { #[prost(string, tag = "1")] pub id: ::prost::alloc::string::String, + /// We serialize the data type, metadata, and nullability separately to maintain + /// compatibility with older versions #[prost(message, optional, tag = "2")] pub data_type: ::core::option::Option, #[prost(bool, optional, tag = "3")] From 4fdaebfc2b73e5013d4c3cb5559836ab03e12de6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 23 Oct 2025 09:24:53 -0500 Subject: [PATCH 46/46] fix confusingly named variable --- datafusion/sql/src/planner.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index f462ee445a21..99138e1b0016 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -740,13 +740,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let fields = fields .iter() .enumerate() - .map(|(idx, field)| { - let data_type = self.convert_data_type_to_field(&field.field_type)?; - let field_name = match &field.field_name { + .map(|(idx, 
sql_struct_field)| { + let field = self.convert_data_type_to_field(&sql_struct_field.field_type)?; + let field_name = match &sql_struct_field.field_name { Some(ident) => ident.clone(), None => Ident::new(format!("c{idx}")), }; - Ok(data_type.as_ref().clone().with_name(self.ident_normalizer.normalize(field_name))) + Ok(field.as_ref().clone().with_name(self.ident_normalizer.normalize(field_name))) }) .collect::>>()?; Ok(DataType::Struct(Fields::from(fields)))