From 9b426188e1f1225ffd02796e1d7bd13909ee75df Mon Sep 17 00:00:00 2001
From: Tishj
Date: Fri, 19 Sep 2025 11:22:17 +0200
Subject: [PATCH] construct a list out of the values, to pass to 'pyarrow.compute.Expression.isin'

---
 .../arrow/pyarrow_filter_pushdown.cpp         | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/duckdb_py/arrow/pyarrow_filter_pushdown.cpp b/src/duckdb_py/arrow/pyarrow_filter_pushdown.cpp
index 66a6e3fa..af05789a 100644
--- a/src/duckdb_py/arrow/pyarrow_filter_pushdown.cpp
+++ b/src/duckdb_py/arrow/pyarrow_filter_pushdown.cpp
@@ -160,6 +160,15 @@ py::object GetScalar(Value &constant, const string &timezone_config, const Arrow
 	}
 }
 
+static py::list TransformInList(const InFilter &in) {
+	py::list res;
+	ClientProperties default_properties;
+	for (auto &val : in.values) {
+		res.append(PythonObject::FromValue(val, val.type(), default_properties));
+	}
+	return res;
+}
+
 py::object TransformFilterRecursive(TableFilter &filter, vector<string> column_ref, const string &timezone_config,
                                     const ArrowType &type) {
 	auto &import_cache = *DuckDBPyConnection::ImportCache();
@@ -282,17 +291,9 @@ py::object TransformFilterRecursive(TableFilter &filter, vector<string> column_r
 	}
 	case TableFilterType::IN_FILTER: {
 		auto &in_filter = filter.Cast<InFilter>();
-		ConjunctionOrFilter or_filter;
-		value_set_t unique_values;
-		for (const auto &value : in_filter.values) {
-			if (unique_values.find(value) == unique_values.end()) {
-				unique_values.insert(value);
-			}
-		}
-		for (const auto &value : unique_values) {
-			or_filter.child_filters.push_back(make_uniq<ConstantFilter>(ExpressionType::COMPARE_EQUAL, value));
-		}
-		return TransformFilterRecursive(or_filter, column_ref, timezone_config, type);
+		auto constant_field = field(py::tuple(py::cast(column_ref)));
+		auto in_list = TransformInList(in_filter);
+		return constant_field.attr("isin")(std::move(in_list));
 	}
 	case TableFilterType::DYNAMIC_FILTER: {
 		//! Ignore dynamic filters for now, not necessary for correctness