From ecd0c87f136db93d847439dbde61c164a96cdfda Mon Sep 17 00:00:00 2001 From: Henry Harbeck Date: Tue, 28 Oct 2025 22:12:22 +1000 Subject: [PATCH 01/39] explicitly import importlib.util; remove erroneous print --- adbc_driver_duckdb/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/adbc_driver_duckdb/__init__.py b/adbc_driver_duckdb/__init__.py index e81f5090..f925ea9e 100644 --- a/adbc_driver_duckdb/__init__.py +++ b/adbc_driver_duckdb/__init__.py @@ -19,7 +19,7 @@ import enum import functools -import importlib +import importlib.util import typing import adbc_driver_manager @@ -46,5 +46,4 @@ def driver_path() -> str: if duckdb_module_spec is None: msg = "Could not find duckdb shared library. Did you pip install duckdb?" raise ImportError(msg) - print(f"Found duckdb shared library at {duckdb_module_spec.origin}") return duckdb_module_spec.origin From e825f8266b582eb00bfabe1068ffbfb92961f213 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Fri, 31 Oct 2025 09:04:12 +0100 Subject: [PATCH 02/39] bumped submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index abd077cd..d9028d09 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit abd077cd1ee41fcd9417f7f1e61fe9228da02416 +Subproject commit d9028d09d56640599dd8307dd9ae6c8837267e9f From ec3264b789a696d0b4de86f1a8570a12f4abae89 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 30 Oct 2025 11:12:35 +0100 Subject: [PATCH 03/39] add targeted test workflow --- .github/workflows/targeted_test.yml | 86 +++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 .github/workflows/targeted_test.yml diff --git a/.github/workflows/targeted_test.yml b/.github/workflows/targeted_test.yml new file mode 100644 index 00000000..812bb9c5 --- /dev/null +++ b/.github/workflows/targeted_test.yml @@ -0,0 +1,86 @@ +name: Targeted Platform Testing + +on: + workflow_dispatch: + inputs: + platform: + description: 'Platform to test on' + required: true + type: choice + options: + - 'windows-2025' + - 'ubuntu-24.04' + - 'ubuntu-24.04-arm' + - 'macos-15' + - 'macos-15-intel' + python_version: + description: 'Python version to test' + required: true + type: choice + options: + - '3.9' + - '3.10' + - '3.11' + - '3.12' + - '3.13' + - '3.14' + testsuite: + description: 'Test suite to run (ignored if custom_test_path is provided)' + required: false + type: choice + options: + - 'fast' + - 'all' + default: 'fast' + custom_test_path: + description: 'Custom test path (must be in tests/ directory, overrides testsuite)' + required: false + type: string + +jobs: + test: + name: 'Test with Python ${{ inputs.python_version }} on ${{ inputs.platform }}' + runs-on: ${{ inputs.platform }} + + steps: + - name: Checkout DuckDB Python + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: true + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + version: "0.9.0" + enable-cache: false + python-version: ${{ inputs.python_version }} + + - name: Set and validate test path + id: test_path + shell: bash + run: | + if [[ -n "${{ inputs.custom_test_path }}" ]]; then + # test path was passed in + tests_base="$( pwd -P )/tests" + test_path="${{ inputs.custom_test_path }}" + + # Ensure the given test path exists + [[ -e "$test_path" ]] || { echo "${test_path} does not exist"; exit 1; } + + # Resolve custom test path to absolute path + test_path_abs=$(cd "$test_path" 2>/dev/null && pwd -P || ( cd "$(dirname 
"$test_path")" && printf '%s/%s' "$(pwd -P)" "$(basename "$test_path")" ) ) + + # Make sure test_path_abs is inside tests_base + [[ "$test_path_abs" == "$tests_base" || "$test_path_abs" == "$tests_base"/* ]] || { echo "${test_path_abs} is not part of ${tests_base}?"; exit 1; } + + echo "test_path=$test_path_abs" >> $GITHUB_OUTPUT + else + # use a testsuite + echo "test_path=$GITHUB_WORKSPACE/${{ inputs.testsuite == 'fast' && 'tests/fast' || 'tests' }}" >> $GITHUB_OUTPUT + fi + + - name: Run tests + shell: bash + run: | + uv run pytest -vv ${{ steps.test_path.outputs.test_path }} From fdef1fc28213c4a7d319dfc412e64df3ea50d0ae Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Fri, 31 Oct 2025 09:16:00 +0100 Subject: [PATCH 04/39] Remove xfail annotations on adbc tests --- tests/fast/adbc/test_adbc.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/fast/adbc/test_adbc.py b/tests/fast/adbc/test_adbc.py index 80920a99..c20d2a0e 100644 --- a/tests/fast/adbc/test_adbc.py +++ b/tests/fast/adbc/test_adbc.py @@ -1,5 +1,4 @@ import datetime -import sys from pathlib import Path import adbc_driver_manager.dbapi @@ -29,7 +28,6 @@ def example_table(): ) -@xfail(sys.platform == "win32", reason="adbc-driver-manager.adbc_get_info() returns an empty dict on windows") def test_connection_get_info(duck_conn): assert duck_conn.adbc_get_info() != {} @@ -42,9 +40,6 @@ def test_connection_get_table_types(duck_conn): assert duck_conn.adbc_get_table_types() == ["BASE TABLE"] -@xfail( - sys.platform == "win32", reason="adbc-driver-manager.adbc_get_objects() returns an invalid schema dict on windows" -) def test_connection_get_objects(duck_conn): with duck_conn.cursor() as cursor: cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)") @@ -66,9 +61,6 @@ def test_connection_get_objects(duck_conn): assert depth_all.schema == depth_catalogs.schema -@xfail( - sys.platform == "win32", reason="adbc-driver-manager.adbc_get_objects() returns an invalid schema dict on windows" -) def test_connection_get_objects_filters(duck_conn): with duck_conn.cursor() as cursor: cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)") @@ -207,7 +199,6 @@ def test_statement_query(duck_conn): assert cursor.fetch_arrow_table().to_pylist() == [{"foo": 1}] -@xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows") def test_insertion(duck_conn): table = example_table() reader = table.to_reader() @@ -234,7 +225,6 @@ def test_insertion(duck_conn): assert cursor.fetch_arrow_table().to_pydict() == {"count_star()": [8]} -@xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows") def test_read(duck_conn): with duck_conn.cursor() as cursor: filename = Path(__file__).parent / ".." 
/ "data" / "category.csv" From 0304a873a0eb37c779c7decdd7b17b8758019d11 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Fri, 31 Oct 2025 14:15:56 +0100 Subject: [PATCH 05/39] fix config dict value typehint --- _duckdb-stubs/__init__.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 6c36d7be..8040f3c7 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -1048,7 +1048,7 @@ def commit(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnectio def connect( database: str | pathlib.Path = ":memory:", read_only: bool = False, - config: dict[str, str] | None = None, + config: dict[str, str | bool | int | float | list[str]] | None = None, ) -> DuckDBPyConnection: ... def create_function( name: str, From e991b2a70aaaa1d057cc33af3d8eb5d6fcbfecb4 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Fri, 31 Oct 2025 13:24:36 +0100 Subject: [PATCH 06/39] Add df data and tz type columns back into the same loc after type conversion --- .../include/duckdb_python/pyresult.hpp | 2 +- src/duckdb_py/pyresult.cpp | 26 +++++++++---------- tests/fast/pandas/test_column_order.py | 16 ++++++++++++ 3 files changed, 29 insertions(+), 15 deletions(-) create mode 100644 tests/fast/pandas/test_column_order.py diff --git a/src/duckdb_py/include/duckdb_python/pyresult.hpp b/src/duckdb_py/include/duckdb_python/pyresult.hpp index fc3641c4..941a203b 100644 --- a/src/duckdb_py/include/duckdb_python/pyresult.hpp +++ b/src/duckdb_py/include/duckdb_python/pyresult.hpp @@ -66,7 +66,7 @@ struct DuckDBPyResult { PandasDataFrame FrameFromNumpy(bool date_as_object, const py::handle &o); - void ChangeToTZType(PandasDataFrame &df); + void ConvertDateTimeTypes(PandasDataFrame &df, bool date_as_object) const; unique_ptr FetchNext(QueryResult &result); unique_ptr FetchNextRaw(QueryResult &result); unique_ptr InitializeNumpyConversion(bool pandas = false); diff --git a/src/duckdb_py/pyresult.cpp b/src/duckdb_py/pyresult.cpp index 43edf0e1..e92f6abe 100644 --- a/src/duckdb_py/pyresult.cpp +++ b/src/duckdb_py/pyresult.cpp @@ -287,8 +287,13 @@ py::dict DuckDBPyResult::FetchNumpyInternal(bool stream, idx_t vectors_per_chunk return res; } +static void ReplaceDFColumn(PandasDataFrame &df, const char *col_name, idx_t idx, const py::handle &new_value) { + df.attr("drop")("columns"_a = col_name, "inplace"_a = true); + df.attr("insert")(idx, col_name, new_value, "allow_duplicates"_a = false); +} + // TODO: unify these with an enum/flag to indicate which conversions to do -void DuckDBPyResult::ChangeToTZType(PandasDataFrame &df) { +void DuckDBPyResult::ConvertDateTimeTypes(PandasDataFrame &df, bool date_as_object) const { auto names = df.attr("columns").cast>(); for (idx_t i = 0; i < result->ColumnCount(); i++) { @@ -297,8 +302,10 @@ void DuckDBPyResult::ChangeToTZType(PandasDataFrame &df) { auto utc_local = df[names[i].c_str()].attr("dt").attr("tz_localize")("UTC"); auto new_value = utc_local.attr("dt").attr("tz_convert")(result->client_properties.time_zone); // We need to create the column anew because the exact dt changed to a new timezone - df.attr("drop")("columns"_a = names[i].c_str(), "inplace"_a = true); - df.attr("__setitem__")(names[i].c_str(), new_value); + ReplaceDFColumn(df, names[i].c_str(), i, new_value); + } else if (date_as_object && result->types[i] == LogicalType::DATE) { + auto new_value = df[names[i].c_str()].attr("dt").attr("date"); + ReplaceDFColumn(df, names[i].c_str(), i, new_value); } } } @@ -374,20 +381,11 
@@ PandasDataFrame DuckDBPyResult::FrameFromNumpy(bool date_as_object, const py::ha } PandasDataFrame df = py::cast(pandas.attr("DataFrame").attr("from_dict")(o)); - // Unfortunately we have to do a type change here for timezones since these types are not supported by numpy - ChangeToTZType(df); + // Convert TZ and (optionally) Date types + ConvertDateTimeTypes(df, date_as_object); auto names = df.attr("columns").cast>(); D_ASSERT(result->ColumnCount() == names.size()); - if (date_as_object) { - for (idx_t i = 0; i < result->ColumnCount(); i++) { - if (result->types[i] == LogicalType::DATE) { - auto new_value = df[names[i].c_str()].attr("dt").attr("date"); - df.attr("drop")("columns"_a = names[i].c_str(), "inplace"_a = true); - df.attr("__setitem__")(names[i].c_str(), new_value); - } - } - } return df; } diff --git a/tests/fast/pandas/test_column_order.py b/tests/fast/pandas/test_column_order.py new file mode 100644 index 00000000..0600bc4c --- /dev/null +++ b/tests/fast/pandas/test_column_order.py @@ -0,0 +1,16 @@ +import duckdb + + +class TestColumnOrder: + def test_column_order(self, duckdb_cursor): + to_execute = """ + CREATE OR REPLACE TABLE t1 AS ( + SELECT NULL AS col1, + NULL::TIMESTAMPTZ AS timepoint, + NULL::DATE AS date, + ); + SELECT timepoint, date, col1 FROM t1; + """ + df = duckdb.execute(to_execute).fetchdf() + cols = list(df.columns) + assert cols == ["timepoint", "date", "col1"] From 83aa04ac10566d9a2f3247564d14a76dcd2c0984 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Fri, 31 Oct 2025 14:59:49 +0100 Subject: [PATCH 07/39] Enable pyarrow with python 3.14 --- pyproject.toml | 8 ++++---- tests/conftest.py | 42 ------------------------------------------ 2 files changed, 4 insertions(+), 46 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7df13b61..f5983840 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ all = [ # users can install duckdb with 'duckdb[all]', which will install this l "fsspec", # used in duckdb.filesystem "numpy", # used in duckdb.experimental.spark and in duckdb.fetchnumpy() "pandas", # used for pandas dataframes all over the place - "pyarrow; python_version < '3.14'", # used for pyarrow support + "pyarrow", # used for pyarrow support "adbc-driver-manager", # for the adbc driver ] @@ -226,7 +226,7 @@ stubdeps = [ # dependencies used for typehints in the stubs "fsspec", "pandas", "polars", - "pyarrow; python_version < '3.14'", + "pyarrow", ] test = [ # dependencies used for running tests "adbc-driver-manager", @@ -248,7 +248,7 @@ test = [ # dependencies used for running tests "urllib3", "fsspec>=2022.11.0", "pandas>=2.0.0", - "pyarrow>=18.0.0; python_version < '3.14'", + "pyarrow>=18.0.0", "torch>=2.2.2; python_version < '3.14' and ( sys_platform != 'darwin' or platform_machine != 'x86_64' or python_version < '3.13' )", "tensorflow==2.14.0; sys_platform == 'darwin' and python_version < '3.12'", "tensorflow-cpu>=2.14.0; sys_platform == 'linux' and platform_machine != 'aarch64' and python_version < '3.12'", @@ -265,7 +265,7 @@ scripts = [ # dependencies used for running scripts "pandas", "pcpp", "polars", - "pyarrow; python_version < '3.14'", + "pyarrow", "pytz" ] pypi = [ # dependencies used by the pypi cleanup script diff --git a/tests/conftest.py b/tests/conftest.py index f2ad7cec..bfb458a5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,4 @@ import os -import sys import warnings from importlib import import_module from pathlib import Path @@ -36,47 +35,6 @@ def import_pandas(): pytest.skip("Couldn't 
import pandas") -@pytest.hookimpl(hookwrapper=True) -def pytest_runtest_call(item): - """Convert missing pyarrow imports to skips. - - TODO(evertlammerts): Remove skip when pyarrow releases for 3.14. - https://github.com/duckdblabs/duckdb-internal/issues/6182 - """ - outcome = yield - if sys.version_info[:2] == (3, 14): - try: - outcome.get_result() - except ImportError as e: - if e.name == "pyarrow": - pytest.skip(f"pyarrow not available - {item.name} requires pyarrow") - else: - raise - - -@pytest.hookimpl(hookwrapper=True) -def pytest_make_collect_report(collector): - """Wrap module collection to catch pyarrow import errors on Python 3.14. - - If we're on Python 3.14 and a test module raises ModuleNotFoundError - for 'pyarrow', mark the entire module as xfailed rather than failing collection. - - TODO(evertlammerts): Remove skip when pyarrow releases for 3.14. - https://github.com/duckdblabs/duckdb-internal/issues/6182 - """ - outcome = yield - report: pytest.CollectReport = outcome.get_result() - - if sys.version_info[:2] == (3, 14): - # Only handle failures from module collectors - if report.failed and collector.__class__.__name__ == "Module": - longreprtext = report.longreprtext - if "ModuleNotFoundError: No module named 'pyarrow'" in longreprtext: - report.outcome = "skipped" - reason = f"XFAIL: [pyarrow not available] {longreprtext}" - report.longrepr = (report.fspath, None, reason) - - # https://docs.pytest.org/en/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option # https://stackoverflow.com/a/47700320 def pytest_addoption(parser): From 48382326da6b7ded83490397aa29181b25fdd464 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sat, 1 Nov 2025 16:44:23 +0100 Subject: [PATCH 08/39] use macos-15-intel now that macos-13 is closing down --- .github/workflows/packaging_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/packaging_wheels.yml b/.github/workflows/packaging_wheels.yml index a2e4f857..8e656abd 100644 --- a/.github/workflows/packaging_wheels.yml +++ b/.github/workflows/packaging_wheels.yml @@ -37,7 +37,7 @@ jobs: - { os: ubuntu-24.04-arm, arch: aarch64, cibw_system: manylinux } - { os: macos-15, arch: arm64, cibw_system: macosx } - { os: macos-15, arch: universal2, cibw_system: macosx } - - { os: macos-13, arch: x86_64, cibw_system: macosx } + - { os: macos-15-intel, arch: x86_64, cibw_system: macosx } minimal: - ${{ inputs.minimal }} exclude: From 07c1414259cdf54b567e3e83c84b141e3f015f20 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sat, 1 Nov 2025 16:55:55 +0100 Subject: [PATCH 09/39] release s3 upload fix --- .github/workflows/release.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f54b0f76..f3550fb0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -132,14 +132,12 @@ jobs: path: artifacts/ merge-multiple: true - - name: Authenticate with AWS - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: 'us-east-2' - aws-access-key-id: ${{ secrets.S3_DUCKDB_STAGING_ID }} - aws-secret-access-key: ${{ secrets.S3_DUCKDB_STAGING_KEY }} - - name: Upload Artifacts + env: + AWS_ENDPOINT_URL: ${{ secrets.S3_DUCKDB_STAGING_ENDPOINT }} + AWS_ACCESS_KEY_ID: ${{ secrets.S3_DUCKDB_STAGING_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DUCKDB_STAGING_KEY }} + run: | aws s3 cp artifacts ${{ needs.workflow_state.outputs.s3_url }} --recursive From 
c04f9b8ac1b5486efae451dc4f7c4a779430e159 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Tue, 4 Nov 2025 14:08:17 +0100 Subject: [PATCH 10/39] spark imports --- duckdb/__init__.py | 4 ++++ duckdb/experimental/__init__.py | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/duckdb/__init__.py b/duckdb/__init__.py index e1a4aa9a..0fb3311c 100644 --- a/duckdb/__init__.py +++ b/duckdb/__init__.py @@ -201,6 +201,9 @@ Value, ) +# explicitly make the experimental module available +from . import experimental + __all__: list[str] = [ "BinaryValue", "BinderException", @@ -316,6 +319,7 @@ "enum_type", "execute", "executemany", + "experimental", "extract_statements", "fetch_arrow_table", "fetch_df", diff --git a/duckdb/experimental/__init__.py b/duckdb/experimental/__init__.py index 1b5ee51b..51d08709 100644 --- a/duckdb/experimental/__init__.py +++ b/duckdb/experimental/__init__.py @@ -1,3 +1,5 @@ from . import spark # noqa: D104 -__all__ = spark.__all__ +__all__ = [ + "spark", +] From 02ceac147be0dfb06ba8d25de7619c528b4d39ad Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 5 Nov 2025 10:56:23 +0100 Subject: [PATCH 11/39] Bumped submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index d9028d09..7043621a 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit d9028d09d56640599dd8307dd9ae6c8837267e9f +Subproject commit 7043621a83d1be17ba6b278f0f7a3ec65df98d93 From c14473ec1c5f58f34e2e08215dba2b6847e23522 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 5 Nov 2025 13:07:32 +0100 Subject: [PATCH 12/39] Fix failing test due to changed error msg --- tests/fast/test_relation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/fast/test_relation.py b/tests/fast/test_relation.py index 7b60a105..4d6f6591 100644 --- a/tests/fast/test_relation.py +++ b/tests/fast/test_relation.py @@ -280,7 +280,9 @@ def test_value_relation(self, duckdb_cursor): rel = duckdb_cursor.values((const(1), const(2), const(3)), const(4)) # Using Expressions that can't be resolved: - with pytest.raises(duckdb.BinderException, match='Referenced column "a" not found in FROM clause!'): + with pytest.raises( + duckdb.BinderException, match='Referenced column "a" was not found because the FROM clause is missing' + ): duckdb_cursor.values(duckdb.ColumnExpression("a")) def test_insert_into_operator(self): From e35a2ceedf5f85671f393a01d6621ea1e27b4516 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 5 Nov 2025 13:21:58 +0100 Subject: [PATCH 13/39] mypy shouldn't check experimental.spark --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f5983840..0c08c412 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -327,6 +327,10 @@ exclude = [ "tests", "scripts", ] +[[tool.mypy.overrides]] +module = "duckdb.experimental.*" +ignore_errors = true + [[tool.mypy.overrides]] module = [ "fsspec.*", From 17a0cad1a2a9d052c5829ed3811cd485c182474d Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 5 Nov 2025 16:11:17 +0100 Subject: [PATCH 14/39] remove experimental import because of the transitive dependencies that requires --- duckdb/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/duckdb/__init__.py b/duckdb/__init__.py index 0fb3311c..e1a4aa9a 100644 --- a/duckdb/__init__.py +++ b/duckdb/__init__.py @@ -201,9 +201,6 @@ Value, ) -# explicitly make the experimental module available -from . 
import experimental - __all__: list[str] = [ "BinaryValue", "BinderException", @@ -319,7 +316,6 @@ "enum_type", "execute", "executemany", - "experimental", "extract_statements", "fetch_arrow_table", "fetch_df", From f5618a3c3b09a4cb1983617a271628c37079800d Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Fri, 7 Nov 2025 09:05:36 +0100 Subject: [PATCH 15/39] Bumped submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 7043621a..783f08ff 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 7043621a83d1be17ba6b278f0f7a3ec65df98d93 +Subproject commit 783f08ffd89b1d1290b2d3dec0b3ba12d8c233bf From f3b8c8adf29dfd9c69df182eabb98fc7316221c0 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Fri, 7 Nov 2025 09:30:17 +0100 Subject: [PATCH 16/39] Bumped submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 783f08ff..7ce99bc0 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 783f08ffd89b1d1290b2d3dec0b3ba12d8c233bf +Subproject commit 7ce99bc04130615dfc3a39dfb79177a8942fefba From 95a9968bc10720401f41e982d2cf99b64399fe5d Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sat, 1 Nov 2025 11:25:23 +0100 Subject: [PATCH 17/39] Fix InsertRelation on attached database --- src/duckdb_py/pyrelation.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/duckdb_py/pyrelation.cpp b/src/duckdb_py/pyrelation.cpp index 3553bff0..5d77dc9f 100644 --- a/src/duckdb_py/pyrelation.cpp +++ b/src/duckdb_py/pyrelation.cpp @@ -23,6 +23,8 @@ #include "duckdb/common/arrow/physical_arrow_collector.hpp" #include "duckdb_python/arrow/arrow_export_utils.hpp" +#include + namespace duckdb { DuckDBPyRelation::DuckDBPyRelation(shared_ptr rel_p) : rel(std::move(rel_p)) { @@ -1511,7 +1513,7 @@ DuckDBPyRelation &DuckDBPyRelation::Execute() { void DuckDBPyRelation::InsertInto(const string &table) { AssertRelation(); auto parsed_info = QualifiedName::Parse(table); - auto insert = rel->InsertRel(parsed_info.schema, parsed_info.name); + auto insert = rel->InsertRel(parsed_info.catalog, parsed_info.schema, parsed_info.name); PyExecuteRelation(insert); } @@ -1565,14 +1567,24 @@ void DuckDBPyRelation::Update(const py::object &set_p, const py::object &where) void DuckDBPyRelation::Insert(const py::object ¶ms) { AssertRelation(); - if (!IsAcceptedInsertRelationType(*this->rel)) { + if (this->rel->type != RelationType::TABLE_RELATION) { throw InvalidInputException("'DuckDBPyRelation.insert' can only be used on a table relation"); } vector> values {DuckDBPyConnection::TransformPythonParamList(params)}; D_ASSERT(py::gil_check()); py::gil_scoped_release release; - rel->Insert(values); + // Grab table info + auto table_relation = static_cast(this->rel.get()); + auto catalog = table_relation->description->database; + auto schema = table_relation->description->schema; + auto table = table_relation->description->table; + // Create a value relation + vector column_names; + auto value_rel = + make_shared_ptr(this->rel->context->GetContext(), values, std::move(column_names), "values"); + // Now insert + value_rel->Insert(catalog, schema, table); } void DuckDBPyRelation::Create(const string &table) { From 10e0ef38a24b22741c97b18afc18e0b2579c2ec9 Mon Sep 17 00:00:00 2001 From: Julian Meyers Date: Sat, 8 Nov 2025 18:58:52 -0600 Subject: [PATCH 18/39] Add explicit polars overloads --- 
_duckdb-stubs/__init__.pyi | 41 +++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 8040f3c7..a4965632 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -318,7 +318,18 @@ class DuckDBPyConnection: def list_type(self, type: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... def load_extension(self, extension: str) -> None: ... def map_type(self, key: sqltypes.DuckDBPyType, value: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... - def pl(self, rows_per_batch: pytyping.SupportsInt = 1000000, *, lazy: bool = False) -> polars.DataFrame: ... + @pytyping.overload + def pl( + self, rows_per_batch: pytyping.SupportsInt = 1000000, *, lazy: pytyping.Literal[False] = ... + ) -> polars.DataFrame: ... + @pytyping.overload + def pl( + self, rows_per_batch: pytyping.SupportsInt = 1000000, *, lazy: pytyping.Literal[True] + ) -> polars.LazyFrame: ... + @pytyping.overload + def pl( + self, rows_per_batch: pytyping.SupportsInt = 1000000, *, lazy: bool = False + ) -> pytyping.Union[polars.DataFrame, polars.LazyFrame]: ... def query(self, query: str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... def query_progress(self) -> float: ... def read_csv( @@ -596,7 +607,16 @@ class DuckDBPyRelation: ) -> DuckDBPyRelation: ... def order(self, order_expr: str) -> DuckDBPyRelation: ... def percent_rank(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... - def pl(self, batch_size: pytyping.SupportsInt = 1000000, *, lazy: bool = False) -> polars.DataFrame: ... + @pytyping.overload + def pl( + self, batch_size: pytyping.SupportsInt = 1000000, *, lazy: pytyping.Literal[False] = ... + ) -> polars.DataFrame: ... + @pytyping.overload + def pl(self, batch_size: pytyping.SupportsInt = 1000000, *, lazy: pytyping.Literal[True]) -> polars.LazyFrame: ... + @pytyping.overload + def pl( + self, batch_size: pytyping.SupportsInt = 1000000, *, lazy: bool = False + ) -> pytyping.Union[polars.DataFrame, polars.LazyFrame]: ... def product( self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... @@ -1241,12 +1261,27 @@ def map_type( def order( df: pandas.DataFrame, order_expr: str, *, connection: DuckDBPyConnection | None = None ) -> DuckDBPyRelation: ... +@pytyping.overload def pl( rows_per_batch: pytyping.SupportsInt = 1000000, *, - lazy: bool = False, + lazy: pytyping.Literal[False] = ..., connection: DuckDBPyConnection | None = None, ) -> polars.DataFrame: ... +@pytyping.overload +def pl( + rows_per_batch: pytyping.SupportsInt = 1000000, + *, + lazy: pytyping.Literal[True], + connection: DuckDBPyConnection | None = None, +) -> polars.LazyFrame: ... +@pytyping.overload +def pl( + rows_per_batch: pytyping.SupportsInt = 1000000, + *, + lazy: bool = False, + connection: DuckDBPyConnection | None = None, +) -> pytyping.Union[polars.DataFrame, polars.LazyFrame]: ... def project( df: pandas.DataFrame, *args: str | Expression, groups: str = "", connection: DuckDBPyConnection | None = None ) -> DuckDBPyRelation: ... 
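With these overloads in place, a type checker can narrow the return type of pl() from the literal value of lazy instead of reporting a polars.DataFrame | polars.LazyFrame union at every call site. A minimal sketch of the intended narrowing (assuming an in-memory connection and that polars is installed):

    import duckdb

    rel = duckdb.sql("SELECT 42 AS answer")

    eager = rel.pl()              # now inferred as polars.DataFrame (lazy defaults to False)
    deferred = rel.pl(lazy=True)  # now inferred as polars.LazyFrame

    print(eager.shape)         # (1, 1)
    print(deferred.collect())  # a LazyFrame materializes into a DataFrame on collect()

Calls that pass lazy as a plain, non-literal bool still fall through to the final overload and resolve to the union.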
From 20bfd52a0932327fafd94fa16a25c6acc38f5dcb Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 6 Nov 2025 11:54:51 +0100 Subject: [PATCH 19/39] review feedback --- external/duckdb | 2 +- .../include/duckdb_python/pyrelation.hpp | 2 +- src/duckdb_py/pyrelation.cpp | 20 ++----------------- tests/fast/test_insert.py | 4 +--- 4 files changed, 5 insertions(+), 23 deletions(-) diff --git a/external/duckdb b/external/duckdb index 7ce99bc0..95fcb8f1 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 7ce99bc04130615dfc3a39dfb79177a8942fefba +Subproject commit 95fcb8f18819b1a77df079a7fcb753a8c2f52844 diff --git a/src/duckdb_py/include/duckdb_python/pyrelation.hpp b/src/duckdb_py/include/duckdb_python/pyrelation.hpp index e1f78b5a..e272ca41 100644 --- a/src/duckdb_py/include/duckdb_python/pyrelation.hpp +++ b/src/duckdb_py/include/duckdb_python/pyrelation.hpp @@ -235,7 +235,7 @@ struct DuckDBPyRelation { void InsertInto(const string &table); - void Insert(const py::object ¶ms = py::list()); + void Insert(const py::object ¶ms = py::list()) const; void Update(const py::object &set, const py::object &where = py::none()); void Create(const string &table); diff --git a/src/duckdb_py/pyrelation.cpp b/src/duckdb_py/pyrelation.cpp index 5d77dc9f..08b001be 100644 --- a/src/duckdb_py/pyrelation.cpp +++ b/src/duckdb_py/pyrelation.cpp @@ -23,8 +23,6 @@ #include "duckdb/common/arrow/physical_arrow_collector.hpp" #include "duckdb_python/arrow/arrow_export_utils.hpp" -#include - namespace duckdb { DuckDBPyRelation::DuckDBPyRelation(shared_ptr rel_p) : rel(std::move(rel_p)) { @@ -1517,10 +1515,6 @@ void DuckDBPyRelation::InsertInto(const string &table) { PyExecuteRelation(insert); } -static bool IsAcceptedInsertRelationType(const Relation &relation) { - return relation.type == RelationType::TABLE_RELATION; -} - void DuckDBPyRelation::Update(const py::object &set_p, const py::object &where) { AssertRelation(); unique_ptr condition; @@ -1565,7 +1559,7 @@ void DuckDBPyRelation::Update(const py::object &set_p, const py::object &where) return rel->Update(std::move(names), std::move(expressions), std::move(condition)); } -void DuckDBPyRelation::Insert(const py::object ¶ms) { +void DuckDBPyRelation::Insert(const py::object ¶ms) const { AssertRelation(); if (this->rel->type != RelationType::TABLE_RELATION) { throw InvalidInputException("'DuckDBPyRelation.insert' can only be used on a table relation"); @@ -1574,17 +1568,7 @@ void DuckDBPyRelation::Insert(const py::object ¶ms) { D_ASSERT(py::gil_check()); py::gil_scoped_release release; - // Grab table info - auto table_relation = static_cast(this->rel.get()); - auto catalog = table_relation->description->database; - auto schema = table_relation->description->schema; - auto table = table_relation->description->table; - // Create a value relation - vector column_names; - auto value_rel = - make_shared_ptr(this->rel->context->GetContext(), values, std::move(column_names), "values"); - // Now insert - value_rel->Insert(catalog, schema, table); + rel->Insert(values); } void DuckDBPyRelation::Create(const string &table) { diff --git a/tests/fast/test_insert.py b/tests/fast/test_insert.py index a61efd2e..455e6e48 100644 --- a/tests/fast/test_insert.py +++ b/tests/fast/test_insert.py @@ -27,6 +27,4 @@ def test_insert_with_schema(self, duckdb_cursor): res = duckdb_cursor.table("not_main.tbl").fetchall() assert len(res) == 10 - # TODO: This is not currently supported # noqa: TD002, TD003 - with pytest.raises(duckdb.CatalogException, match="Table 
with name tbl does not exist"): - duckdb_cursor.table("not_main.tbl").insert([42, 21, 1337]) + duckdb_cursor.table("not_main.tbl").insert((42,)) From 70380a14a1afd0b2117f41fcba65e0b2d888ee1a Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Tue, 11 Nov 2025 15:12:02 +0100 Subject: [PATCH 20/39] Submodule at 68d7555 for 1.4.2 release --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 95fcb8f1..68d7555f 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 95fcb8f18819b1a77df079a7fcb753a8c2f52844 +Subproject commit 68d7555f68bd25c1a251ccca2e6338949c33986a From 30e53c4cac65ebd0c1812f9d2f4eb6b50e7f0b23 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 12 Nov 2025 13:29:20 +0100 Subject: [PATCH 21/39] Fix project metadata --- CHANGELOG.md | 19 ------------------- pyproject.toml | 4 ++-- 2 files changed, 2 insertions(+), 21 deletions(-) delete mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index d4f4b61b..00000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,19 +0,0 @@ -# Changelog - -## v1.4.1 -**DuckDB Core**: v1.4.1 - -### Bug Fixes -- **ADBC Driver**: Fixed ADBC driver implementation (#81) -- **SQLAlchemy compatibility**: Added `__hash__` method overload (#61) -- **Error Handling**: Reset PyErr before throwing Python exceptions (#69) -- **Polars Lazyframes**: Fixed Polars expression pushdown (#102) - -### Code Quality Improvements & Developer Experience -- **MyPy Support**: MyPy is functional again and better integrated with the dev workflow -- **Stubs**: Re-created and manually curated stubs for the binary extension -- **Type Shadowing**: Deprecated `typing` and `functional` modules -- **Linting & Formatting**: Comprehensive code quality improvements with Ruff -- **Type Annotations**: Added missing overloads and improved type coverage -- **Pre-commit Integration**: Added ruff, clang-format, cmake-format and mypy configs -- **CI/CD**: Added code quality workflow diff --git a/pyproject.toml b/pyproject.toml index 0c08c412..6895b12e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ readme = "README.md" keywords = ["DuckDB", "Database", "SQL", "OLAP"] requires-python = ">=3.9.0" classifiers = [ - "Development Status :: 3 - Alpha", + "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Topic :: Database", @@ -40,7 +40,7 @@ maintainers = [{name = "DuckDB Foundation"}] Documentation = "https://duckdb.org/docs/stable/clients/python/overview" Source = "https://github.com/duckdb/duckdb-python" Issues = "https://github.com/duckdb/duckdb-python/issues" -Changelog = "https://github.com/duckdb/duckdb/releases" +Changelog = "https://github.com/duckdb/duckdb-python/releases" [project.optional-dependencies] all = [ # users can install duckdb with 'duckdb[all]', which will install this list From 4746ac80e9e0a4f17215e064c9c05e248688fb87 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 5 Nov 2025 15:03:51 +0100 Subject: [PATCH 22/39] Create a PR to bump submodule if nightly is stale --- .github/workflows/release.yml | 61 ++++++++--- .github/workflows/submodule_auto_pr.yml | 128 ++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 16 deletions(-) create mode 100644 .github/workflows/submodule_auto_pr.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f3550fb0..727f8027 100644 --- a/.github/workflows/release.yml +++ 
b/.github/workflows/release.yml @@ -22,6 +22,11 @@ on: options: - test - prod + nightly-stale-after-days: + type: string + description: After how many days should nightlies be considered stale + required: true + default: 3 store-s3: type: boolean description: Also store test packages in S3 (always true for prod) @@ -41,6 +46,17 @@ jobs: duckdb-sha: ${{ inputs.duckdb-sha }} set-version: ${{ inputs.stable-version }} + submodule_pr: + name: Create or update PR to bump submodule to given SHA + needs: build_sdist + uses: ./.github/workflows/submodule_auto_pr.yml + with: + duckdb-python-sha: ${{ inputs.duckdb-python-sha }} + duckdb-sha: ${{ inputs.duckdb-sha }} + secrets: + # reusable workflows and secrets are not great: https://github.com/actions/runner/issues/3206 + DUCKDBLABS_BOT_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }} + workflow_state: name: Set state for the release workflow needs: build_sdist @@ -51,23 +67,36 @@ jobs: runs-on: ubuntu-latest steps: - id: index_check - name: Check ${{ needs.build_sdist.outputs.package-version }} on PyPI + name: Check version on PyPI run: | - set -eu - # Check PyPI whether the release we're building is already present + set -ex pypi_hostname=${{ inputs.pypi-index == 'test' && 'test.' || '' }}pypi.org - pkg_version=${{ needs.build_sdist.outputs.package-version }} - url=https://${pypi_hostname}/pypi/duckdb/${pkg_version}/json - http_status=$( curl -s -o /dev/null -w "%{http_code}" $url || echo $? ) - if [[ $http_status == "200" ]]; then - echo "::warning::Package version ${pkg_version} is already present on ${pypi_hostname}" - pypi_state=VERSION_FOUND - elif [[ $http_status == 000* ]]; then - echo "::error::Error checking PyPI at ${url}: curl exit code ${http_status#'000'}" - pypi_state=UNKNOWN - else - echo "::notice::Package version ${pkg_version} not found on ${pypi_hostname} (http status: ${http_status})" + # install duckdb + curl https://install.duckdb.org | sh + # query pypi + result=$(cat <>'upload_time_iso_8601')::DATE AS age, + FROM read_json('https://${pypi_hostname}/pypi/duckdb/json') AS jd + CROSS JOIN json_each(jd.releases) AS rel(key, value) + CROSS JOIN unnest(FROM_JSON(rel.value, '["JSON"]')) AS file(value) + WHERE rel.key='${{ needs.build_sdist.outputs.package-version }}' + LIMIT 1; + EOF + ) + if [ -z "$result" ]; then pypi_state=VERSION_NOT_FOUND + else + pypi_state=VERSION_FOUND + fi + if [[ -z "${{ inputs.stable-version }}" ]]; then + age=${result#age = } + if [ "${age}" -ge "${{ inputs.nightly-stale-after-days }}" ]; then + echo "::warning title=Stale nightly for ${{ github.ref_name }}::Nightly is ${age} days old (max=${{ inputs.nightly-stale-after-days }})" + fi fi echo "pypi_state=${pypi_state}" >> $GITHUB_OUTPUT @@ -96,7 +125,7 @@ jobs: echo "::notice::S3 upload disabled in inputs, not generating S3 URL" exit 0 fi - if [[ VERSION_FOUND == "${{ steps.index_check.outputs.pypi_state }}" ]]; then + if [[ VERSION_NOT_FOUND != "${{ steps.index_check.outputs.pypi_state }}" ]]; then echo "::warning::S3 upload disabled because package version already uploaded to PyPI" exit 0 fi @@ -110,7 +139,7 @@ jobs: build_wheels: name: Build and test releases needs: workflow_state - if: ${{ needs.workflow_state.outputs.pypi_state != 'VERSION_FOUND' }} + if: ${{ needs.workflow_state.outputs.pypi_state == 'VERSION_NOT_FOUND' }} uses: ./.github/workflows/packaging_wheels.yml with: minimal: false diff --git a/.github/workflows/submodule_auto_pr.yml b/.github/workflows/submodule_auto_pr.yml new file mode 100644 index 00000000..97c63ba0 --- /dev/null +++ 
b/.github/workflows/submodule_auto_pr.yml @@ -0,0 +1,128 @@ +name: Submodule Auto PR +on: + workflow_call: + inputs: + duckdb-python-sha: + type: string + description: The commit to build against (defaults to latest commit of current ref) + required: false + duckdb-sha: + type: string + description: The DuckDB submodule commit or ref to build against + required: true + auto-land: + type: boolean + description: Immediately merge the PR (placeholder - doesn't work) + default: false + secrets: + DUCKDBLABS_BOT_TOKEN: + description: Github token of the DuckDBLabs bot + required: true + +defaults: + run: + shell: bash + +jobs: + create_pr: + name: Create PR to bump duckdb submodule to given SHA + runs-on: ubuntu-latest + steps: + - name: Checkout DuckDB Python + uses: actions/checkout@v4 + with: + ref: ${{ inputs.duckdb-python-sha }} + fetch-depth: 0 + submodules: true + + - name: Checkout or Create Needed Branch + run: | + git fetch --all + head_sha=${{ inputs.duckdb-python-sha }} + branch_name="vendoring-${{ github.ref_name }}" + if [[ `git rev-parse --verify ${branch_name} 2>/dev/null` ]]; then + # branch exists + git checkout ${branch_name} + else + # new branch + git checkout -b ${branch_name} + fi + [[ ${head_sha} ]] && git reset --hard ${head_sha} || true + + - name: Checkout DuckDB at Given SHA + run: | + cd external/duckdb + git fetch origin + git checkout ${{ inputs.duckdb-sha }} + + - name: Determine GH PR Command + id: gh_pr_command + env: + GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }} + run: | + pr_url=$( gh pr list --head vendoring-${{ github.ref_name }} --state open --json url --jq '.[].url' ) + if [[ $pr_url ]]; then + echo "::notice::Found existing pr, will edit (${pr_url})" + gh_command="edit ${pr_url}" + else + echo "::notice::No existing PR, will create new" + gh_command="create --head vendoring-${{ github.ref_name }} --base ${{ github.ref_name }}" + fi + echo "subcommand=${gh_command}" >> $GITHUB_OUTPUT + + - name: Set Git User + run: | + git config --global user.email "github_bot@duckdblabs.com" + git config --global user.name "DuckDB Labs GitHub Bot" + + - name: Create PR to Bump DuckDB Submodule + env: + GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }} + run: | + # First commit and push + git add external/duckdb + git commit -m "Bump submodule" + git push --force origin vendoring-${{ github.ref_name }} + # create PR msg + echo "Bump duckdb submodule:" > body.txt + echo "- Target branch: ${{ github.ref_name }}" >> body.txt + echo "- Date: $( date +"%Y-%m-%d %H:%M:%S" )" >> body.txt + echo "- DuckDB SHA: ${{ inputs.duckdb-sha }}" >> body.txt + echo "- Trigger: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" >> body.txt + subcommand="${{ steps.gh_pr_command.outputs.subcommand }}" + gh pr ${subcommand} \ + --title "[duckdb-labs bot] Bump DuckDB submodule" \ + --body-file body.txt > output.txt 2>&1 + success=$? 
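+          # NOTE: success holds gh's numeric exit status here, but the [[ $success ]]
+          # checks below only test for a non-empty string, which is true for both "0"
+          # and "1". A stricter guard (a suggested tweak, not part of the original
+          # script) would be a numeric test such as [[ $success -eq 0 ]].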
+ # Show summary + url=$( [[ $success ]] && gh pr view vendoring-${{ github.ref_name }} --json url --jq .url || true ) + echo "## Submodule PR Summary" >> $GITHUB_STEP_SUMMARY + if [[ $success ]]; then + prefix=$( [[ $subcommand == edit* ]] && echo "Created" || echo "Updated" ) + echo "### ${prefix} PR: [${url}](${url})" >> $GITHUB_STEP_SUMMARY + else + echo "### Failed to create PR" >> $GITHUB_STEP_SUMMARY + fi + echo '```' >> $GITHUB_STEP_SUMMARY + cat output.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + [[ $success ]] || exit 1 + + - name: Automerge PR + if: ${{ inputs.auto-land }} + env: + GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }} + run: | + # PLACEHOLDER: DUCKDBLABS_BOT_TOKEN DOES NOT HAVE PERMISSIONS TO MERGE PRS + set -ex + gh pr merge vendoring-${{ github.ref_name }} --rebase > output.txt + success=$? + # Show summary + if [[ $success ]]; then + echo "### PR merged" >> $GITHUB_STEP_SUMMARY + else + echo "### Failed to auto-merge PR" >> $GITHUB_STEP_SUMMARY + fi + echo '```' >> $GITHUB_STEP_SUMMARY + cat output.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY From 96762c1b08c8807b8fbd5d99f058499c713d6c0c Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Wed, 19 Nov 2025 12:16:37 +0000 Subject: [PATCH 23/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 68d7555f..19cad945 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 68d7555f68bd25c1a251ccca2e6338949c33986a +Subproject commit 19cad945a5eb01066d8ce888965813b52484bce2 From 8f1aa4b429717cfc82d8bbd319cf37eb0b8eada8 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Thu, 20 Nov 2025 12:42:43 +0000 Subject: [PATCH 24/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 19cad945..a2ae6927 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 19cad945a5eb01066d8ce888965813b52484bce2 +Subproject commit a2ae69278e36a919880c521d18789dca9fb6f15a From c7ed4f67822c3dd6c922d41a28209e25745faa16 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Fri, 21 Nov 2025 07:08:08 +0000 Subject: [PATCH 25/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index a2ae6927..5659b336 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit a2ae69278e36a919880c521d18789dca9fb6f15a +Subproject commit 5659b336314888a0725e4d6118becb26ce5f06c5 From 4460b3212c65fcd24067ec56ab6bff9dfa203d38 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Mon, 24 Nov 2025 11:17:21 +0100 Subject: [PATCH 26/39] don't fail if the submodule is at the same commit as the input --- .github/workflows/submodule_auto_pr.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/submodule_auto_pr.yml b/.github/workflows/submodule_auto_pr.yml index 97c63ba0..43a9860a 100644 --- a/.github/workflows/submodule_auto_pr.yml +++ b/.github/workflows/submodule_auto_pr.yml @@ -79,7 +79,9 @@ jobs: env: GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }} run: | - # First commit and push + # No need to do anything if the submodule is already at the given sha + [[ `git status --porcelain -- external/duckdb` == "" ]] && exit 0 + # We have changes. 
Commit and push git add external/duckdb git commit -m "Bump submodule" git push --force origin vendoring-${{ github.ref_name }} From c542c18781a97fa1dfa0977ffa8a178d966c738c Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Wed, 26 Nov 2025 05:54:54 +0000 Subject: [PATCH 27/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 5659b336..24ca471d 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 5659b336314888a0725e4d6118becb26ce5f06c5 +Subproject commit 24ca471d643f9a7ef16e1a20b78285636d4c3bfe From d1a4643081d610ec72c1a6d08d8fea329d6ba7f6 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Sat, 29 Nov 2025 05:44:38 +0000 Subject: [PATCH 28/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 24ca471d..9c1f71da 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 24ca471d643f9a7ef16e1a20b78285636d4c3bfe +Subproject commit 9c1f71da3a94611611b9c88aa3a957d32915ee06 From 9ec8614c38fee55bfb1422c3d58a356be2cbcdbb Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Sun, 30 Nov 2025 05:47:13 +0000 Subject: [PATCH 29/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 9c1f71da..136dd6ad 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 9c1f71da3a94611611b9c88aa3a957d32915ee06 +Subproject commit 136dd6ada5619cd9293a03cca717515f7436cfa8 From f39e0101ffc97e7521861d2d19ec1800619a5f82 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Tue, 2 Dec 2025 05:50:26 +0000 Subject: [PATCH 30/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 136dd6ad..36451fcf 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 136dd6ada5619cd9293a03cca717515f7436cfa8 +Subproject commit 36451fcf23b4d8ebf2cb52e26eef93f7fb54561b From 8ab292cd73f9494227446edb4eedf7946535048b Mon Sep 17 00:00:00 2001 From: Matthew Bayer Date: Mon, 1 Dec 2025 14:18:27 -0500 Subject: [PATCH 31/39] Add filename_pattern to to_parquet Python API --- _duckdb-stubs/__init__.pyi | 2 + .../include/duckdb_python/pyrelation.hpp | 2 +- src/duckdb_py/pyrelation.cpp | 10 ++++- src/duckdb_py/pyrelation/initialize.cpp | 3 +- tests/fast/api/test_to_parquet.py | 43 +++++++++++++++++++ 5 files changed, 57 insertions(+), 3 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index a4965632..6b323184 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -720,6 +720,7 @@ class DuckDBPyRelation: partition_by: pytyping.List[str] | None = None, write_partition_columns: bool | None = None, append: bool | None = None, + filename_pattern: str | None = None, ) -> None: ... def to_table(self, table_name: str) -> None: ... def to_view(self, view_name: str, replace: bool = True) -> DuckDBPyRelation: ... @@ -772,6 +773,7 @@ class DuckDBPyRelation: partition_by: pytyping.List[str] | None = None, write_partition_columns: bool | None = None, append: bool | None = None, + filename_pattern: str | None = None, ) -> None: ... @property def alias(self) -> str: ... 
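The new filename_pattern keyword forwards DuckDB's COPY option of the same name, where "{i}" expands to an incrementing file index and "{uuid}" to a random UUID. A minimal usage sketch (the relation and output directory are illustrative):

    import duckdb

    rel = duckdb.sql("SELECT range AS id, range % 2 AS category FROM range(10)")
    # One directory per category value; files inside are named orders_0.parquet, ...
    rel.to_parquet("out_dir", partition_by=["category"], filename_pattern="orders_{i}")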
diff --git a/src/duckdb_py/include/duckdb_python/pyrelation.hpp b/src/duckdb_py/include/duckdb_python/pyrelation.hpp index e272ca41..06cf9e94 100644 --- a/src/duckdb_py/include/duckdb_python/pyrelation.hpp +++ b/src/duckdb_py/include/duckdb_python/pyrelation.hpp @@ -214,7 +214,7 @@ struct DuckDBPyRelation { const py::object &row_group_size = py::none(), const py::object &overwrite = py::none(), const py::object &per_thread_output = py::none(), const py::object &use_tmp_file = py::none(), const py::object &partition_by = py::none(), const py::object &write_partition_columns = py::none(), - const py::object &append = py::none()); + const py::object &append = py::none(), const py::object &filename_pattern = py::none()); void ToCSV(const string &filename, const py::object &sep = py::none(), const py::object &na_rep = py::none(), const py::object &header = py::none(), const py::object "echar = py::none(), diff --git a/src/duckdb_py/pyrelation.cpp b/src/duckdb_py/pyrelation.cpp index 08b001be..bbc7a2ec 100644 --- a/src/duckdb_py/pyrelation.cpp +++ b/src/duckdb_py/pyrelation.cpp @@ -1213,7 +1213,8 @@ void DuckDBPyRelation::ToParquet(const string &filename, const py::object &compr const py::object &row_group_size_bytes, const py::object &row_group_size, const py::object &overwrite, const py::object &per_thread_output, const py::object &use_tmp_file, const py::object &partition_by, - const py::object &write_partition_columns, const py::object &append) { + const py::object &write_partition_columns, const py::object &append, + const py::object &filename_pattern) { case_insensitive_map_t> options; if (!py::none().is(compression)) { @@ -1304,6 +1305,13 @@ void DuckDBPyRelation::ToParquet(const string &filename, const py::object &compr options["use_tmp_file"] = {Value::BOOLEAN(py::bool_(use_tmp_file))}; } + if (!py::none().is(filename_pattern)) { + if (!py::isinstance(filename_pattern)) { + throw InvalidInputException("to_parquet only accepts 'filename_pattern' as a string"); + } + options["filename_pattern"] = {Value(py::str(filename_pattern))}; + } + auto write_parquet = rel->WriteParquetRel(filename, std::move(options)); PyExecuteRelation(write_parquet); } diff --git a/src/duckdb_py/pyrelation/initialize.cpp b/src/duckdb_py/pyrelation/initialize.cpp index cd1f042c..7bfea441 100644 --- a/src/duckdb_py/pyrelation/initialize.cpp +++ b/src/duckdb_py/pyrelation/initialize.cpp @@ -36,7 +36,8 @@ static void InitializeConsumers(py::class_ &m) { py::arg("row_group_size_bytes") = py::none(), py::arg("row_group_size") = py::none(), py::arg("overwrite") = py::none(), py::arg("per_thread_output") = py::none(), py::arg("use_tmp_file") = py::none(), py::arg("partition_by") = py::none(), - py::arg("write_partition_columns") = py::none(), py::arg("append") = py::none()); + py::arg("write_partition_columns") = py::none(), py::arg("append") = py::none(), + py::arg("filename_pattern") = py::none()); DefineMethod( {"to_csv", "write_csv"}, m, &DuckDBPyRelation::ToCSV, "Write the relation object to a CSV file in 'file_name'", diff --git a/tests/fast/api/test_to_parquet.py b/tests/fast/api/test_to_parquet.py index 8d8162b0..75693159 100644 --- a/tests/fast/api/test_to_parquet.py +++ b/tests/fast/api/test_to_parquet.py @@ -170,3 +170,46 @@ def test_append(self, pd): ("shinji", 123.0, "a"), ] assert result.execute().fetchall() == expected + + @pytest.mark.parametrize("pd", [NumpyPandas(), ArrowPandas()]) + def test_filename_pattern_with_index(self, pd): + temp_file_name = os.path.join(tempfile.mkdtemp(), 
next(tempfile._get_candidate_names())) # noqa: PTH118 + df = pd.DataFrame( + { + "name": ["rei", "shinji", "asuka", "kaworu"], + "float": [321.0, 123.0, 23.0, 340.0], + "category": ["a", "a", "b", "c"], + } + ) + rel = duckdb.from_df(df) + rel.to_parquet(temp_file_name, partition_by=["category"], filename_pattern="orders_{i}") + # Check that files follow the pattern with {i} + files_a = os.listdir(f"{temp_file_name}/category=a") + files_b = os.listdir(f"{temp_file_name}/category=b") + files_c = os.listdir(f"{temp_file_name}/category=c") + assert all("orders_" in f and f.endswith(".parquet") for f in files_a) + assert all("orders_" in f and f.endswith(".parquet") for f in files_b) + assert all("orders_" in f and f.endswith(".parquet") for f in files_c) + # Verify data integrity + result = duckdb.sql(f"FROM read_parquet('{temp_file_name}/*/*.parquet', hive_partitioning=TRUE)") + expected = [("rei", 321.0, "a"), ("shinji", 123.0, "a"), ("asuka", 23.0, "b"), ("kaworu", 340.0, "c")] + assert result.execute().fetchall() == expected + + @pytest.mark.parametrize("pd", [NumpyPandas(), ArrowPandas()]) + def test_filename_pattern_with_uuid(self, pd): + temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) # noqa: PTH118 + df = pd.DataFrame( + { + "name": ["rei", "shinji", "asuka", "kaworu"], + "float": [321.0, 123.0, 23.0, 340.0], + } + ) + rel = duckdb.from_df(df) + rel.to_parquet(temp_file_name, filename_pattern="file_{uuid}") + # Check that files follow the pattern with {uuid} + files = [f for f in os.listdir(temp_file_name) if f.endswith(".parquet")] + assert len(files) > 0 + assert all(f.startswith("file_") and f.endswith(".parquet") for f in files) + # Verify data integrity + result = duckdb.read_parquet(f"{temp_file_name}/*.parquet") + assert rel.execute().fetchall() == result.execute().fetchall() From 5cb2afbf07a048b3ae8b4080acb0df5f13b0bbc5 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Tue, 2 Dec 2025 15:27:36 +0100 Subject: [PATCH 32/39] Fix linting and test --- tests/fast/api/test_to_parquet.py | 36 ++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tests/fast/api/test_to_parquet.py b/tests/fast/api/test_to_parquet.py index 75693159..f0952e68 100644 --- a/tests/fast/api/test_to_parquet.py +++ b/tests/fast/api/test_to_parquet.py @@ -1,4 +1,6 @@ import os +import pathlib +import re import tempfile import pytest @@ -184,12 +186,14 @@ def test_filename_pattern_with_index(self, pd): rel = duckdb.from_df(df) rel.to_parquet(temp_file_name, partition_by=["category"], filename_pattern="orders_{i}") # Check that files follow the pattern with {i} - files_a = os.listdir(f"{temp_file_name}/category=a") - files_b = os.listdir(f"{temp_file_name}/category=b") - files_c = os.listdir(f"{temp_file_name}/category=c") - assert all("orders_" in f and f.endswith(".parquet") for f in files_a) - assert all("orders_" in f and f.endswith(".parquet") for f in files_b) - assert all("orders_" in f and f.endswith(".parquet") for f in files_c) + files_a = list(pathlib.Path(f"{temp_file_name}/category=a").iterdir()) + files_b = list(pathlib.Path(f"{temp_file_name}/category=b").iterdir()) + files_c = list(pathlib.Path(f"{temp_file_name}/category=c").iterdir()) + filename_pattern = re.compile(r"^orders_[09]+\.parquet$") + assert all(filename_pattern.search(str(f.name)) for f in files_a) + assert all(filename_pattern.search(str(f.name)) for f in files_b) + assert all(filename_pattern.search(str(f.name)) for f in files_c) + # Verify data 
integrity result = duckdb.sql(f"FROM read_parquet('{temp_file_name}/*/*.parquet', hive_partitioning=TRUE)") expected = [("rei", 321.0, "a"), ("shinji", 123.0, "a"), ("asuka", 23.0, "b"), ("kaworu", 340.0, "c")] @@ -202,14 +206,22 @@ def test_filename_pattern_with_uuid(self, pd): { "name": ["rei", "shinji", "asuka", "kaworu"], "float": [321.0, 123.0, 23.0, 340.0], + "category": ["a", "a", "b", "c"], } ) rel = duckdb.from_df(df) - rel.to_parquet(temp_file_name, filename_pattern="file_{uuid}") + rel.to_parquet(temp_file_name, partition_by=["category"], filename_pattern="file_{uuid}") # Check that files follow the pattern with {uuid} - files = [f for f in os.listdir(temp_file_name) if f.endswith(".parquet")] - assert len(files) > 0 - assert all(f.startswith("file_") and f.endswith(".parquet") for f in files) + files_a = list(pathlib.Path(f"{temp_file_name}/category=a").iterdir()) + files_b = list(pathlib.Path(f"{temp_file_name}/category=b").iterdir()) + files_c = list(pathlib.Path(f"{temp_file_name}/category=c").iterdir()) + filename_pattern = re.compile(r"^file_[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}\.parquet$") + print(files_a) + assert all(filename_pattern.search(str(f.name)) for f in files_a) + assert all(filename_pattern.search(str(f.name)) for f in files_b) + assert all(filename_pattern.search(str(f.name)) for f in files_c) + # Verify data integrity - result = duckdb.read_parquet(f"{temp_file_name}/*.parquet") - assert rel.execute().fetchall() == result.execute().fetchall() + result = duckdb.sql(f"FROM read_parquet('{temp_file_name}/*/*.parquet', hive_partitioning=TRUE)") + expected = [("rei", 321.0, "a"), ("shinji", 123.0, "a"), ("asuka", 23.0, "b"), ("kaworu", 340.0, "c")] + assert result.execute().fetchall() == expected From f1189724401fc3ec3d8c9a9e1b272cc459092c36 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Wed, 3 Dec 2025 07:10:05 +0000 Subject: [PATCH 33/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 36451fcf..0d4d302f 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 36451fcf23b4d8ebf2cb52e26eef93f7fb54561b +Subproject commit 0d4d302fcf585b2068af6260bbc6387c862c08e2 From 112a6a23a9b18b4bd5acedc3ad7d6fec52f89457 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Thu, 4 Dec 2025 04:58:51 +0000 Subject: [PATCH 34/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index 0d4d302f..b9dd2cf1 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 0d4d302fcf585b2068af6260bbc6387c862c08e2 +Subproject commit b9dd2cf18bf6e71bac23709e1fa20ef16fcff3d2 From 28a6d224be1d65849a7049f716c123ee3e640c26 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Fri, 5 Dec 2025 04:38:58 +0000 Subject: [PATCH 35/39] Bump submodule --- external/duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/duckdb b/external/duckdb index b9dd2cf1..2692b944 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit b9dd2cf18bf6e71bac23709e1fa20ef16fcff3d2 +Subproject commit 2692b94466b0244d8417591f7518efe3ffa04dbe From b3ef82cfdbf97c7ff44417b44ed734b54057e77c Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Mon, 1 Dec 2025 08:39:56 +0100 Subject: [PATCH 36/39] add windows arm64 build --- .github/workflows/packaging_wheels.yml | 3 +++ .github/workflows/targeted_test.yml | 8 
From f1189724401fc3ec3d8c9a9e1b272cc459092c36 Mon Sep 17 00:00:00 2001
From: DuckDB Labs GitHub Bot
Date: Wed, 3 Dec 2025 07:10:05 +0000
Subject: [PATCH 33/39] Bump submodule

---
 external/duckdb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/duckdb b/external/duckdb
index 36451fcf..0d4d302f 160000
--- a/external/duckdb
+++ b/external/duckdb
@@ -1 +1 @@
-Subproject commit 36451fcf23b4d8ebf2cb52e26eef93f7fb54561b
+Subproject commit 0d4d302fcf585b2068af6260bbc6387c862c08e2

From 112a6a23a9b18b4bd5acedc3ad7d6fec52f89457 Mon Sep 17 00:00:00 2001
From: DuckDB Labs GitHub Bot
Date: Thu, 4 Dec 2025 04:58:51 +0000
Subject: [PATCH 34/39] Bump submodule

---
 external/duckdb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/duckdb b/external/duckdb
index 0d4d302f..b9dd2cf1 160000
--- a/external/duckdb
+++ b/external/duckdb
@@ -1 +1 @@
-Subproject commit 0d4d302fcf585b2068af6260bbc6387c862c08e2
+Subproject commit b9dd2cf18bf6e71bac23709e1fa20ef16fcff3d2

From 28a6d224be1d65849a7049f716c123ee3e640c26 Mon Sep 17 00:00:00 2001
From: DuckDB Labs GitHub Bot
Date: Fri, 5 Dec 2025 04:38:58 +0000
Subject: [PATCH 35/39] Bump submodule

---
 external/duckdb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/duckdb b/external/duckdb
index b9dd2cf1..2692b944 160000
--- a/external/duckdb
+++ b/external/duckdb
@@ -1 +1 @@
-Subproject commit b9dd2cf18bf6e71bac23709e1fa20ef16fcff3d2
+Subproject commit 2692b94466b0244d8417591f7518efe3ffa04dbe

From b3ef82cfdbf97c7ff44417b44ed734b54057e77c Mon Sep 17 00:00:00 2001
From: Evert Lammerts
Date: Mon, 1 Dec 2025 08:39:56 +0100
Subject: [PATCH 36/39] add windows arm64 build

---
 .github/workflows/packaging_wheels.yml      |  3 +++
 .github/workflows/targeted_test.yml         |  8 +++++-
 pyproject.toml                              | 29 +++++++++++++--------
 tests/fast/adbc/test_adbc.py                |  6 ++---
 tests/fast/adbc/test_connection_get_info.py | 15 ++++++-----
 tests/fast/adbc/test_statement_bind.py      | 18 ++++++-------
 tests/fast/arrow/test_2426.py               |  4 +++
 tests/fast/arrow/test_arrow_fetch.py        |  5 ++++
 tests/fast/test_all_types.py                |  1 +
 9 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/packaging_wheels.yml b/.github/workflows/packaging_wheels.yml
index 8e656abd..e05e3cc0 100644
--- a/.github/workflows/packaging_wheels.yml
+++ b/.github/workflows/packaging_wheels.yml
@@ -33,6 +33,7 @@ jobs:
         python: [ cp39, cp310, cp311, cp312, cp313, cp314 ]
         platform:
           - { os: windows-2025, arch: amd64, cibw_system: win }
+          - { os: windows-11-arm, arch: ARM64, cibw_system: win } # cibw requires ARM64 to be uppercase
           - { os: ubuntu-24.04, arch: x86_64, cibw_system: manylinux }
           - { os: ubuntu-24.04-arm, arch: aarch64, cibw_system: manylinux }
           - { os: macos-15, arch: arm64, cibw_system: macosx }
@@ -46,6 +47,8 @@ jobs:
           - { minimal: true, python: cp312 }
           - { minimal: true, python: cp313 }
           - { minimal: true, platform: { arch: universal2 } }
+          - { os: windows-11-arm, arch: ARM64, python: cp39 } # too many dependency problems for win arm64
+          - { os: windows-11-arm, arch: ARM64, python: cp310 } # too many dependency problems for win arm64
     runs-on: ${{ matrix.platform.os }}
     env:
       ### cibuildwheel configuration

diff --git a/.github/workflows/targeted_test.yml b/.github/workflows/targeted_test.yml
index 812bb9c5..13ae9566 100644
--- a/.github/workflows/targeted_test.yml
+++ b/.github/workflows/targeted_test.yml
@@ -9,6 +9,7 @@ on:
         type: choice
         options:
           - 'windows-2025'
+          - 'windows-11-arm'
          - 'ubuntu-24.04'
           - 'ubuntu-24.04-arm'
           - 'macos-15'
@@ -36,6 +37,11 @@ on:
         description: 'Custom test path (must be in tests/ directory, overrides testsuite)'
         required: false
         type: string
+      verbose-uv:
+        description: 'Let uv generate verbose output (pytest verbosity is always on)'
+        required: false
+        type: boolean
+        default: true
 
 jobs:
   test:
@@ -83,4 +89,4 @@ jobs:
       - name: Run tests
         shell: bash
         run: |
-          uv run pytest -vv ${{ steps.test_path.outputs.test_path }}
+          uv ${{ inputs.verbose-uv && 'run -v' || 'run' }} pytest -vv ${{ steps.test_path.outputs.test_path }}

diff --git a/pyproject.toml b/pyproject.toml
index 6895b12e..3bd54543 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -184,7 +184,8 @@ exclude = [
 # - numpy: tensorflow doesn't play nice with numpy>2 so for every platform that can run tensorflow (cp39-cp311) we use
 # numpy<2. numpy<2 has no wheels for cp31[2|3], meaning an sdist will be used. However, on Windows amd64 +
 # cp313 this results in a segfault / access violation. To get around this, we install numpy>=2 on all >=cp312
-# platforms.
+# platforms. Then for windows arm64, for which there is no tensorflow, we only allow numpy>=2.3 because that
+# ships Windows ARM64 wheels.
 ######################################################################################################
 
 [tool.uv]
@@ -195,6 +196,7 @@ environments = [ # no need to resolve packages beyond these platforms with uv...
"python_version >= '3.9' and sys_platform == 'darwin' and platform_machine == 'arm64'", "python_version >= '3.9' and sys_platform == 'darwin' and platform_machine == 'x86_64'", "python_version >= '3.9' and sys_platform == 'win32' and platform_machine == 'AMD64'", + "python_version >= '3.11' and sys_platform == 'win32' and platform_machine == 'ARM64'", "python_version >= '3.9' and sys_platform == 'linux' and platform_machine == 'x86_64'", "python_version >= '3.9' and sys_platform == 'linux' and platform_machine == 'aarch64'", ] @@ -204,6 +206,7 @@ required-environments = [ # ... but do always resolve for all of them "python_version >= '3.9' and sys_platform == 'darwin' and platform_machine == 'arm64'", "python_version >= '3.9' and sys_platform == 'darwin' and platform_machine == 'x86_64'", "python_version >= '3.9' and sys_platform == 'win32' and platform_machine == 'AMD64'", + "python_version >= '3.11' and sys_platform == 'win32' and platform_machine == 'ARM64'", "python_version >= '3.9' and sys_platform == 'linux' and platform_machine == 'x86_64'", "python_version >= '3.9' and sys_platform == 'linux' and platform_machine == 'aarch64'", ] @@ -219,6 +222,7 @@ explicit = true torch = [ { index = "pytorch-cpu" } ] torchvision = [ { index = "pytorch-cpu" } ] +# todo: adjust for windows arm64 while test dependencies become available [dependency-groups] # used for development only, requires pip >=25.1.0 stubdeps = [ # dependencies used for typehints in the stubs "pybind11-stubgen", @@ -226,17 +230,18 @@ stubdeps = [ # dependencies used for typehints in the stubs "fsspec", "pandas", "polars", - "pyarrow", + "pyarrow; sys_platform != 'win32' or platform_machine != 'ARM64'", + "typing-extensions", ] test = [ # dependencies used for running tests - "adbc-driver-manager", + "adbc-driver-manager; sys_platform != 'win32' or platform_machine != 'ARM64'", "pytest", "pytest-reraise", "pytest-timeout", "pytest-timestamper", "coverage", "gcovr", - "gcsfs", + "gcsfs; sys_platform != 'win32' or platform_machine != 'ARM64'", "packaging", "polars", "psutil", @@ -246,16 +251,18 @@ test = [ # dependencies used for running tests "pytz", "requests", "urllib3", - "fsspec>=2022.11.0", + "fsspec>=2022.11.0; sys_platform != 'win32' or platform_machine != 'ARM64'", "pandas>=2.0.0", - "pyarrow>=18.0.0", - "torch>=2.2.2; python_version < '3.14' and ( sys_platform != 'darwin' or platform_machine != 'x86_64' or python_version < '3.13' )", + "pyarrow>=18.0.0; sys_platform != 'win32' or platform_machine != 'ARM64'", + "torch>=2.2.2; python_version < '3.14' and ( sys_platform != 'darwin' or platform_machine != 'x86_64' or python_version < '3.13' ) and ( sys_platform != 'win32' or platform_machine != 'ARM64' or python_version > '3.11' )", "tensorflow==2.14.0; sys_platform == 'darwin' and python_version < '3.12'", "tensorflow-cpu>=2.14.0; sys_platform == 'linux' and platform_machine != 'aarch64' and python_version < '3.12'", - "tensorflow-cpu>=2.14.0; sys_platform == 'win32' and python_version < '3.12'", + "tensorflow-cpu>=2.14.0; sys_platform == 'win32' and platform_machine != 'ARM64' and python_version < '3.12'", "tensorflow-cpu-aws==2.15.1; sys_platform == 'linux' and platform_machine == 'aarch64' and python_version < '3.12'", - "numpy<2; python_version < '3.12'", - "numpy>=2; python_version >= '3.12'", + "typing-extensions", + "numpy<2; ( sys_platform != 'win32' or platform_machine != 'ARM64' ) and python_version < '3.12'", + "numpy>=2; ( sys_platform != 'win32' or platform_machine != 'ARM64' ) and python_version >= '3.12'", 
+ "numpy>=2.3; sys_platform == 'win32' and platform_machine == 'ARM64' and python_version >= '3.11'", ] scripts = [ # dependencies used for running scripts "cxxheaderparser", @@ -265,7 +272,7 @@ scripts = [ # dependencies used for running scripts "pandas", "pcpp", "polars", - "pyarrow", + "pyarrow; sys_platform != 'win32' or platform_machine != 'ARM64'", "pytz" ] pypi = [ # dependencies used by the pypi cleanup script diff --git a/tests/fast/adbc/test_adbc.py b/tests/fast/adbc/test_adbc.py index c20d2a0e..42ba8199 100644 --- a/tests/fast/adbc/test_adbc.py +++ b/tests/fast/adbc/test_adbc.py @@ -1,12 +1,12 @@ import datetime from pathlib import Path -import adbc_driver_manager.dbapi import numpy as np -import pyarrow import pytest -import adbc_driver_duckdb.dbapi +adbc_driver_manager = pytest.importorskip("adbc_driver_manager") +adbc_driver_duckdb = pytest.importorskip("adbc_driver_duckdb") +pyarrow = pytest.importorskip("pyarrow") xfail = pytest.mark.xfail driver_path = adbc_driver_duckdb.driver_path() diff --git a/tests/fast/adbc/test_connection_get_info.py b/tests/fast/adbc/test_connection_get_info.py index aa2b3d32..cd6298ed 100644 --- a/tests/fast/adbc/test_connection_get_info.py +++ b/tests/fast/adbc/test_connection_get_info.py @@ -1,19 +1,22 @@ -import pyarrow as pa +import pytest -import adbc_driver_duckdb.dbapi import duckdb +pa = pytest.importorskip("pyarrow") +pytest.importorskip("adbc_driver_manager") +adbc_driver_duckdb_dbapi = pytest.importorskip("adbc_driver_duckdb.dbapi") + class TestADBCConnectionGetInfo: def test_connection_basic(self): - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() with con.cursor() as cursor: cursor.execute("select 42") res = cursor.fetchall() assert res == [(42,)] def test_connection_get_info_all(self): - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() adbc_con = con.adbc_connection res = adbc_con.get_info() reader = pa.RecordBatchReader._import_from_c(res.address) @@ -37,7 +40,7 @@ def test_connection_get_info_all(self): assert string_values == expected_result def test_empty_result(self): - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() adbc_con = con.adbc_connection res = adbc_con.get_info([1337]) reader = pa.RecordBatchReader._import_from_c(res.address) @@ -48,7 +51,7 @@ def test_empty_result(self): assert values.num_chunks == 0 def test_unrecognized_codes(self): - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() adbc_con = con.adbc_connection res = adbc_con.get_info([0, 1000, 4, 2000]) reader = pa.RecordBatchReader._import_from_c(res.address) diff --git a/tests/fast/adbc/test_statement_bind.py b/tests/fast/adbc/test_statement_bind.py index d35693ff..e8df14c7 100644 --- a/tests/fast/adbc/test_statement_bind.py +++ b/tests/fast/adbc/test_statement_bind.py @@ -1,10 +1,10 @@ import sys -import adbc_driver_manager -import pyarrow as pa import pytest -import adbc_driver_duckdb.dbapi +pa = pytest.importorskip("pyarrow") +adbc_driver_manager = pytest.importorskip("adbc_driver_manager") +adbc_driver_duckdb_dbapi = pytest.importorskip("adbc_driver_duckdb.dbapi") xfail = pytest.mark.xfail @@ -35,7 +35,7 @@ def test_bind_multiple_rows(self): names=["ints"], ) - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? 
* 2 as i") @@ -57,7 +57,7 @@ def test_bind_single_row(self): names=["ints"], ) - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? * 2 as i") @@ -93,7 +93,7 @@ def test_multiple_parameters(self): names=["ints", "strings", "bools"], ) - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? as a, ? as b, ? as c") @@ -123,7 +123,7 @@ def test_bind_composite_type(self): # Create the RecordBatch record_batch = pa.RecordBatch.from_arrays([struct_array], schema=schema) - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? as a") @@ -146,7 +146,7 @@ def test_too_many_parameters(self): names=["ints", "strings"], ) - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? as a") @@ -175,7 +175,7 @@ def test_not_enough_parameters(self): names=["strings"], ) - con = adbc_driver_duckdb.dbapi.connect() + con = adbc_driver_duckdb_dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? as a, ? as b") diff --git a/tests/fast/arrow/test_2426.py b/tests/fast/arrow/test_2426.py index f43284d3..6f76613f 100644 --- a/tests/fast/arrow/test_2426.py +++ b/tests/fast/arrow/test_2426.py @@ -1,5 +1,9 @@ +import pytest + import duckdb +pytest.importorskip("pyarrow") + try: can_run = True except Exception: diff --git a/tests/fast/arrow/test_arrow_fetch.py b/tests/fast/arrow/test_arrow_fetch.py index 0547020f..ba5d13a4 100644 --- a/tests/fast/arrow/test_arrow_fetch.py +++ b/tests/fast/arrow/test_arrow_fetch.py @@ -1,5 +1,10 @@ +import pytest + import duckdb +pytest.importorskip("pyarrow") + + try: can_run = True except Exception: diff --git a/tests/fast/test_all_types.py b/tests/fast/test_all_types.py index 77074fdc..c4ba0e55 100644 --- a/tests/fast/test_all_types.py +++ b/tests/fast/test_all_types.py @@ -534,6 +534,7 @@ def test_fetchnumpy(self, cur_type): @pytest.mark.parametrize("cur_type", all_types) def test_arrow(self, cur_type): + pytest.importorskip("pyarrow") try: pass except Exception: From a2a798aec4bb12e4e9ec78c98ba7fb55149238c1 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Fri, 5 Dec 2025 23:15:37 +0100 Subject: [PATCH 37/39] Fix Windows ARM64 excludes in workflow --- .github/workflows/packaging_wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/packaging_wheels.yml b/.github/workflows/packaging_wheels.yml index e05e3cc0..23a16af7 100644 --- a/.github/workflows/packaging_wheels.yml +++ b/.github/workflows/packaging_wheels.yml @@ -47,8 +47,8 @@ jobs: - { minimal: true, python: cp312 } - { minimal: true, python: cp313 } - { minimal: true, platform: { arch: universal2 } } - - { os: windows-11-arm, arch: ARM64, python: cp39 } # too many dependency problems for win arm64 - - { os: windows-11-arm, arch: ARM64, python: cp310 } # too many dependency problems for win arm64 + - { python: cp39, platform: { os: windows-11-arm, arch: ARM64 } } # too many dependency problems for win arm64 + - { python: cp310, platform: { os: windows-11-arm, arch: ARM64 } } # too many dependency problems for win arm64 runs-on: 
From a2a798aec4bb12e4e9ec78c98ba7fb55149238c1 Mon Sep 17 00:00:00 2001
From: Evert Lammerts
Date: Fri, 5 Dec 2025 23:15:37 +0100
Subject: [PATCH 37/39] Fix Windows ARM64 excludes in workflow

---
 .github/workflows/packaging_wheels.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/packaging_wheels.yml b/.github/workflows/packaging_wheels.yml
index e05e3cc0..23a16af7 100644
--- a/.github/workflows/packaging_wheels.yml
+++ b/.github/workflows/packaging_wheels.yml
@@ -47,8 +47,8 @@ jobs:
           - { minimal: true, python: cp312 }
           - { minimal: true, python: cp313 }
           - { minimal: true, platform: { arch: universal2 } }
-          - { os: windows-11-arm, arch: ARM64, python: cp39 } # too many dependency problems for win arm64
-          - { os: windows-11-arm, arch: ARM64, python: cp310 } # too many dependency problems for win arm64
+          - { python: cp39, platform: { os: windows-11-arm, arch: ARM64 } } # too many dependency problems for win arm64
+          - { python: cp310, platform: { os: windows-11-arm, arch: ARM64 } } # too many dependency problems for win arm64
     runs-on: ${{ matrix.platform.os }}
     env:
       ### cibuildwheel configuration

From a76d96a4e7f073dcca43083a085ebb3d0202fd42 Mon Sep 17 00:00:00 2001
From: DuckDB Labs GitHub Bot
Date: Sat, 6 Dec 2025 04:55:39 +0000
Subject: [PATCH 38/39] Bump submodule

---
 external/duckdb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/duckdb b/external/duckdb
index 2692b944..e05f4ff8 160000
--- a/external/duckdb
+++ b/external/duckdb
@@ -1 +1 @@
-Subproject commit 2692b94466b0244d8417591f7518efe3ffa04dbe
+Subproject commit e05f4ff81a8fbed460514f80e457260043ff8e9d
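The final patch below continues the `pytest.importorskip` refactor started in patch 36, so suites degrade to skips rather than collection errors on platforms (such as Windows ARM64) where optional dependencies have no wheels. A standalone sketch of the pattern (the module and test are just examples):

    import pytest

    # Returns the imported module if available; otherwise the entire test
    # module is skipped instead of failing with ImportError at collection.
    pa = pytest.importorskip("pyarrow")

    def test_table_roundtrip():
        table = pa.table({"i": [1, 2, 3]})
        assert table.num_rows == 3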
From c5d6dc0de0b0b58b2a6e59b84df85d9cfa83bd66 Mon Sep 17 00:00:00 2001
From: Evert Lammerts
Date: Mon, 8 Dec 2025 07:32:57 +0100
Subject: [PATCH 39/39] fix main builds

---
 .../duckdb_python/pandas/pandas_scan.hpp      |  2 ++
 src/duckdb_py/pandas/scan.cpp                 |  5 +++++
 tests/fast/adbc/test_adbc.py                  | 19 ++++++++++---------
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/duckdb_py/include/duckdb_python/pandas/pandas_scan.hpp b/src/duckdb_py/include/duckdb_python/pandas/pandas_scan.hpp
index 50565f05..0ef9a24c 100644
--- a/src/duckdb_py/include/duckdb_python/pandas/pandas_scan.hpp
+++ b/src/duckdb_py/include/duckdb_python/pandas/pandas_scan.hpp
@@ -55,6 +55,8 @@ struct PandasScanFunction : public TableFunction {
 
 	static void PandasSerialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data,
 	                            const TableFunction &function);
+
+	static unique_ptr<FunctionData> PandasDeserialize(Deserializer &deserializer, TableFunction &function);
 };
 
 } // namespace duckdb

diff --git a/src/duckdb_py/pandas/scan.cpp b/src/duckdb_py/pandas/scan.cpp
index ebce31bb..47c7ba6c 100644
--- a/src/duckdb_py/pandas/scan.cpp
+++ b/src/duckdb_py/pandas/scan.cpp
@@ -66,6 +66,7 @@ PandasScanFunction::PandasScanFunction()
 	cardinality = PandasScanCardinality;
 	table_scan_progress = PandasProgress;
 	serialize = PandasSerialize;
+	deserialize = PandasDeserialize;
 	projection_pushdown = true;
 }
 
@@ -235,4 +236,8 @@ void PandasScanFunction::PandasSerialize(Serializer &serializer, const optional_
 	throw NotImplementedException("PandasScan function cannot be serialized");
 }
 
+unique_ptr<FunctionData> PandasScanFunction::PandasDeserialize(Deserializer &deserializer, TableFunction &function) {
+	throw NotImplementedException("PandasScan function cannot be deserialized");
+}
+
 } // namespace duckdb

diff --git a/tests/fast/adbc/test_adbc.py b/tests/fast/adbc/test_adbc.py
index 42ba8199..f82d0982 100644
--- a/tests/fast/adbc/test_adbc.py
+++ b/tests/fast/adbc/test_adbc.py
@@ -5,6 +5,7 @@
 import pytest
 
 adbc_driver_manager = pytest.importorskip("adbc_driver_manager")
+adbc_driver_manager_dbapi = pytest.importorskip("adbc_driver_manager.dbapi")
 adbc_driver_duckdb = pytest.importorskip("adbc_driver_duckdb")
 pyarrow = pytest.importorskip("pyarrow")
 
@@ -14,7 +15,7 @@
 
 @pytest.fixture
 def duck_conn():
-    with adbc_driver_manager.dbapi.connect(driver=driver_path, entrypoint="duckdb_adbc_init") as conn:
+    with adbc_driver_manager_dbapi.connect(driver=driver_path, entrypoint="duckdb_adbc_init") as conn:
         yield conn
 
 
@@ -93,7 +94,7 @@ def test_commit(tmp_path):
     table = example_table()
     db_kwargs = {"path": f"{db}"}
     # Start connection with auto-commit off
-    with adbc_driver_manager.dbapi.connect(
+    with adbc_driver_manager_dbapi.connect(
         driver=driver_path,
         entrypoint="duckdb_adbc_init",
         db_kwargs=db_kwargs,
@@ -103,7 +104,7 @@ def test_commit(tmp_path):
         cur.adbc_ingest("ingest", table, "create")
 
     # Check Data is not there
-    with adbc_driver_manager.dbapi.connect(
+    with adbc_driver_manager_dbapi.connect(
         driver=driver_path,
         entrypoint="duckdb_adbc_init",
         db_kwargs=db_kwargs,
@@ -122,7 +123,7 @@ def test_commit(tmp_path):
 
     # This now works because we enabled autocommit
     with (
-        adbc_driver_manager.dbapi.connect(
+        adbc_driver_manager_dbapi.connect(
             driver=driver_path,
             entrypoint="duckdb_adbc_init",
             db_kwargs=db_kwargs,
@@ -216,8 +217,8 @@ def test_insertion(duck_conn):
     # Test Append
     with duck_conn.cursor() as cursor:
         with pytest.raises(
-            adbc_driver_manager.InternalError,
-            match=r'Table with name "ingest_table" already exists!',
+            adbc_driver_manager.ProgrammingError,
+            match=r"ALREADY_EXISTS",
         ):
             cursor.adbc_ingest("ingest_table", table, "create")
         cursor.adbc_ingest("ingest_table", table, "append")
@@ -294,7 +295,7 @@ def test_large_chunk(tmp_path):
         db.unlink()
     db_kwargs = {"path": f"{db}"}
     with (
-        adbc_driver_manager.dbapi.connect(
+        adbc_driver_manager_dbapi.connect(
             driver=driver_path,
             entrypoint="duckdb_adbc_init",
             db_kwargs=db_kwargs,
@@ -320,7 +321,7 @@ def test_dictionary_data(tmp_path):
         db.unlink()
     db_kwargs = {"path": f"{db}"}
     with (
-        adbc_driver_manager.dbapi.connect(
+        adbc_driver_manager_dbapi.connect(
             driver=driver_path,
             entrypoint="duckdb_adbc_init",
             db_kwargs=db_kwargs,
@@ -348,7 +349,7 @@ def test_ree_data(tmp_path):
         db.unlink()
     db_kwargs = {"path": f"{db}"}
     with (
-        adbc_driver_manager.dbapi.connect(
+        adbc_driver_manager_dbapi.connect(
             driver=driver_path,
             entrypoint="duckdb_adbc_init",
             db_kwargs=db_kwargs,