Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/duckdb_py/include/duckdb_python/pyresult.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ struct DuckDBPyResult {

PandasDataFrame FrameFromNumpy(bool date_as_object, const py::handle &o);

void ChangeToTZType(PandasDataFrame &df);
void ConvertDateTimeTypes(PandasDataFrame &df, bool date_as_object) const;
unique_ptr<DataChunk> FetchNext(QueryResult &result);
unique_ptr<DataChunk> FetchNextRaw(QueryResult &result);
unique_ptr<NumpyResultConversion> InitializeNumpyConversion(bool pandas = false);
Expand Down
26 changes: 12 additions & 14 deletions src/duckdb_py/pyresult.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,13 @@ py::dict DuckDBPyResult::FetchNumpyInternal(bool stream, idx_t vectors_per_chunk
return res;
}

// Swaps out the column named `col_name` in `df` for `new_value`.
// The existing column is dropped in place and the replacement is inserted at
// position `idx`, rather than being appended through item assignment.
static void ReplaceDFColumn(PandasDataFrame &df, const char *col_name, idx_t idx, const py::handle &new_value) {
	// Step 1: remove the current column from the frame itself (inplace=True).
	auto drop_column = df.attr("drop");
	drop_column("columns"_a = col_name, "inplace"_a = true);

	// Step 2: put the replacement back under the same name at the requested position.
	auto insert_column = df.attr("insert");
	insert_column(idx, col_name, new_value, "allow_duplicates"_a = false);
}

// TODO: unify these with an enum/flag to indicate which conversions to do
void DuckDBPyResult::ChangeToTZType(PandasDataFrame &df) {
void DuckDBPyResult::ConvertDateTimeTypes(PandasDataFrame &df, bool date_as_object) const {
auto names = df.attr("columns").cast<vector<string>>();

for (idx_t i = 0; i < result->ColumnCount(); i++) {
Expand All @@ -297,8 +302,10 @@ void DuckDBPyResult::ChangeToTZType(PandasDataFrame &df) {
auto utc_local = df[names[i].c_str()].attr("dt").attr("tz_localize")("UTC");
auto new_value = utc_local.attr("dt").attr("tz_convert")(result->client_properties.time_zone);
// We need to create the column anew because the exact dt changed to a new timezone
df.attr("drop")("columns"_a = names[i].c_str(), "inplace"_a = true);
df.attr("__setitem__")(names[i].c_str(), new_value);
ReplaceDFColumn(df, names[i].c_str(), i, new_value);
} else if (date_as_object && result->types[i] == LogicalType::DATE) {
auto new_value = df[names[i].c_str()].attr("dt").attr("date");
ReplaceDFColumn(df, names[i].c_str(), i, new_value);
}
}
}
Expand Down Expand Up @@ -374,20 +381,11 @@ PandasDataFrame DuckDBPyResult::FrameFromNumpy(bool date_as_object, const py::ha
}

PandasDataFrame df = py::cast<PandasDataFrame>(pandas.attr("DataFrame").attr("from_dict")(o));
// Unfortunately we have to do a type change here for timezones since these types are not supported by numpy
ChangeToTZType(df);
// Convert TZ and (optionally) Date types
ConvertDateTimeTypes(df, date_as_object);

auto names = df.attr("columns").cast<vector<string>>();
D_ASSERT(result->ColumnCount() == names.size());
if (date_as_object) {
for (idx_t i = 0; i < result->ColumnCount(); i++) {
if (result->types[i] == LogicalType::DATE) {
auto new_value = df[names[i].c_str()].attr("dt").attr("date");
df.attr("drop")("columns"_a = names[i].c_str(), "inplace"_a = true);
df.attr("__setitem__")(names[i].c_str(), new_value);
}
}
}
return df;
}

Expand Down
16 changes: 16 additions & 0 deletions tests/fast/pandas/test_column_order.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import duckdb


class TestColumnOrder:
    """Checks the column order of a DataFrame fetched from a query with TIMESTAMPTZ and DATE columns."""

    def test_column_order(self, duckdb_cursor):
        """The frame returned by fetchdf() lists its columns in the order of the final SELECT.

        NOTE: the `duckdb_cursor` fixture is requested but not used directly;
        the statements are run through `duckdb.execute`.
        """
        expected_order = ["timepoint", "date", "col1"]
        # The table is created with a different column order than the one selected afterwards.
        query = """
CREATE OR REPLACE TABLE t1 AS (
SELECT NULL AS col1,
NULL::TIMESTAMPTZ AS timepoint,
NULL::DATE AS date,
);
SELECT timepoint, date, col1 FROM t1;
"""
        result_frame = duckdb.execute(query).fetchdf()
        assert list(result_frame.columns) == expected_order
Loading