Skip to content

Commit 1af7a22

Browse files
committed
[Variant] Add variant to arrow for Date64/Timestamp(Second/Millisecond)/Time32/Time64
1 parent 73dbd55 commit 1af7a22

File tree

3 files changed

+357
-32
lines changed

3 files changed

+357
-32
lines changed

parquet-variant-compute/src/type_conversion.rs

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ macro_rules! impl_primitive_from_variant {
5656
impl PrimitiveFromVariant for $arrow_type {
5757
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
5858
let value = variant.$variant_method();
59-
$( let value = value.map($cast_fn); )?
59+
$( let value = value.and_then($cast_fn); )?
6060
value
6161
}
6262
}
@@ -84,14 +84,87 @@ impl_primitive_from_variant!(datatypes::UInt64Type, as_u64);
8484
impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
8585
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
8686
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
87-
impl_primitive_from_variant!(
88-
datatypes::Date32Type,
89-
as_naive_date,
90-
datatypes::Date32Type::from_naive_date
91-
);
87+
impl_primitive_from_variant!(datatypes::Date32Type, as_naive_date, |v| {
88+
Some(datatypes::Date32Type::from_naive_date(v))
89+
});
90+
impl_primitive_from_variant!(datatypes::Date64Type, as_naive_date, |v| {
91+
Some(datatypes::Date64Type::from_naive_date(v))
92+
});
93+
impl_primitive_from_variant!(datatypes::Time32SecondType, as_time_utc, |v| {
94+
// Return None if there are leftover nanoseconds
95+
if v.nanosecond() != 0 {
96+
None
97+
} else {
98+
Some(v.num_seconds_from_midnight() as i32)
99+
}
100+
});
101+
impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| {
102+
// Return None if there is a sub-millisecond remainder
103+
if v.nanosecond() % 1_000_000 != 0 {
104+
None
105+
} else {
106+
Some((v.num_seconds_from_midnight() * 1_000) as i32 + (v.nanosecond() / 1_000_000) as i32)
107+
}
108+
});
92109
impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| {
93-
(v.num_seconds_from_midnight() * 1_000_000 + v.nanosecond() / 1_000) as i64
110+
Some((v.num_seconds_from_midnight() * 1_000_000 + v.nanosecond() / 1_000) as i64)
94111
});
112+
impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| {
113+
// Nanoseconds since midnight: whole seconds scaled to nanos, plus the sub-second nanos
114+
Some(v.num_seconds_from_midnight() as i64 * 1_000_000_000 + v.nanosecond() as i64)
115+
});
116+
impl_timestamp_from_variant!(
117+
datatypes::TimestampSecondType,
118+
as_timestamp_ntz_nanos,
119+
ntz = true,
120+
|timestamp| {
121+
// Return None if there are leftover nanoseconds
122+
if timestamp.nanosecond() != 0 {
123+
None
124+
} else {
125+
Self::make_value(timestamp)
126+
}
127+
}
128+
);
129+
impl_timestamp_from_variant!(
130+
datatypes::TimestampSecondType,
131+
as_timestamp_nanos,
132+
ntz = false,
133+
|timestamp| {
134+
// Return None if there are leftover nanoseconds
135+
if timestamp.nanosecond() != 0 {
136+
None
137+
} else {
138+
Self::make_value(timestamp.naive_utc())
139+
}
140+
}
141+
);
142+
impl_timestamp_from_variant!(
143+
datatypes::TimestampMillisecondType,
144+
as_timestamp_ntz_nanos,
145+
ntz = true,
146+
|timestamp| {
147+
// Return None if there is a sub-millisecond remainder
148+
if timestamp.nanosecond() % 1_000_000 != 0 {
149+
None
150+
} else {
151+
Self::make_value(timestamp)
152+
}
153+
}
154+
);
155+
impl_timestamp_from_variant!(
156+
datatypes::TimestampMillisecondType,
157+
as_timestamp_nanos,
158+
ntz = false,
159+
|timestamp| {
160+
// Return None if there is a sub-millisecond remainder
161+
if timestamp.nanosecond() % 1_000_000 != 0 {
162+
None
163+
} else {
164+
Self::make_value(timestamp.naive_utc())
165+
}
166+
}
167+
);
95168
impl_timestamp_from_variant!(
96169
datatypes::TimestampMicrosecondType,
97170
as_timestamp_ntz_micros,

parquet-variant-compute/src/variant_get.rs

Lines changed: 202 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -310,10 +310,10 @@ mod test {
310310
use crate::{VariantArray, VariantArrayBuilder, json_to_variant};
311311
use arrow::array::{
312312
Array, ArrayRef, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
313-
Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, Float32Array,
314-
Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, LargeBinaryArray,
315-
LargeStringArray, NullBuilder, StringArray, StringViewArray, StructArray,
316-
Time64MicrosecondArray,
313+
Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
314+
Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array,
315+
LargeBinaryArray, LargeStringArray, NullBuilder, StringArray, StringViewArray, StructArray,
316+
Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
317317
};
318318
use arrow::buffer::NullBuffer;
319319
use arrow::compute::CastOptions;
@@ -973,6 +973,152 @@ mod test {
973973
}
974974
);
975975

976+
perfectly_shredded_variant_array_fn!(
977+
perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
978+
|| {
979+
arrow::array::TimestampMicrosecondArray::from(vec![
980+
Some(1234), // can't be cast to second & millisecond
981+
Some(1234000), // can be cast to millisecond, but not second
982+
Some(1234000000), // can be cast to second & millisecond
983+
])
984+
.with_timezone("+00:00")
985+
}
986+
);
987+
988+
// The following two tests cover the micro (with timezone) -> second/millisecond cases.
989+
// The fixture has three items: some can be cast safely and some cannot.
990+
perfectly_shredded_to_arrow_primitive_test!(
991+
get_variant_perfectly_shredded_timestamp_micro_as_timestamp_second,
992+
DataType::Timestamp(TimeUnit::Second, Some(Arc::from("+00:00"))),
993+
perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
994+
arrow::array::TimestampSecondArray::from(vec![
995+
None,
996+
None, // Return None if can't be cast to second safely
997+
Some(1234)
998+
])
999+
.with_timezone("+00:00")
1000+
);
1001+
1002+
perfectly_shredded_to_arrow_primitive_test!(
1003+
get_variant_perfectly_shredded_timestamp_micro_as_timestamp_milli,
1004+
DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("+00:00"))),
1005+
perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
1006+
arrow::array::TimestampMillisecondArray::from(vec![
1007+
None, // Return None if can't be cast to millisecond safely
1008+
Some(1234),
1009+
Some(1234000)
1010+
])
1011+
.with_timezone("+00:00")
1012+
);
1013+
1014+
perfectly_shredded_variant_array_fn!(
1015+
perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
1016+
|| {
1017+
arrow::array::TimestampMicrosecondArray::from(vec![
1018+
Some(1234), // can't be cast to second & millisecond
1019+
Some(1234000), // can be cast to millisecond, but not second
1020+
Some(1234000000), // can be cast to second & millisecond
1021+
])
1022+
}
1023+
);
1024+
1025+
// The following two tests cover the micro_ntz -> second/millisecond cases.
1026+
// The fixture has three items: some can be cast safely and some cannot.
1027+
perfectly_shredded_to_arrow_primitive_test!(
1028+
get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_second,
1029+
DataType::Timestamp(TimeUnit::Second, None),
1030+
perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
1031+
arrow::array::TimestampSecondArray::from(vec![
1032+
None,
1033+
None, // Return None if can't be cast to second safely
1034+
Some(1234)
1035+
])
1036+
);
1037+
1038+
perfectly_shredded_to_arrow_primitive_test!(
1039+
get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_milli,
1040+
DataType::Timestamp(TimeUnit::Millisecond, None),
1041+
perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
1042+
arrow::array::TimestampMillisecondArray::from(vec![
1043+
None, // Return None if can't be cast to millisecond safely
1044+
Some(1234),
1045+
Some(1234000)
1046+
])
1047+
);
1048+
1049+
perfectly_shredded_variant_array_fn!(
1050+
perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
1051+
|| {
1052+
arrow::array::TimestampNanosecondArray::from(vec![
1053+
Some(1234000), // can't be cast to second & millisecond
1054+
Some(1234000000), // can be cast to millisecond, but not second
1055+
Some(1234000000000), // can be cast to second & millisecond
1056+
])
1057+
.with_timezone("+00:00")
1058+
}
1059+
);
1060+
1061+
// The following two tests cover the nano (with timezone) -> second/millisecond cases.
1062+
// The fixture has three items: some can be cast safely and some cannot.
1063+
perfectly_shredded_to_arrow_primitive_test!(
1064+
get_variant_perfectly_shredded_timestamp_nano_as_timestamp_second,
1065+
DataType::Timestamp(TimeUnit::Second, Some(Arc::from("+00:00"))),
1066+
perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
1067+
arrow::array::TimestampSecondArray::from(vec![
1068+
None,
1069+
None, // Return None if can't be cast to second safely
1070+
Some(1234)
1071+
])
1072+
.with_timezone("+00:00")
1073+
);
1074+
1075+
perfectly_shredded_to_arrow_primitive_test!(
1076+
get_variant_perfectly_shredded_timestamp_nano_as_timestamp_milli,
1077+
DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("+00:00"))),
1078+
perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
1079+
arrow::array::TimestampMillisecondArray::from(vec![
1080+
None, // Return None if can't be cast to millisecond safely
1081+
Some(1234),
1082+
Some(1234000)
1083+
])
1084+
.with_timezone("+00:00")
1085+
);
1086+
1087+
perfectly_shredded_variant_array_fn!(
1088+
perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
1089+
|| {
1090+
arrow::array::TimestampNanosecondArray::from(vec![
1091+
Some(1234000), // can't be cast to second & millisecond
1092+
Some(1234000000), // can be cast to millisecond, but not second
1093+
Some(1234000000000), // can be cast to second & millisecond
1094+
])
1095+
}
1096+
);
1097+
1098+
// The following two tests cover the nano_ntz -> second/millisecond cases.
1099+
// The fixture has three items: some can be cast safely and some cannot.
1100+
perfectly_shredded_to_arrow_primitive_test!(
1101+
get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_second,
1102+
DataType::Timestamp(TimeUnit::Second, None),
1103+
perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
1104+
arrow::array::TimestampSecondArray::from(vec![
1105+
None,
1106+
None, // Return None if can't be cast to second safely
1107+
Some(1234)
1108+
])
1109+
);
1110+
1111+
perfectly_shredded_to_arrow_primitive_test!(
1112+
get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_milli,
1113+
DataType::Timestamp(TimeUnit::Millisecond, None),
1114+
perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
1115+
arrow::array::TimestampMillisecondArray::from(vec![
1116+
None, // Return None if can't be cast to millisecond safely
1117+
Some(1234),
1118+
Some(1234000)
1119+
])
1120+
);
1121+
9761122
perfectly_shredded_to_arrow_primitive_test!(
9771123
get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_nano_ntz,
9781124
DataType::Timestamp(TimeUnit::Nanosecond, None),
@@ -1016,6 +1162,17 @@ mod test {
10161162
Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
10171163
);
10181164

1165+
perfectly_shredded_to_arrow_primitive_test!(
1166+
get_variant_perfectly_shredded_date_as_date64,
1167+
DataType::Date64,
1168+
perfectly_shredded_date_variant_array,
1169+
Date64Array::from(vec![
1170+
Some(-1066608000000),
1171+
Some(1519430400000),
1172+
Some(1728000000000)
1173+
])
1174+
);
1175+
10191176
perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array, || {
10201177
Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
10211178
});
@@ -1027,6 +1184,47 @@ mod test {
10271184
Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
10281185
);
10291186

1187+
perfectly_shredded_to_arrow_primitive_test!(
1188+
get_variant_perfectly_shredded_time_as_time64_nano,
1189+
DataType::Time64(TimeUnit::Nanosecond),
1190+
perfectly_shredded_time_variant_array,
1191+
Time64NanosecondArray::from(vec![
1192+
Some(12345000000),
1193+
Some(87654000000),
1194+
Some(135792000000)
1195+
])
1196+
);
1197+
1198+
perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array_for_time32, || {
1199+
Time64MicrosecondArray::from(vec![
1200+
Some(1234), // This can't be cast to Time32
1201+
Some(7654000), // This can be cast to Time32(Millisecond), but not Time32(Second)
1202+
Some(35792000000), // This can be cast to Time32(Second) & Time32(Millisecond)
1203+
])
1204+
});
1205+
1206+
perfectly_shredded_to_arrow_primitive_test!(
1207+
get_variant_perfectly_shredded_time_as_time32_second,
1208+
DataType::Time32(TimeUnit::Second),
1209+
perfectly_shredded_time_variant_array_for_time32,
1210+
Time32SecondArray::from(vec![
1211+
None,
1212+
None, // Return None if can't be cast to Time32(Second) safely
1213+
Some(35792)
1214+
])
1215+
);
1216+
1217+
perfectly_shredded_to_arrow_primitive_test!(
1218+
get_variant_perfectly_shredded_time_as_time32_milli,
1219+
DataType::Time32(TimeUnit::Millisecond),
1220+
perfectly_shredded_time_variant_array_for_time32,
1221+
Time32MillisecondArray::from(vec![
1222+
None, // Return None if can't be cast to Time32(Millisecond) safely
1223+
Some(7654),
1224+
Some(35792000)
1225+
])
1226+
);
1227+
10301228
perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array, || {
10311229
let mut builder = NullBuilder::new();
10321230
builder.append_nulls(3);

0 commit comments

Comments
 (0)