Skip to content

Commit fe41a68

Browse files
nevi-mekszucs
authored andcommitted
ARROW-7324: [Rust] Add timezone to timestamp
This changes `DataType::TimeStamp(TimeUnit)` to `DataType::TimeStamp(TimeUnit, Option<String>)` where the `Option<String>` is an optional timezone. I would like some feedback on whether this option is fine. I haven't tried `Option<&str>` to avoid introducing a lifetime on the `DataType`. The timezone support should practically only affect temporal kernels (`fn value_as_date_time()`), but I haven't done anything there. This could be a follow up PR if there's a wider need for timezones in temporal kernels (I'll likely need them, but not urgent). Are you fine with the current proposal/implementation @andygrove @paddyhoran @sunchao @liurenjie1024 CC @andy-thomason to keep you in the loop. Closes #5970 from nevi-me/ARROW-7324 and squashes the following commits: 694bb13 <Neville Dipale> remove From<Vec<i64>> from timestamp arrays 7d3dba7 <Neville Dipale> test timezone roundtrip on IPC schema conversion 64bcfa3 <Neville Dipale> add constructors for timestamps with timezones 6746f96 <Neville Dipale> ARROW-7324: Add timezone to timestamp Authored-by: Neville Dipale <[email protected]> Signed-off-by: Krisztián Szűcs <[email protected]>
1 parent 40c000f commit fe41a68

File tree

14 files changed

+420
-153
lines changed

14 files changed

+420
-153
lines changed

rust/arrow/benches/cast_kernels.rs

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,19 @@ where
4343
criterion::black_box(cast(&array, &to_type).unwrap());
4444
}
4545

46+
// cast timestamp array from specified primitive array type to desired data type
47+
fn cast_timestamp_array<FROM>(size: usize, to_type: DataType) -> ()
48+
where
49+
FROM: ArrowTimestampType,
50+
Standard: Distribution<i64>,
51+
{
52+
let array = Arc::new(PrimitiveArray::<FROM>::from_vec(
53+
vec![random::<i64>(); size],
54+
None,
55+
)) as ArrayRef;
56+
criterion::black_box(cast(&array, &to_type).unwrap());
57+
}
58+
4659
fn add_benchmark(c: &mut Criterion) {
4760
c.bench_function("cast int32 to int32 512", |b| {
4861
b.iter(|| cast_array::<Int32Type>(512, DataType::Int32))
@@ -94,22 +107,22 @@ fn add_benchmark(c: &mut Criterion) {
94107
});
95108
c.bench_function("cast timestamp_ns to timestamp_s 512", |b| {
96109
b.iter(|| {
97-
cast_array::<TimestampNanosecondType>(
110+
cast_timestamp_array::<TimestampNanosecondType>(
98111
512,
99-
DataType::Timestamp(TimeUnit::Nanosecond),
112+
DataType::Timestamp(TimeUnit::Nanosecond, None),
100113
)
101114
})
102115
});
103116
c.bench_function("cast timestamp_ms to timestamp_ns 512", |b| {
104117
b.iter(|| {
105-
cast_array::<TimestampMillisecondType>(
118+
cast_timestamp_array::<TimestampMillisecondType>(
106119
512,
107-
DataType::Timestamp(TimeUnit::Nanosecond),
120+
DataType::Timestamp(TimeUnit::Nanosecond, None),
108121
)
109122
})
110123
});
111124
c.bench_function("cast timestamp_ms to i64 512", |b| {
112-
b.iter(|| cast_array::<TimestampMillisecondType>(512, DataType::Int64))
125+
b.iter(|| cast_timestamp_array::<TimestampMillisecondType>(512, DataType::Int64))
113126
});
114127
}
115128

rust/arrow/src/array/array.rs

Lines changed: 55 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -123,16 +123,16 @@ pub fn make_array(data: ArrayDataRef) -> ArrayRef {
123123
DataType::Time64(TimeUnit::Nanosecond) => {
124124
Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
125125
}
126-
DataType::Timestamp(TimeUnit::Second) => {
126+
DataType::Timestamp(TimeUnit::Second, _) => {
127127
Arc::new(TimestampSecondArray::from(data)) as ArrayRef
128128
}
129-
DataType::Timestamp(TimeUnit::Millisecond) => {
129+
DataType::Timestamp(TimeUnit::Millisecond, _) => {
130130
Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
131131
}
132-
DataType::Timestamp(TimeUnit::Microsecond) => {
132+
DataType::Timestamp(TimeUnit::Microsecond, _) => {
133133
Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
134134
}
135-
DataType::Timestamp(TimeUnit::Nanosecond) => {
135+
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
136136
Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
137137
}
138138
DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
@@ -332,7 +332,7 @@ where
332332
(v % MILLISECONDS * MICROSECONDS) as u32,
333333
)),
334334
DataType::Time32(_) | DataType::Time64(_) => None,
335-
DataType::Timestamp(unit) => match unit {
335+
DataType::Timestamp(unit, _) => match unit {
336336
TimeUnit::Second => Some(NaiveDateTime::from_timestamp(v, 0)),
337337
TimeUnit::Millisecond => Some(NaiveDateTime::from_timestamp(
338338
// extract seconds from milliseconds
@@ -416,7 +416,7 @@ where
416416
_ => None,
417417
}
418418
}
419-
DataType::Timestamp(_) => match self.value_as_datetime(i) {
419+
DataType::Timestamp(_, _) => match self.value_as_datetime(i) {
420420
Some(datetime) => Some(datetime.time()),
421421
None => None,
422422
},
@@ -468,7 +468,7 @@ where
468468
None => write!(f, "null"),
469469
}
470470
}
471-
DataType::Timestamp(_) => match array.value_as_datetime(index) {
471+
DataType::Timestamp(_, _) => match array.value_as_datetime(index) {
472472
Some(datetime) => write!(f, "{:?}", datetime),
473473
None => write!(f, "null"),
474474
},
@@ -581,28 +581,7 @@ def_numeric_from_vec!(UInt32Type, u32, DataType::UInt32);
581581
def_numeric_from_vec!(UInt64Type, u64, DataType::UInt64);
582582
def_numeric_from_vec!(Float32Type, f32, DataType::Float32);
583583
def_numeric_from_vec!(Float64Type, f64, DataType::Float64);
584-
// TODO: add temporal arrays
585584

586-
def_numeric_from_vec!(
587-
TimestampSecondType,
588-
i64,
589-
DataType::Timestamp(TimeUnit::Second)
590-
);
591-
def_numeric_from_vec!(
592-
TimestampMillisecondType,
593-
i64,
594-
DataType::Timestamp(TimeUnit::Millisecond)
595-
);
596-
def_numeric_from_vec!(
597-
TimestampMicrosecondType,
598-
i64,
599-
DataType::Timestamp(TimeUnit::Microsecond)
600-
);
601-
def_numeric_from_vec!(
602-
TimestampNanosecondType,
603-
i64,
604-
DataType::Timestamp(TimeUnit::Nanosecond)
605-
);
606585
def_numeric_from_vec!(Date32Type, i32, DataType::Date32(DateUnit::Day));
607586
def_numeric_from_vec!(Date64Type, i64, DataType::Date64(DateUnit::Millisecond));
608587
def_numeric_from_vec!(Time32SecondType, i32, DataType::Time32(TimeUnit::Second));
@@ -622,6 +601,52 @@ def_numeric_from_vec!(
622601
DataType::Time64(TimeUnit::Nanosecond)
623602
);
624603

604+
impl<T: ArrowTimestampType> PrimitiveArray<T> {
605+
/// Construct a timestamp array from a vec of i64 values and an optional timezone
606+
pub fn from_vec(data: Vec<i64>, timezone: Option<Arc<String>>) -> Self {
607+
let array_data =
608+
ArrayData::builder(DataType::Timestamp(T::get_time_unit(), timezone))
609+
.len(data.len())
610+
.add_buffer(Buffer::from(data.to_byte_slice()))
611+
.build();
612+
PrimitiveArray::from(array_data)
613+
}
614+
}
615+
616+
impl<T: ArrowTimestampType> PrimitiveArray<T> {
617+
/// Construct a timestamp array from a vec of Option<i64> values and an optional timezone
618+
pub fn from_opt_vec(data: Vec<Option<i64>>, timezone: Option<Arc<String>>) -> Self {
619+
// TODO: duplicated from def_numeric_from_vec! macro, it looks possible to convert to generic
620+
let data_len = data.len();
621+
let num_bytes = bit_util::ceil(data_len, 8);
622+
let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
623+
let mut val_buf = MutableBuffer::new(data_len * mem::size_of::<i64>());
624+
625+
{
626+
let null = vec![0; mem::size_of::<i64>()];
627+
let null_slice = null_buf.data_mut();
628+
for (i, v) in data.iter().enumerate() {
629+
if let Some(n) = v {
630+
bit_util::set_bit(null_slice, i);
631+
// unwrap() in the following should be safe here since we've
632+
// made sure enough space is allocated for the values.
633+
val_buf.write(&n.to_byte_slice()).unwrap();
634+
} else {
635+
val_buf.write(&null).unwrap();
636+
}
637+
}
638+
}
639+
640+
let array_data =
641+
ArrayData::builder(DataType::Timestamp(T::get_time_unit(), timezone))
642+
.len(data_len)
643+
.add_buffer(val_buf.freeze())
644+
.null_bit_buffer(null_buf.freeze())
645+
.build();
646+
PrimitiveArray::from(array_data)
647+
}
648+
}
649+
625650
/// Constructs a boolean array from a vector. Should only be used for testing.
626651
impl From<Vec<bool>> for BooleanArray {
627652
fn from(data: Vec<bool>) -> Self {
@@ -1785,9 +1810,9 @@ mod tests {
17851810
#[test]
17861811
fn test_timestamp_fmt_debug() {
17871812
let arr: PrimitiveArray<TimestampMillisecondType> =
1788-
vec![1546214400000, 1546214400000].into();
1813+
TimestampMillisecondArray::from_vec(vec![1546214400000, 1546214400000], None);
17891814
assert_eq!(
1790-
"PrimitiveArray<Timestamp(Millisecond)>\n[\n 2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n]",
1815+
"PrimitiveArray<Timestamp(Millisecond, None)>\n[\n 2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n]",
17911816
format!("{:?}", arr)
17921817
);
17931818
}

rust/arrow/src/array/builder.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -877,16 +877,16 @@ impl StructBuilder {
877877
DataType::Time64(TimeUnit::Nanosecond) => {
878878
Box::new(Time64NanosecondBuilder::new(capacity))
879879
}
880-
DataType::Timestamp(TimeUnit::Second) => {
880+
DataType::Timestamp(TimeUnit::Second, _) => {
881881
Box::new(TimestampSecondBuilder::new(capacity))
882882
}
883-
DataType::Timestamp(TimeUnit::Millisecond) => {
883+
DataType::Timestamp(TimeUnit::Millisecond, _) => {
884884
Box::new(TimestampMillisecondBuilder::new(capacity))
885885
}
886-
DataType::Timestamp(TimeUnit::Microsecond) => {
886+
DataType::Timestamp(TimeUnit::Microsecond, _) => {
887887
Box::new(TimestampMicrosecondBuilder::new(capacity))
888888
}
889-
DataType::Timestamp(TimeUnit::Nanosecond) => {
889+
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
890890
Box::new(TimestampNanosecondBuilder::new(capacity))
891891
}
892892
DataType::Struct(fields) => {

rust/arrow/src/compute/kernels/cast.rs

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,8 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
431431
_ => unreachable!("array type not supported"),
432432
}
433433
}
434-
(Timestamp(_), Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
435-
(Int64, Timestamp(to_unit)) => {
434+
(Timestamp(_, _), Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
435+
(Int64, Timestamp(to_unit, _)) => {
436436
use TimeUnit::*;
437437
match to_unit {
438438
Second => cast_array_data::<TimestampSecondType>(array, to_type.clone()),
@@ -447,7 +447,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
447447
}
448448
}
449449
}
450-
(Timestamp(from_unit), Timestamp(to_unit)) => {
450+
(Timestamp(from_unit, _), Timestamp(to_unit, _)) => {
451451
let time_array = Int64Array::from(array.data());
452452
let from_size = time_unit_multiple(&from_unit);
453453
let to_size = time_unit_multiple(&to_unit);
@@ -484,7 +484,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
484484
),
485485
}
486486
}
487-
(Timestamp(from_unit), Date32(_)) => {
487+
(Timestamp(from_unit, _), Date32(_)) => {
488488
let time_array = Int64Array::from(array.data());
489489
let from_size = time_unit_multiple(&from_unit) * SECONDS_IN_DAY;
490490
let mut b = Date32Builder::new(array.len());
@@ -498,7 +498,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
498498

499499
Ok(Arc::new(b.finish()) as ArrayRef)
500500
}
501-
(Timestamp(from_unit), Date64(_)) => {
501+
(Timestamp(from_unit, _), Date64(_)) => {
502502
let from_size = time_unit_multiple(&from_unit);
503503
let to_size = MILLISECONDS;
504504
if from_size != to_size {
@@ -933,12 +933,12 @@ mod tests {
933933

934934
#[test]
935935
#[should_panic(
936-
expected = "Casting from Int32 to Timestamp(Microsecond) not supported"
936+
expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported"
937937
)]
938938
fn test_cast_int32_to_timestamp() {
939939
let a = Int32Array::from(vec![Some(2), Some(10), None]);
940940
let array = Arc::new(a) as ArrayRef;
941-
cast(&array, &DataType::Timestamp(TimeUnit::Microsecond)).unwrap();
941+
cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap();
942942
}
943943

944944
#[test]
@@ -994,7 +994,7 @@ mod tests {
994994

995995
#[test]
996996
#[should_panic(
997-
expected = "Casting from Int32 to Timestamp(Microsecond) not supported"
997+
expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported"
998998
)]
999999
fn test_cast_list_i32_to_list_timestamp() {
10001000
// Construct a value array
@@ -1014,7 +1014,7 @@ mod tests {
10141014

10151015
cast(
10161016
&list_array,
1017-
&DataType::List(Box::new(DataType::Timestamp(TimeUnit::Microsecond))),
1017+
&DataType::List(Box::new(DataType::Timestamp(TimeUnit::Microsecond, None))),
10181018
)
10191019
.unwrap();
10201020
}
@@ -1062,11 +1062,10 @@ mod tests {
10621062

10631063
#[test]
10641064
fn test_cast_timestamp_to_date32() {
1065-
let a = TimestampMillisecondArray::from(vec![
1066-
Some(864000000005),
1067-
Some(1545696000001),
1068-
None,
1069-
]);
1065+
let a = TimestampMillisecondArray::from_opt_vec(
1066+
vec![Some(864000000005), Some(1545696000001), None],
1067+
Some(Arc::new(String::from("UTC"))),
1068+
);
10701069
let array = Arc::new(a) as ArrayRef;
10711070
let b = cast(&array, &DataType::Date32(DateUnit::Day)).unwrap();
10721071
let c = b.as_any().downcast_ref::<Date32Array>().unwrap();
@@ -1077,11 +1076,10 @@ mod tests {
10771076

10781077
#[test]
10791078
fn test_cast_timestamp_to_date64() {
1080-
let a = TimestampMillisecondArray::from(vec![
1081-
Some(864000000005),
1082-
Some(1545696000001),
1079+
let a = TimestampMillisecondArray::from_opt_vec(
1080+
vec![Some(864000000005), Some(1545696000001), None],
10831081
None,
1084-
]);
1082+
);
10851083
let array = Arc::new(a) as ArrayRef;
10861084
let b = cast(&array, &DataType::Date64(DateUnit::Millisecond)).unwrap();
10871085
let c = b.as_any().downcast_ref::<Date64Array>().unwrap();
@@ -1092,11 +1090,10 @@ mod tests {
10921090

10931091
#[test]
10941092
fn test_cast_timestamp_to_i64() {
1095-
let a = TimestampMillisecondArray::from(vec![
1096-
Some(864000000005),
1097-
Some(1545696000001),
1098-
None,
1099-
]);
1093+
let a = TimestampMillisecondArray::from_opt_vec(
1094+
vec![Some(864000000005), Some(1545696000001), None],
1095+
Some(Arc::new("UTC".to_string())),
1096+
);
11001097
let array = Arc::new(a) as ArrayRef;
11011098
let b = cast(&array, &DataType::Int64).unwrap();
11021099
let c = b.as_any().downcast_ref::<Int64Array>().unwrap();
@@ -1108,13 +1105,12 @@ mod tests {
11081105

11091106
#[test]
11101107
fn test_cast_between_timestamps() {
1111-
let a = TimestampMillisecondArray::from(vec![
1112-
Some(864000003005),
1113-
Some(1545696002001),
1108+
let a = TimestampMillisecondArray::from_opt_vec(
1109+
vec![Some(864000003005), Some(1545696002001), None],
11141110
None,
1115-
]);
1111+
);
11161112
let array = Arc::new(a) as ArrayRef;
1117-
let b = cast(&array, &DataType::Timestamp(TimeUnit::Second)).unwrap();
1113+
let b = cast(&array, &DataType::Timestamp(TimeUnit::Second, None)).unwrap();
11181114
let c = b.as_any().downcast_ref::<TimestampSecondArray>().unwrap();
11191115
assert_eq!(864000003, c.value(0));
11201116
assert_eq!(1545696002, c.value(1));

rust/arrow/src/compute/kernels/take.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,16 +76,16 @@ pub fn take(
7676
DataType::Time64(Nanosecond) => {
7777
take_primitive::<Time64NanosecondType>(values, indices)
7878
}
79-
DataType::Timestamp(Second) => {
79+
DataType::Timestamp(Second, _) => {
8080
take_primitive::<TimestampSecondType>(values, indices)
8181
}
82-
DataType::Timestamp(Millisecond) => {
82+
DataType::Timestamp(Millisecond, _) => {
8383
take_primitive::<TimestampMillisecondType>(values, indices)
8484
}
85-
DataType::Timestamp(Microsecond) => {
85+
DataType::Timestamp(Microsecond, _) => {
8686
take_primitive::<TimestampMicrosecondType>(values, indices)
8787
}
88-
DataType::Timestamp(Nanosecond) => {
88+
DataType::Timestamp(Nanosecond, _) => {
8989
take_primitive::<TimestampNanosecondType>(values, indices)
9090
}
9191
DataType::Utf8 => take_string(values, indices),

0 commit comments

Comments
 (0)