Skip to content

Commit 97701dd

Browse files
Sl1mb0shaeqahmed
authored andcommitted
feat(timestamp_ns): Implement timestamps with nanosecond precision (apache#542)
* feat(timestamp_ns): first commit * feat(timestamp_ns): Add mappings for timestamp_ns/timestamptz_ns * feat(timestamp_ns): Remove unused dep * feat(timestamp_ns): Fix unit test * feat(timestamp_ns): Fix test_all_type_for_write() * feat(timestamp_ns): fix test_transform_days_literal * feat(timestamp_ns): fix math for timestamptz_nanos * chore: formatting * chore: formatting * chore: Appease clippy --------- Co-authored-by: Timothy Maloney <[email protected]>
1 parent e4e60d3 commit 97701dd

File tree

14 files changed

+353
-30
lines changed

14 files changed

+353
-30
lines changed

crates/catalog/glue/src/schema.rs

+2
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ impl SchemaVisitor for GlueSchemaBuilder {
164164
PrimitiveType::Double => "double".to_string(),
165165
PrimitiveType::Date => "date".to_string(),
166166
PrimitiveType::Timestamp => "timestamp".to_string(),
167+
PrimitiveType::TimestampNs => "timestamp_ns".to_string(),
168+
PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(),
167169
PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => {
168170
"string".to_string()
169171
}

crates/catalog/hms/src/schema.rs

+2
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ impl SchemaVisitor for HiveSchemaBuilder {
121121
PrimitiveType::Double => "double".to_string(),
122122
PrimitiveType::Date => "date".to_string(),
123123
PrimitiveType::Timestamp => "timestamp".to_string(),
124+
PrimitiveType::TimestampNs => "timestamp_ns".to_string(),
125+
PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(),
124126
PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => {
125127
"string".to_string()
126128
}

crates/iceberg/src/arrow/schema.rs

+7
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,13 @@ impl SchemaVisitor for ToArrowSchemaConverter {
579579
// Timestampz always stored as UTC
580580
DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
581581
)),
582+
crate::spec::PrimitiveType::TimestampNs => Ok(ArrowSchemaOrFieldOrType::Type(
583+
DataType::Timestamp(TimeUnit::Nanosecond, None),
584+
)),
585+
crate::spec::PrimitiveType::TimestamptzNs => Ok(ArrowSchemaOrFieldOrType::Type(
586+
// Store timestamptz_ns as UTC
587+
DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())),
588+
)),
582589
crate::spec::PrimitiveType::String => {
583590
Ok(ArrowSchemaOrFieldOrType::Type(DataType::Utf8))
584591
}

crates/iceberg/src/avro/schema.rs

+3
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,8 @@ impl SchemaVisitor for SchemaToAvroSchema {
239239
PrimitiveType::Time => AvroSchema::TimeMicros,
240240
PrimitiveType::Timestamp => AvroSchema::TimestampMicros,
241241
PrimitiveType::Timestamptz => AvroSchema::TimestampMicros,
242+
PrimitiveType::TimestampNs => AvroSchema::TimestampNanos,
243+
PrimitiveType::TimestamptzNs => AvroSchema::TimestampNanos,
242244
PrimitiveType::String => AvroSchema::String,
243245
PrimitiveType::Uuid => avro_fixed_schema(UUID_BYTES, Some(UUID_LOGICAL_TYPE))?,
244246
PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize, None)?,
@@ -530,6 +532,7 @@ impl AvroSchemaVisitor for AvroSchemaToSchema {
530532
AvroSchema::Date => Type::Primitive(PrimitiveType::Date),
531533
AvroSchema::TimeMicros => Type::Primitive(PrimitiveType::Time),
532534
AvroSchema::TimestampMicros => Type::Primitive(PrimitiveType::Timestamp),
535+
AvroSchema::TimestampNanos => Type::Primitive(PrimitiveType::TimestampNs),
533536
AvroSchema::Boolean => Type::Primitive(PrimitiveType::Boolean),
534537
AvroSchema::Int => Type::Primitive(PrimitiveType::Int),
535538
AvroSchema::Long => Type::Primitive(PrimitiveType::Long),

crates/iceberg/src/spec/datatypes.rs

+10
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,10 @@ pub enum PrimitiveType {
225225
Timestamp,
226226
/// Timestamp in microsecond precision, with timezone
227227
Timestamptz,
228+
/// Timestamp in nanosecond precision, without timezone
229+
TimestampNs,
230+
/// Timestamp in nanosecond precision with timezone
231+
TimestamptzNs,
228232
/// Arbitrary-length character sequences encoded in utf-8
229233
String,
230234
/// Universally Unique Identifiers, should use 16-byte fixed
@@ -250,6 +254,8 @@ impl PrimitiveType {
250254
| (PrimitiveType::Time, PrimitiveLiteral::Long(_))
251255
| (PrimitiveType::Timestamp, PrimitiveLiteral::Long(_))
252256
| (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(_))
257+
| (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(_))
258+
| (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(_))
253259
| (PrimitiveType::String, PrimitiveLiteral::String(_))
254260
| (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(_))
255261
| (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(_))
@@ -360,6 +366,8 @@ impl fmt::Display for PrimitiveType {
360366
PrimitiveType::Time => write!(f, "time"),
361367
PrimitiveType::Timestamp => write!(f, "timestamp"),
362368
PrimitiveType::Timestamptz => write!(f, "timestamptz"),
369+
PrimitiveType::TimestampNs => write!(f, "timestamp_ns"),
370+
PrimitiveType::TimestamptzNs => write!(f, "timestamptz_ns"),
363371
PrimitiveType::String => write!(f, "string"),
364372
PrimitiveType::Uuid => write!(f, "uuid"),
365373
PrimitiveType::Fixed(size) => write!(f, "fixed({})", size),
@@ -1158,6 +1166,8 @@ mod tests {
11581166
(PrimitiveType::Time, PrimitiveLiteral::Long(1)),
11591167
(PrimitiveType::Timestamptz, PrimitiveLiteral::Long(1)),
11601168
(PrimitiveType::Timestamp, PrimitiveLiteral::Long(1)),
1169+
(PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(1)),
1170+
(PrimitiveType::TimestampNs, PrimitiveLiteral::Long(1)),
11611171
(
11621172
PrimitiveType::Uuid,
11631173
PrimitiveLiteral::UInt128(Uuid::new_v4().as_u128()),

crates/iceberg/src/spec/manifest.rs

+10
Original file line numberDiff line numberDiff line change
@@ -1570,6 +1570,11 @@ mod tests {
15701570
"v_ts_ntz",
15711571
Type::Primitive(PrimitiveType::Timestamp),
15721572
)),
1573+
Arc::new(NestedField::optional(
1574+
12,
1575+
"v_ts_ns_ntz",
1576+
Type::Primitive(PrimitiveType::TimestampNs
1577+
))),
15731578
])
15741579
.build()
15751580
.unwrap(),
@@ -1678,6 +1683,11 @@ mod tests {
16781683
"v_ts_ntz",
16791684
Type::Primitive(PrimitiveType::Timestamp),
16801685
)),
1686+
Arc::new(NestedField::optional(
1687+
12,
1688+
"v_ts_ns_ntz",
1689+
Type::Primitive(PrimitiveType::TimestampNs
1690+
)))
16811691
])
16821692
.build()
16831693
.unwrap(),

crates/iceberg/src/spec/transform.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ impl Transform {
159159
| PrimitiveType::Time
160160
| PrimitiveType::Timestamp
161161
| PrimitiveType::Timestamptz
162+
| PrimitiveType::TimestampNs
163+
| PrimitiveType::TimestamptzNs
162164
| PrimitiveType::String
163165
| PrimitiveType::Uuid
164166
| PrimitiveType::Fixed(_)
@@ -200,6 +202,8 @@ impl Transform {
200202
match p {
201203
PrimitiveType::Timestamp
202204
| PrimitiveType::Timestamptz
205+
| PrimitiveType::TimestampNs
206+
| PrimitiveType::TimestamptzNs
203207
| PrimitiveType::Date => Ok(Type::Primitive(PrimitiveType::Date)),
204208
_ => Err(Error::new(
205209
ErrorKind::DataInvalid,
@@ -216,9 +220,10 @@ impl Transform {
216220
Transform::Hour => {
217221
if let Type::Primitive(p) = input_type {
218222
match p {
219-
PrimitiveType::Timestamp | PrimitiveType::Timestamptz => {
220-
Ok(Type::Primitive(PrimitiveType::Int))
221-
}
223+
PrimitiveType::Timestamp
224+
| PrimitiveType::Timestamptz
225+
| PrimitiveType::TimestampNs
226+
| PrimitiveType::TimestamptzNs => Ok(Type::Primitive(PrimitiveType::Int)),
222227
_ => Err(Error::new(
223228
ErrorKind::DataInvalid,
224229
format!("{input_type} is not a valid input type of {self} transform",),

crates/iceberg/src/spec/values.rs

+70-1
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,15 @@ use serde::ser::SerializeStruct;
3838
use serde::{Deserialize, Serialize};
3939
use serde_bytes::ByteBuf;
4040
use serde_json::{Map as JsonMap, Number, Value as JsonValue};
41+
use timestamp::nanoseconds_to_datetime;
4142
use uuid::Uuid;
4243

4344
use super::datatypes::{PrimitiveType, Type};
4445
use crate::error::Result;
4546
use crate::spec::values::date::{date_from_naive_date, days_to_date, unix_epoch};
4647
use crate::spec::values::time::microseconds_to_time;
4748
use crate::spec::values::timestamp::microseconds_to_datetime;
48-
use crate::spec::values::timestamptz::microseconds_to_datetimetz;
49+
use crate::spec::values::timestamptz::{microseconds_to_datetimetz, nanoseconds_to_datetimetz};
4950
use crate::spec::MAX_DECIMAL_PRECISION;
5051
use crate::{ensure_data_valid, Error, ErrorKind};
5152

@@ -326,6 +327,12 @@ impl Display for Datum {
326327
(PrimitiveType::Timestamptz, PrimitiveLiteral::Long(val)) => {
327328
write!(f, "{}", microseconds_to_datetimetz(*val))
328329
}
330+
(PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) => {
331+
write!(f, "{}", nanoseconds_to_datetime(*val))
332+
}
333+
(PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) => {
334+
write!(f, "{}", nanoseconds_to_datetimetz(*val))
335+
}
329336
(_, PrimitiveLiteral::String(val)) => write!(f, r#""{}""#, val),
330337
(PrimitiveType::Uuid, PrimitiveLiteral::UInt128(val)) => {
331338
write!(f, "{}", Uuid::from_u128(*val))
@@ -401,6 +408,12 @@ impl Datum {
401408
PrimitiveType::Timestamptz => {
402409
PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
403410
}
411+
PrimitiveType::TimestampNs => {
412+
PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
413+
}
414+
PrimitiveType::TimestamptzNs => {
415+
PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
416+
}
404417
PrimitiveType::String => {
405418
PrimitiveLiteral::String(std::str::from_utf8(bytes)?.to_string())
406419
}
@@ -734,6 +747,23 @@ impl Datum {
734747
}
735748
}
736749

750+
/// Creates a timestamp from unix epoch in nanoseconds.
751+
///
752+
/// Example:
753+
///
754+
/// ```rust
755+
/// use iceberg::spec::Datum;
756+
/// let t = Datum::timestamp_nanos(1000);
757+
///
758+
/// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001");
759+
/// ```
760+
pub fn timestamp_nanos(value: i64) -> Self {
761+
Self {
762+
r#type: PrimitiveType::TimestampNs,
763+
literal: PrimitiveLiteral::Long(value),
764+
}
765+
}
766+
737767
/// Creates a timestamp from [`DateTime`].
738768
///
739769
/// Example:
@@ -792,6 +822,23 @@ impl Datum {
792822
}
793823
}
794824

825+
/// Creates a timestamp with timezone from unix epoch in nanoseconds.
826+
///
827+
/// Example:
828+
///
829+
/// ```rust
830+
/// use iceberg::spec::Datum;
831+
/// let t = Datum::timestamptz_nanos(1000);
832+
///
833+
/// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001 UTC");
834+
/// ```
835+
pub fn timestamptz_nanos(value: i64) -> Self {
836+
Self {
837+
r#type: PrimitiveType::TimestamptzNs,
838+
literal: PrimitiveLiteral::Long(value),
839+
}
840+
}
841+
795842
/// Creates a timestamp with timezone from [`DateTime`].
796843
/// Example:
797844
///
@@ -1805,6 +1852,18 @@ impl Literal {
18051852
.format("%Y-%m-%dT%H:%M:%S%.f+00:00")
18061853
.to_string(),
18071854
)),
1855+
(PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) => Ok(JsonValue::String(
1856+
timestamp::nanoseconds_to_datetime(val)
1857+
.format("%Y-%m-%dT%H:%M:%S%.f")
1858+
.to_string(),
1859+
)),
1860+
(PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) => {
1861+
Ok(JsonValue::String(
1862+
timestamptz::nanoseconds_to_datetimetz(val)
1863+
.format("%Y-%m-%dT%H:%M:%S%.f+00:00")
1864+
.to_string(),
1865+
))
1866+
}
18081867
(PrimitiveType::String, PrimitiveLiteral::String(val)) => {
18091868
Ok(JsonValue::String(val.clone()))
18101869
}
@@ -1958,6 +2017,10 @@ mod timestamp {
19582017
// This shouldn't fail until the year 262000
19592018
DateTime::from_timestamp_micros(micros).unwrap().naive_utc()
19602019
}
2020+
2021+
pub(crate) fn nanoseconds_to_datetime(nanos: i64) -> NaiveDateTime {
2022+
DateTime::from_timestamp_nanos(nanos).naive_utc()
2023+
}
19612024
}
19622025

19632026
mod timestamptz {
@@ -1972,6 +2035,12 @@ mod timestamptz {
19722035

19732036
DateTime::from_timestamp(secs, rem as u32 * 1_000).unwrap()
19742037
}
2038+
2039+
pub(crate) fn nanoseconds_to_datetimetz(nanos: i64) -> DateTime<Utc> {
2040+
let (secs, rem) = (nanos / 1_000_000_000, nanos % 1_000_000_000);
2041+
2042+
DateTime::from_timestamp(secs, rem as u32).unwrap()
2043+
}
19752044
}
19762045

19772046
mod _serde {

crates/iceberg/src/transform/bucket.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ mod test {
256256
use crate::expr::PredicateOperator;
257257
use crate::spec::PrimitiveType::{
258258
Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp,
259-
Timestamptz, Uuid,
259+
TimestampNs, Timestamptz, TimestamptzNs, Uuid,
260260
};
261261
use crate::spec::Type::{Primitive, Struct};
262262
use crate::spec::{Datum, NestedField, PrimitiveType, StructType, Transform, Type};
@@ -297,6 +297,8 @@ mod test {
297297
(Primitive(Time), Some(Primitive(Int))),
298298
(Primitive(Timestamp), Some(Primitive(Int))),
299299
(Primitive(Timestamptz), Some(Primitive(Int))),
300+
(Primitive(TimestampNs), Some(Primitive(Int))),
301+
(Primitive(TimestamptzNs), Some(Primitive(Int))),
300302
(
301303
Struct(StructType::new(vec![NestedField::optional(
302304
1,

crates/iceberg/src/transform/identity.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ impl TransformFunction for Identity {
3838
mod test {
3939
use crate::spec::PrimitiveType::{
4040
Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp,
41-
Timestamptz, Uuid,
41+
TimestampNs, Timestamptz, TimestamptzNs, Uuid,
4242
};
4343
use crate::spec::Type::{Primitive, Struct};
4444
use crate::spec::{NestedField, StructType, Transform};
@@ -81,6 +81,8 @@ mod test {
8181
(Primitive(Time), Some(Primitive(Time))),
8282
(Primitive(Timestamp), Some(Primitive(Timestamp))),
8383
(Primitive(Timestamptz), Some(Primitive(Timestamptz))),
84+
(Primitive(TimestampNs), Some(Primitive(TimestampNs))),
85+
(Primitive(TimestamptzNs), Some(Primitive(TimestamptzNs))),
8486
(
8587
Struct(StructType::new(vec![NestedField::optional(
8688
1,

0 commit comments

Comments
 (0)