From 32d53e83b6f3f0e3853fa5f55981087c8e827a67 Mon Sep 17 00:00:00 2001 From: Alon Agmon Date: Fri, 27 Sep 2024 06:41:47 +0300 Subject: [PATCH 1/4] add test for table provider creation --- crates/integrations/datafusion/src/table.rs | 39 ++++++ .../tests/test_data/TableMetadataV2Valid.json | 122 ++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json diff --git a/crates/integrations/datafusion/src/table.rs b/crates/integrations/datafusion/src/table.rs index f12d41eec..88a73289b 100644 --- a/crates/integrations/datafusion/src/table.rs +++ b/crates/integrations/datafusion/src/table.rs @@ -110,3 +110,42 @@ impl TableProvider for IcebergTableProvider { Ok(filter_support) } } + +#[cfg(test)] +mod tests { + use super::*; + use iceberg::io::FileIO; + use iceberg::table::{StaticTable, Table}; + use iceberg::TableIdent; + + async fn get_test_table_from_metadata_file() -> Table { + let metadata_file_name = "TableMetadataV2Valid.json"; + let metadata_file_path = format!( + "{}/tests/test_data/{}", + env!("CARGO_MANIFEST_DIR"), + metadata_file_name + ); + let file_io = FileIO::from_path(&metadata_file_path) + .unwrap() + .build() + .unwrap(); + let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap(); + let static_table = + StaticTable::from_metadata_file(&metadata_file_path, static_identifier, file_io) + .await + .unwrap(); + static_table.into_table() + } + + #[tokio::test] + async fn test_try_new_from_table() { + let table = get_test_table_from_metadata_file().await; + let table_provider = IcebergTableProvider::try_new_from_table(table.clone()) + .await + .unwrap(); + let index_of_z = table_provider.schema().index_of("z").unwrap(); + assert_eq!(index_of_z, 2); + let fields_num = table_provider.schema().fields().len(); + assert_eq!(fields_num, 3); + } +} diff --git a/crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json b/crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json new file mode 100644 index 000000000..0dc89de58 --- /dev/null +++ b/crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json @@ -0,0 +1,122 @@ +{ + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 0, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [ + 1, + 2 + ], + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + } + ], + "default-spec-id": 0, + "partition-specs": [ + { + "spec-id": 0, + "fields": [ + { + "name": "x", + "transform": "identity", + "source-id": 1, + "field-id": 1000 + } + ] + } + ], + "last-partition-id": 1000, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ], + "properties": {}, + "current-snapshot-id": 3055729675574597004, + "snapshots": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770, + "sequence-number": 0, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/1.avro" + }, + { + "snapshot-id": 3055729675574597004, + "parent-snapshot-id": 3051729675574597004, + "timestamp-ms": 1555100955770, + "sequence-number": 1, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/2.avro", + "schema-id": 1 + } + ], + "snapshot-log": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770 + }, + { + "snapshot-id": 3055729675574597004, + "timestamp-ms": 1555100955770 + } + ], + "metadata-log": [] +} \ No newline at end of file From f536d118b9202854ecf7821f4643e3f4e04c1449 Mon Sep 17 00:00:00 2001 From: Alon Agmon Date: Fri, 27 Sep 2024 06:47:55 +0300 Subject: [PATCH 2/4] fix formatting --- crates/integrations/datafusion/src/table.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/integrations/datafusion/src/table.rs b/crates/integrations/datafusion/src/table.rs index 88a73289b..bfc4a66da 100644 --- a/crates/integrations/datafusion/src/table.rs +++ b/crates/integrations/datafusion/src/table.rs @@ -113,11 +113,12 @@ impl TableProvider for IcebergTableProvider { #[cfg(test)] mod tests { - use super::*; use iceberg::io::FileIO; use iceberg::table::{StaticTable, Table}; use iceberg::TableIdent; + use super::*; + async fn get_test_table_from_metadata_file() -> Table { let metadata_file_name = "TableMetadataV2Valid.json"; let metadata_file_path = format!( From 4e8a76a11bf590d4efe683ab02140a9e5383bcb4 Mon Sep 17 00:00:00 2001 From: Alon Agmon Date: Fri, 27 Sep 2024 06:51:00 +0300 Subject: [PATCH 3/4] fixing yet another formatting issue --- crates/integrations/datafusion/src/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/integrations/datafusion/src/table.rs b/crates/integrations/datafusion/src/table.rs index bfc4a66da..570d99f79 100644 --- a/crates/integrations/datafusion/src/table.rs +++ b/crates/integrations/datafusion/src/table.rs @@ -118,7 +118,7 @@ mod tests { use iceberg::TableIdent; use super::*; - + async fn get_test_table_from_metadata_file() -> Table { let metadata_file_name = "TableMetadataV2Valid.json"; let metadata_file_path = format!( From f51314eb2bfaee909cc61b4a11478e8f05c40b68 Mon Sep 17 00:00:00 2001 From: Alon Agmon Date: Fri, 27 Sep 2024 17:29:20 +0300 Subject: [PATCH 4/4] testing schema using data fusion --- crates/integrations/datafusion/src/table.rs | 22 +++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/crates/integrations/datafusion/src/table.rs b/crates/integrations/datafusion/src/table.rs index 570d99f79..2797e12d6 100644 --- a/crates/integrations/datafusion/src/table.rs +++ b/crates/integrations/datafusion/src/table.rs @@ -113,6 +113,8 @@ impl TableProvider for IcebergTableProvider { #[cfg(test)] mod tests { + use datafusion::common::Column; + use datafusion::prelude::SessionContext; use iceberg::io::FileIO; use iceberg::table::{StaticTable, Table}; use iceberg::TableIdent; @@ -144,9 +146,21 @@ mod tests { let table_provider = IcebergTableProvider::try_new_from_table(table.clone()) .await .unwrap(); - let index_of_z = table_provider.schema().index_of("z").unwrap(); - assert_eq!(index_of_z, 2); - let fields_num = table_provider.schema().fields().len(); - assert_eq!(fields_num, 3); + let ctx = SessionContext::new(); + ctx.register_table("mytable", Arc::new(table_provider)) + .unwrap(); + let df = ctx.sql("SELECT * FROM mytable").await.unwrap(); + let df_schema = df.schema(); + let df_columns = df_schema.fields(); + assert_eq!(df_columns.len(), 3); + let x_column = df_columns.first().unwrap(); + let column_data = format!( + "{:?}:{:?}", + x_column.name(), + x_column.data_type().to_string() + ); + assert_eq!(column_data, "\"x\":\"Int64\""); + let has_column = df_schema.has_column(&Column::from_name("z")); + assert!(has_column); } }