Skip to content

Commit a6a3fd7

Browse files
a-agmon (Alon Agmon) and Alon Agmon authored
test (datafusion): add test for table provider creation (apache#651)
* add test for table provider creation
* fix formatting
* fixing yet another formatting issue
* testing schema using data fusion

---------

Co-authored-by: Alon Agmon <[email protected]>
1 parent 87483b4 commit a6a3fd7

File tree

2 files changed

+176
-0
lines changed

2 files changed

+176
-0
lines changed

crates/integrations/datafusion/src/table.rs

+54
Original file line number | Diff line number | Diff line change
@@ -110,3 +110,57 @@ impl TableProvider for IcebergTableProvider {
110110
Ok(filter_support)
111111
}
112112
}
113+
114+
#[cfg(test)]
mod tests {
    use datafusion::common::Column;
    use datafusion::prelude::SessionContext;
    use iceberg::io::FileIO;
    use iceberg::table::{StaticTable, Table};
    use iceberg::TableIdent;

    use super::*;

    /// Loads the `TableMetadataV2Valid.json` fixture from this crate's
    /// `tests/test_data` directory and returns it as a [`Table`] identified as
    /// `static_ns.static_table`.
    ///
    /// # Panics
    ///
    /// Panics if the `FileIO` cannot be built for the fixture path, if the
    /// identifier is rejected, or if the metadata file cannot be loaded.
    async fn get_test_table_from_metadata_file() -> Table {
        let metadata_file_name = "TableMetadataV2Valid.json";
        // Resolve relative to the crate root so the test does not depend on
        // the working directory it is launched from.
        let metadata_file_path = format!(
            "{}/tests/test_data/{}",
            env!("CARGO_MANIFEST_DIR"),
            metadata_file_name
        );
        let file_io = FileIO::from_path(&metadata_file_path)
            .expect("fixture path should map to a supported FileIO scheme")
            .build()
            .expect("FileIO for the fixture path should build");
        let static_identifier = TableIdent::from_strs(["static_ns", "static_table"])
            .expect("static identifier should be valid");
        let static_table =
            StaticTable::from_metadata_file(&metadata_file_path, static_identifier, file_io)
                .await
                .expect("fixture metadata file should load as a static table");
        static_table.into_table()
    }

    /// Registers a provider built by `IcebergTableProvider::try_new_from_table`
    /// with a DataFusion session and checks the schema DataFusion reports for
    /// `SELECT *`: three columns, the first being `x` with type `Int64`, and a
    /// column named `z` present.
    #[tokio::test]
    async fn test_try_new_from_table() {
        let table = get_test_table_from_metadata_file().await;
        // `table` is not used again, so hand it over by value instead of cloning.
        let table_provider = IcebergTableProvider::try_new_from_table(table)
            .await
            .expect("table provider should be created from the static table");

        let ctx = SessionContext::new();
        ctx.register_table("mytable", Arc::new(table_provider))
            .expect("table provider should register with the session");

        let df = ctx
            .sql("SELECT * FROM mytable")
            .await
            .expect("query over the registered table should plan");
        let df_schema = df.schema();
        let df_columns = df_schema.fields();
        assert_eq!(df_columns.len(), 3);

        // Assert each property on its own so a failure names the mismatching
        // value rather than a combined, debug-escaped string.
        let x_column = df_columns
            .first()
            .expect("schema should have at least one column");
        assert_eq!(x_column.name().as_str(), "x");
        assert_eq!(x_column.data_type().to_string(), "Int64");

        assert!(df_schema.has_column(&Column::from_name("z")));
    }
}
crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json

+122
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,122 @@
1+
{
2+
"format-version": 2,
3+
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
4+
"location": "s3://bucket/test/location",
5+
"last-sequence-number": 34,
6+
"last-updated-ms": 1602638573590,
7+
"last-column-id": 3,
8+
"current-schema-id": 1,
9+
"schemas": [
10+
{
11+
"type": "struct",
12+
"schema-id": 0,
13+
"fields": [
14+
{
15+
"id": 1,
16+
"name": "x",
17+
"required": true,
18+
"type": "long"
19+
}
20+
]
21+
},
22+
{
23+
"type": "struct",
24+
"schema-id": 1,
25+
"identifier-field-ids": [
26+
1,
27+
2
28+
],
29+
"fields": [
30+
{
31+
"id": 1,
32+
"name": "x",
33+
"required": true,
34+
"type": "long"
35+
},
36+
{
37+
"id": 2,
38+
"name": "y",
39+
"required": true,
40+
"type": "long",
41+
"doc": "comment"
42+
},
43+
{
44+
"id": 3,
45+
"name": "z",
46+
"required": true,
47+
"type": "long"
48+
}
49+
]
50+
}
51+
],
52+
"default-spec-id": 0,
53+
"partition-specs": [
54+
{
55+
"spec-id": 0,
56+
"fields": [
57+
{
58+
"name": "x",
59+
"transform": "identity",
60+
"source-id": 1,
61+
"field-id": 1000
62+
}
63+
]
64+
}
65+
],
66+
"last-partition-id": 1000,
67+
"default-sort-order-id": 3,
68+
"sort-orders": [
69+
{
70+
"order-id": 3,
71+
"fields": [
72+
{
73+
"transform": "identity",
74+
"source-id": 2,
75+
"direction": "asc",
76+
"null-order": "nulls-first"
77+
},
78+
{
79+
"transform": "bucket[4]",
80+
"source-id": 3,
81+
"direction": "desc",
82+
"null-order": "nulls-last"
83+
}
84+
]
85+
}
86+
],
87+
"properties": {},
88+
"current-snapshot-id": 3055729675574597004,
89+
"snapshots": [
90+
{
91+
"snapshot-id": 3051729675574597004,
92+
"timestamp-ms": 1515100955770,
93+
"sequence-number": 0,
94+
"summary": {
95+
"operation": "append"
96+
},
97+
"manifest-list": "s3://a/b/1.avro"
98+
},
99+
{
100+
"snapshot-id": 3055729675574597004,
101+
"parent-snapshot-id": 3051729675574597004,
102+
"timestamp-ms": 1555100955770,
103+
"sequence-number": 1,
104+
"summary": {
105+
"operation": "append"
106+
},
107+
"manifest-list": "s3://a/b/2.avro",
108+
"schema-id": 1
109+
}
110+
],
111+
"snapshot-log": [
112+
{
113+
"snapshot-id": 3051729675574597004,
114+
"timestamp-ms": 1515100955770
115+
},
116+
{
117+
"snapshot-id": 3055729675574597004,
118+
"timestamp-ms": 1555100955770
119+
}
120+
],
121+
"metadata-log": []
122+
}

0 commit comments

Comments
 (0)