Skip to content

Commit 12e12e2

Browse files
authored
feat: expose arrow type <-> iceberg type (apache#637)
* feat: expose arrow type <-> iceberg type Previously we only exposed the schema conversion. Signed-off-by: xxchan <[email protected]> * add tests Signed-off-by: xxchan <[email protected]> --------- Signed-off-by: xxchan <[email protected]>
1 parent 3b27c9e commit 12e12e2

File tree

1 file changed

+102
-6
lines changed

1 file changed

+102
-6
lines changed

crates/iceberg/src/arrow/schema.rs

+102-6
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,6 @@ fn visit_type<V: ArrowSchemaVisitor>(r#type: &DataType, visitor: &mut V) -> Resu
171171
}
172172

173173
/// Visit list types in post order.
174-
#[allow(dead_code)]
175174
fn visit_list<V: ArrowSchemaVisitor>(
176175
data_type: &DataType,
177176
element_field: &Field,
@@ -184,7 +183,6 @@ fn visit_list<V: ArrowSchemaVisitor>(
184183
}
185184

186185
/// Visit struct type in post order.
187-
#[allow(dead_code)]
188186
fn visit_struct<V: ArrowSchemaVisitor>(fields: &Fields, visitor: &mut V) -> Result<V::T> {
189187
let mut results = Vec::with_capacity(fields.len());
190188
for field in fields {
@@ -198,7 +196,6 @@ fn visit_struct<V: ArrowSchemaVisitor>(fields: &Fields, visitor: &mut V) -> Resu
198196
}
199197

200198
/// Visit schema in post order.
201-
#[allow(dead_code)]
202199
fn visit_schema<V: ArrowSchemaVisitor>(schema: &ArrowSchema, visitor: &mut V) -> Result<V::U> {
203200
let mut results = Vec::with_capacity(schema.fields().len());
204201
for field in schema.fields() {
@@ -211,12 +208,17 @@ fn visit_schema<V: ArrowSchemaVisitor>(schema: &ArrowSchema, visitor: &mut V) ->
211208
}
212209

213210
/// Convert Arrow schema to Iceberg schema.
214-
#[allow(dead_code)]
215211
pub fn arrow_schema_to_schema(schema: &ArrowSchema) -> Result<Schema> {
216212
let mut visitor = ArrowSchemaConverter::new();
217213
visit_schema(schema, &mut visitor)
218214
}
219215

216+
/// Convert Arrow type to iceberg type.
217+
pub fn arrow_type_to_type(ty: &DataType) -> Result<Type> {
218+
let mut visitor = ArrowSchemaConverter::new();
219+
visit_type(ty, &mut visitor)
220+
}
221+
220222
const ARROW_FIELD_DOC_KEY: &str = "doc";
221223

222224
fn get_field_id(field: &Field) -> Result<i32> {
@@ -246,7 +248,6 @@ fn get_field_doc(field: &Field) -> Option<String> {
246248
struct ArrowSchemaConverter;
247249

248250
impl ArrowSchemaConverter {
249-
#[allow(dead_code)]
250251
fn new() -> Self {
251252
Self {}
252253
}
@@ -615,6 +616,15 @@ pub fn schema_to_arrow_schema(schema: &crate::spec::Schema) -> crate::Result<Arr
615616
}
616617
}
617618

619+
/// Convert iceberg type to an arrow type.
620+
pub fn type_to_arrow_type(ty: &crate::spec::Type) -> crate::Result<DataType> {
621+
let mut converter = ToArrowSchemaConverter;
622+
match crate::spec::visit_type(ty, &mut converter)? {
623+
ArrowSchemaOrFieldOrType::Type(ty) => Ok(ty),
624+
_ => unreachable!(),
625+
}
626+
}
627+
618628
/// Convert Iceberg Datum to Arrow Datum.
619629
pub(crate) fn get_arrow_datum(datum: &Datum) -> Result<Box<dyn ArrowDatum + Send>> {
620630
match (datum.data_type(), datum.literal()) {
@@ -779,7 +789,7 @@ mod tests {
779789
use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
780790

781791
use super::*;
782-
use crate::spec::Schema;
792+
use crate::spec::{Literal, Schema};
783793

784794
/// Create a simple field with metadata.
785795
fn simple_field(name: &str, ty: DataType, nullable: bool, value: &str) -> Field {
@@ -1365,4 +1375,90 @@ mod tests {
13651375
let converted_arrow_schema = schema_to_arrow_schema(&schema).unwrap();
13661376
assert_eq!(converted_arrow_schema, arrow_schema);
13671377
}
1378+
1379+
#[test]
1380+
fn test_type_conversion() {
1381+
// test primitive type
1382+
{
1383+
let arrow_type = DataType::Int32;
1384+
let iceberg_type = Type::Primitive(PrimitiveType::Int);
1385+
assert_eq!(arrow_type, type_to_arrow_type(&iceberg_type).unwrap());
1386+
assert_eq!(iceberg_type, arrow_type_to_type(&arrow_type).unwrap());
1387+
}
1388+
1389+
// test struct type
1390+
{
1391+
// a field without field-id metadata causes an error
1392+
let arrow_type = DataType::Struct(Fields::from(vec![
1393+
Field::new("a", DataType::Int64, false),
1394+
Field::new("b", DataType::Utf8, true),
1395+
]));
1396+
assert_eq!(
1397+
&arrow_type_to_type(&arrow_type).unwrap_err().to_string(),
1398+
"DataInvalid => Field id not found in metadata"
1399+
);
1400+
1401+
let arrow_type = DataType::Struct(Fields::from(vec![
1402+
Field::new("a", DataType::Int64, false).with_metadata(HashMap::from_iter([(
1403+
PARQUET_FIELD_ID_META_KEY.to_string(),
1404+
1.to_string(),
1405+
)])),
1406+
Field::new("b", DataType::Utf8, true).with_metadata(HashMap::from_iter([(
1407+
PARQUET_FIELD_ID_META_KEY.to_string(),
1408+
2.to_string(),
1409+
)])),
1410+
]));
1411+
let iceberg_type = Type::Struct(StructType::new(vec![
1412+
NestedField {
1413+
id: 1,
1414+
doc: None,
1415+
name: "a".to_string(),
1416+
required: true,
1417+
field_type: Box::new(Type::Primitive(PrimitiveType::Long)),
1418+
initial_default: None,
1419+
write_default: None,
1420+
}
1421+
.into(),
1422+
NestedField {
1423+
id: 2,
1424+
doc: None,
1425+
name: "b".to_string(),
1426+
required: false,
1427+
field_type: Box::new(Type::Primitive(PrimitiveType::String)),
1428+
initial_default: None,
1429+
write_default: None,
1430+
}
1431+
.into(),
1432+
]));
1433+
assert_eq!(iceberg_type, arrow_type_to_type(&arrow_type).unwrap());
1434+
assert_eq!(arrow_type, type_to_arrow_type(&iceberg_type).unwrap());
1435+
1436+
// initial_default and write_default are ignored
1437+
let iceberg_type = Type::Struct(StructType::new(vec![
1438+
NestedField {
1439+
id: 1,
1440+
doc: None,
1441+
name: "a".to_string(),
1442+
required: true,
1443+
field_type: Box::new(Type::Primitive(PrimitiveType::Long)),
1444+
initial_default: Some(Literal::Primitive(PrimitiveLiteral::Int(114514))),
1445+
write_default: None,
1446+
}
1447+
.into(),
1448+
NestedField {
1449+
id: 2,
1450+
doc: None,
1451+
name: "b".to_string(),
1452+
required: false,
1453+
field_type: Box::new(Type::Primitive(PrimitiveType::String)),
1454+
initial_default: None,
1455+
write_default: Some(Literal::Primitive(PrimitiveLiteral::String(
1456+
"514".to_string(),
1457+
))),
1458+
}
1459+
.into(),
1460+
]));
1461+
assert_eq!(arrow_type, type_to_arrow_type(&iceberg_type).unwrap());
1462+
}
1463+
}
13681464
}

0 commit comments

Comments
 (0)