Skip to content

Commit 2c9d7e4

Browse files
committed
New builder
1 parent fea1817 commit 2c9d7e4

File tree

11 files changed

+2014
-106
lines changed

11 files changed

+2014
-106
lines changed

crates/catalog/glue/src/catalog.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,9 @@ impl Catalog for GlueCatalog {
355355
}
356356
};
357357

358-
let metadata = TableMetadataBuilder::from_table_creation(creation)?.build()?;
358+
let metadata = TableMetadataBuilder::from_table_creation(creation)?
359+
.build()?
360+
.metadata;
359361
let metadata_location = create_metadata_location(&location, 0)?;
360362

361363
self.file_io

crates/catalog/glue/src/schema.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,9 @@ mod tests {
198198
.location("my_location".to_string())
199199
.schema(schema)
200200
.build();
201-
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?.build()?;
201+
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?
202+
.build()?
203+
.metadata;
202204

203205
Ok(metadata)
204206
}

crates/catalog/glue/src/utils.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,9 @@ mod tests {
299299
.location("my_location".to_string())
300300
.schema(schema)
301301
.build();
302-
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?.build()?;
302+
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?
303+
.build()?
304+
.metadata;
303305

304306
Ok(metadata)
305307
}

crates/catalog/hms/src/catalog.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,9 @@ impl Catalog for HmsCatalog {
346346
}
347347
};
348348

349-
let metadata = TableMetadataBuilder::from_table_creation(creation)?.build()?;
349+
let metadata = TableMetadataBuilder::from_table_creation(creation)?
350+
.build()?
351+
.metadata;
350352
let metadata_location = create_metadata_location(&location, 0)?;
351353

352354
self.file_io

crates/catalog/memory/src/catalog.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,9 @@ impl Catalog for MemoryCatalog {
194194
}
195195
};
196196

197-
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?.build()?;
197+
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?
198+
.build()?
199+
.metadata;
198200
let metadata_location = format!(
199201
"{}/metadata/{}-{}.metadata.json",
200202
&location,

crates/iceberg/src/catalog/mod.rs

+48-5
Original file line numberDiff line numberDiff line change
@@ -445,8 +445,46 @@ impl TableUpdate {
445445
/// Applies the update to the table metadata builder.
446446
pub fn apply(self, builder: TableMetadataBuilder) -> Result<TableMetadataBuilder> {
447447
match self {
448-
TableUpdate::AssignUuid { uuid } => builder.assign_uuid(uuid),
449-
_ => unimplemented!(),
448+
TableUpdate::AssignUuid { uuid } => Ok(builder.assign_uuid(uuid)),
449+
TableUpdate::AddSchema {
450+
schema,
451+
last_column_id,
452+
} => {
453+
if let Some(last_column_id) = last_column_id {
454+
if builder.last_column_id() < last_column_id {
455+
return Err(Error::new(
456+
ErrorKind::DataInvalid,
457+
format!(
458+
"Invalid last column ID: {last_column_id} < {} (previous last column ID)",
459+
builder.last_column_id()
460+
),
461+
));
462+
}
463+
};
464+
Ok(builder.add_schema(schema))
465+
}
466+
TableUpdate::SetCurrentSchema { schema_id } => builder.set_current_schema(schema_id),
467+
TableUpdate::AddSpec { spec } => builder.add_partition_spec(spec),
468+
TableUpdate::SetDefaultSpec { spec_id } => builder.set_default_partition_spec(spec_id),
469+
TableUpdate::AddSortOrder { sort_order } => builder.add_sort_order(sort_order),
470+
TableUpdate::SetDefaultSortOrder { sort_order_id } => {
471+
builder.set_default_sort_order(sort_order_id)
472+
}
473+
TableUpdate::AddSnapshot { snapshot } => builder.add_snapshot(snapshot),
474+
TableUpdate::SetSnapshotRef {
475+
ref_name,
476+
reference,
477+
} => builder.set_ref(&ref_name, reference),
478+
TableUpdate::RemoveSnapshots { snapshot_ids } => {
479+
Ok(builder.remove_snapshots(&snapshot_ids))
480+
}
481+
TableUpdate::RemoveSnapshotRef { ref_name } => Ok(builder.remove_ref(&ref_name)),
482+
TableUpdate::SetLocation { location } => Ok(builder.set_location(location)),
483+
TableUpdate::SetProperties { updates } => builder.set_properties(updates),
484+
TableUpdate::RemoveProperties { removals } => Ok(builder.remove_properties(&removals)),
485+
TableUpdate::UpgradeFormatVersion { format_version } => {
486+
builder.upgrade_format_version(format_version)
487+
}
450488
}
451489
}
452490
}
@@ -1125,16 +1163,21 @@ mod tests {
11251163
let table_metadata = TableMetadataBuilder::from_table_creation(table_creation)
11261164
.unwrap()
11271165
.build()
1128-
.unwrap();
1129-
let table_metadata_builder = TableMetadataBuilder::new(table_metadata);
1166+
.unwrap()
1167+
.metadata;
1168+
let table_metadata_builder = TableMetadataBuilder::new_from_metadata(
1169+
table_metadata,
1170+
"s3://db/table/metadata/metadata1.gz.json",
1171+
);
11301172

11311173
let uuid = uuid::Uuid::new_v4();
11321174
let update = TableUpdate::AssignUuid { uuid };
11331175
let updated_metadata = update
11341176
.apply(table_metadata_builder)
11351177
.unwrap()
11361178
.build()
1137-
.unwrap();
1179+
.unwrap()
1180+
.metadata;
11381181
assert_eq!(updated_metadata.uuid(), uuid);
11391182
}
11401183
}

crates/iceberg/src/spec/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ mod schema;
2525
mod snapshot;
2626
mod sort;
2727
mod table_metadata;
28+
mod table_metadata_builder;
2829
mod transform;
2930
mod values;
3031
mod view_metadata;

crates/iceberg/src/spec/partition.rs

+5
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,11 @@ impl PartitionSpec {
171171
pub fn partition_type(&self) -> &StructType {
172172
&self.partition_type
173173
}
174+
175+
/// Set the spec id for the partition spec.
176+
pub(crate) fn with_spec_id(self, spec_id: i32) -> Self {
177+
Self { spec_id, ..self }
178+
}
174179
}
175180

176181
impl SchemalessPartitionSpec {

crates/iceberg/src/spec/schema.rs

+18
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,24 @@ impl Schema {
376376
pub fn accessor_by_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
377377
self.field_id_to_accessor.get(&field_id).cloned()
378378
}
379+
380+
/// Check whether this schema is semantically identical to another schema, ignoring the schema id.
381+
pub(crate) fn is_same_schema(&self, other: &SchemaRef) -> bool {
382+
self.as_struct().eq(other.as_struct())
383+
&& self.identifier_field_ids().eq(other.identifier_field_ids())
384+
}
385+
386+
/// Change the schema id of this schema.
387+
// This is redundant with the `with_schema_id` method on the builder, but useful
388+
// as it is infallible in contrast to the builder `build()` method.
389+
pub(crate) fn with_schema_id(self, schema_id: SchemaId) -> Self {
390+
Self { schema_id, ..self }
391+
}
392+
393+
/// Return A HashMap matching field ids to field names.
394+
pub(crate) fn field_id_to_name_map(&self) -> &HashMap<i32, String> {
395+
&self.id_to_name
396+
}
379397
}
380398

381399
impl Display for Schema {

crates/iceberg/src/spec/table_metadata.rs

+15-96
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,13 @@ use serde_repr::{Deserialize_repr, Serialize_repr};
3030
use uuid::Uuid;
3131

3232
use super::snapshot::SnapshotReference;
33+
pub use super::table_metadata_builder::TableMetadataBuilder;
3334
use super::{
34-
PartitionSpec, PartitionSpecRef, SchemaId, SchemaRef, SchemalessPartitionSpecRef, Snapshot,
35-
SnapshotRef, SnapshotRetention, SortOrder, SortOrderRef, DEFAULT_PARTITION_SPEC_ID,
35+
PartitionSpecRef, SchemaId, SchemaRef, SchemalessPartitionSpecRef, Snapshot, SnapshotRef,
36+
SnapshotRetention, SortOrder, SortOrderRef, DEFAULT_PARTITION_SPEC_ID,
3637
};
3738
use crate::error::{timestamp_ms_to_utc, Result};
38-
use crate::{Error, ErrorKind, TableCreation};
39+
use crate::{Error, ErrorKind};
3940

4041
static MAIN_BRANCH: &str = "main";
4142
pub(crate) static ONE_MINUTE_MS: i64 = 60_000;
@@ -165,6 +166,15 @@ pub struct TableMetadata {
165166
}
166167

167168
impl TableMetadata {
169+
/// Convert this Table Metadata into a builder for modification.
170+
///
171+
/// `current_file_location` is the location where the current version
172+
/// of the metadata file is stored. This is used to update the metadata log.
173+
#[must_use]
174+
pub fn into_builder(self, current_file_location: impl Into<String>) -> TableMetadataBuilder {
175+
TableMetadataBuilder::new_from_metadata(self, current_file_location)
176+
}
177+
168178
/// Returns format version of this metadata.
169179
#[inline]
170180
pub fn format_version(&self) -> FormatVersion {
@@ -539,98 +549,6 @@ impl TableMetadata {
539549
}
540550
}
541551

542-
/// Manipulating table metadata.
543-
pub struct TableMetadataBuilder(TableMetadata);
544-
545-
impl TableMetadataBuilder {
546-
/// Creates a new table metadata builder from the given table metadata.
547-
pub fn new(origin: TableMetadata) -> Self {
548-
Self(origin)
549-
}
550-
551-
/// Creates a new table metadata builder from the given table creation.
552-
pub fn from_table_creation(table_creation: TableCreation) -> Result<Self> {
553-
let TableCreation {
554-
name: _,
555-
location,
556-
schema,
557-
partition_spec,
558-
sort_order,
559-
properties,
560-
} = table_creation;
561-
562-
let schema: Arc<super::Schema> = Arc::new(schema);
563-
let unpartition_spec = PartitionSpec::unpartition_spec(schema.clone());
564-
let partition_specs = match partition_spec {
565-
Some(_) => {
566-
return Err(Error::new(
567-
ErrorKind::FeatureUnsupported,
568-
"Can't create table with partition spec now",
569-
))
570-
}
571-
None => HashMap::from([(
572-
unpartition_spec.spec_id(),
573-
Arc::new(unpartition_spec.clone().into_schemaless()),
574-
)]),
575-
};
576-
577-
let sort_orders = match sort_order {
578-
Some(_) => {
579-
return Err(Error::new(
580-
ErrorKind::FeatureUnsupported,
581-
"Can't create table with sort order now",
582-
))
583-
}
584-
None => HashMap::from([(
585-
SortOrder::UNSORTED_ORDER_ID,
586-
Arc::new(SortOrder::unsorted_order()),
587-
)]),
588-
};
589-
590-
let mut table_metadata = TableMetadata {
591-
format_version: FormatVersion::V2,
592-
table_uuid: Uuid::now_v7(),
593-
location: location.ok_or_else(|| {
594-
Error::new(
595-
ErrorKind::DataInvalid,
596-
"Can't create table without location",
597-
)
598-
})?,
599-
last_sequence_number: 0,
600-
last_updated_ms: Utc::now().timestamp_millis(),
601-
last_column_id: schema.highest_field_id(),
602-
current_schema_id: schema.schema_id(),
603-
schemas: HashMap::from([(schema.schema_id(), schema)]),
604-
partition_specs,
605-
default_spec: PartitionSpecRef::new(unpartition_spec),
606-
last_partition_id: 0,
607-
properties,
608-
current_snapshot_id: None,
609-
snapshots: Default::default(),
610-
snapshot_log: vec![],
611-
sort_orders,
612-
metadata_log: vec![],
613-
default_sort_order_id: SortOrder::UNSORTED_ORDER_ID,
614-
refs: Default::default(),
615-
};
616-
617-
table_metadata.try_normalize()?;
618-
619-
Ok(Self(table_metadata))
620-
}
621-
622-
/// Changes uuid of table metadata.
623-
pub fn assign_uuid(mut self, uuid: Uuid) -> Result<Self> {
624-
self.0.table_uuid = uuid;
625-
Ok(self)
626-
}
627-
628-
/// Returns the new table metadata after changes.
629-
pub fn build(self) -> Result<TableMetadata> {
630-
Ok(self.0)
631-
}
632-
}
633-
634552
pub(super) mod _serde {
635553
use std::borrow::BorrowMut;
636554
/// This is a helper module that defines types to help with serialization/deserialization.
@@ -2308,7 +2226,8 @@ mod tests {
23082226
let table_metadata = TableMetadataBuilder::from_table_creation(table_creation)
23092227
.unwrap()
23102228
.build()
2311-
.unwrap();
2229+
.unwrap()
2230+
.metadata;
23122231
assert_eq!(table_metadata.location, "s3://db/table");
23132232
assert_eq!(table_metadata.schemas.len(), 1);
23142233
assert_eq!(

0 commit comments

Comments
 (0)