Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 12 additions & 16 deletions crates/iceberg/src/spec/table_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,10 @@ pub use super::table_metadata_builder::{TableMetadataBuildResult, TableMetadataB
use super::{
DEFAULT_PARTITION_SPEC_ID, PartitionSpecRef, PartitionStatisticsFile, SchemaId, SchemaRef,
SnapshotRef, SnapshotRetention, SortOrder, SortOrderRef, StatisticsFile, StructType,
TableProperties,
};
use crate::error::{Result, timestamp_ms_to_utc};
use crate::io::FileIO;
use crate::spec::EncryptedKey;
use crate::spec::{EncryptedKey, TableProperties};
use crate::{Error, ErrorKind};

static MAIN_BRANCH: &str = "main";
Expand Down Expand Up @@ -91,7 +90,7 @@ pub struct TableMetadata {
///A string to string map of table properties. This is used to control settings that
/// affect reading and writing and is not intended to be used for arbitrary metadata.
/// For example, commit.retry.num-retries is used to control the number of commit retries.
pub(crate) properties: HashMap<String, String>,
pub(crate) properties: TableProperties,
/// long ID of the current table snapshot; must be the same as the current
/// ID of the main branch in refs.
pub(crate) current_snapshot_id: Option<i64>,
Expand Down Expand Up @@ -358,16 +357,13 @@ impl TableMetadata {
/// Returns properties of table.
#[inline]
pub fn properties(&self) -> &HashMap<String, String> {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should remove this method.

&self.properties
&self.properties.other
}

/// Returns typed table properties parsed from the raw properties map with defaults.
pub fn table_properties(&self) -> Result<TableProperties> {
TableProperties::try_from(&self.properties).map_err(|e| {
Error::new(ErrorKind::DataInvalid, "Invalid table properties").with_source(e)
})
pub fn table_properties(&self) -> &TableProperties {
&self.properties
}

/// Return location of statistics files.
#[inline]
pub fn statistics_iter(&self) -> impl ExactSizeIterator<Item = &StatisticsFile> {
Expand Down Expand Up @@ -969,7 +965,7 @@ pub(super) mod _serde {
default_partition_type,
default_spec,
last_partition_id: value.last_partition_id,
properties: value.properties.unwrap_or_default(),
properties: crate::spec::TableProperties::new(value.properties.unwrap_or_default()),
current_snapshot_id,
snapshots: snapshots
.map(|snapshots| {
Expand Down Expand Up @@ -1082,7 +1078,7 @@ pub(super) mod _serde {
default_partition_type,
default_spec,
last_partition_id: value.last_partition_id,
properties: value.properties.unwrap_or_default(),
properties: crate::spec::TableProperties::new(value.properties.unwrap_or_default()),
current_snapshot_id,
snapshots: snapshots
.map(|snapshots| {
Expand Down Expand Up @@ -1234,7 +1230,7 @@ pub(super) mod _serde {
.unwrap_or_else(|| partition_specs.keys().copied().max().unwrap_or_default()),
partition_specs,
schemas,
properties: value.properties.unwrap_or_default(),
properties: crate::spec::TableProperties::new(value.properties.unwrap_or_default()),
current_snapshot_id,
snapshots: value
.snapshots
Expand Down Expand Up @@ -1361,10 +1357,10 @@ pub(super) mod _serde {
.collect(),
default_spec_id: v.default_spec.spec_id(),
last_partition_id: v.last_partition_id,
properties: if v.properties.is_empty() {
properties: if v.properties.other.is_empty() {
None
} else {
Some(v.properties)
Some(v.properties.other.clone())
},
current_snapshot_id: v.current_snapshot_id,
snapshot_log: if v.snapshot_log.is_empty() {
Expand Down Expand Up @@ -1430,10 +1426,10 @@ pub(super) mod _serde {
),
default_spec_id: Some(v.default_spec.spec_id()),
last_partition_id: Some(v.last_partition_id),
properties: if v.properties.is_empty() {
properties: if v.properties.other.is_empty() {
None
} else {
Some(v.properties)
Some(v.properties.other.clone())
},
current_snapshot_id: v.current_snapshot_id,
snapshots: if v.snapshots.is_empty() {
Expand Down
7 changes: 4 additions & 3 deletions crates/iceberg/src/spec/table_metadata_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ impl TableMetadataBuilder {
), // Overwritten immediately by add_default_partition_spec
default_partition_type: StructType::new(vec![]),
last_partition_id: UNPARTITIONED_LAST_ASSIGNED_ID,
properties: HashMap::new(),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in TableMetadataBuilder, we should still use HashMap<String, String> given it's a mutable api. We could add an extra field in TableMetadataBuilder.

properties: TableProperties::default(),
current_snapshot_id: None,
snapshots: HashMap::new(),
snapshot_log: vec![],
Expand Down Expand Up @@ -272,7 +272,7 @@ impl TableMetadataBuilder {
return Ok(self);
}

self.metadata.properties.extend(properties.clone());
self.metadata.properties.other.extend(properties.clone());
self.changes.push(TableUpdate::SetProperties {
updates: properties,
});
Expand Down Expand Up @@ -307,7 +307,7 @@ impl TableMetadataBuilder {
}

for property in &properties {
self.metadata.properties.remove(property);
self.metadata.properties.other.remove(property);
}

if !properties.is_empty() {
Expand Down Expand Up @@ -1128,6 +1128,7 @@ impl TableMetadataBuilder {
let max_size = self
.metadata
.properties
.other
.get(TableProperties::PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX)
.and_then(|v| v.parse::<usize>().ok())
.unwrap_or(TableProperties::PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT)
Expand Down
10 changes: 9 additions & 1 deletion crates/iceberg/src/spec/table_properties.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ where
}

/// TableProperties that contains the properties of a table.
#[derive(Debug)]
#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have dedicated ser/de for TableMetadata, we should not add it here.

pub struct TableProperties {
/// The number of times to retry a commit.
pub commit_num_retries: usize,
Expand All @@ -51,6 +51,8 @@ pub struct TableProperties {
pub write_target_file_size_bytes: usize,
/// Whether to use `FanoutWriter` for partitioned tables.
pub write_datafusion_fanout_enabled: bool,
/// Any other properties that are not explicitly captured in named fields.
pub other: HashMap<String, String>,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field name is not very descriptive to what it actually holds

}

impl TableProperties {
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making the other field public exposes internal implementation details and allows external code to directly modify properties, potentially causing inconsistencies. Consider making this field private and providing accessor methods if needed.

Suggested change
pub other: HashMap<String, String>,
}
impl TableProperties {
other: HashMap<String, String>,
}
impl TableProperties {
/// Returns a reference to the map of additional, non-standard table properties.
pub fn other(&self) -> &HashMap<String, String> {
&self.other
}
/// Returns a mutable reference to the map of additional, non-standard table properties.
pub fn other_mut(&mut self) -> &mut HashMap<String, String> {
&mut self.other
}

Copilot uses AI. Check for mistakes.
Expand All @@ -68,6 +70,11 @@ impl TableProperties {
pub const PROPERTY_UUID: &str = "uuid";
/// Reserved table property for the total number of snapshots.
pub const PROPERTY_SNAPSHOT_COUNT: &str = "snapshot-count";
/// Creates a new TableProperties from a HashMap of raw strings.
pub fn new(props: HashMap<String, String>) -> Self {
Self::try_from(&props).unwrap_or_default()
Comment on lines +75 to +76
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new() method silently swallows errors via unwrap_or_default(), which could hide parsing failures for invalid property values. This differs from the original behavior where table_properties() returned a Result. Consider returning Result<Self, anyhow::Error> to preserve error handling capabilities, or at minimum document that invalid values fall back to defaults.

Suggested change
pub fn new(props: HashMap<String, String>) -> Self {
Self::try_from(&props).unwrap_or_default()
pub fn new(props: HashMap<String, String>) -> Result<Self, anyhow::Error> {
Self::try_from(&props)

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not unwrap, just implement TryFrom.

}

/// Reserved table property for current snapshot summary.
pub const PROPERTY_CURRENT_SNAPSHOT_SUMMARY: &str = "current-snapshot-summary";
/// Reserved table property for current snapshot id.
Expand Down Expand Up @@ -187,6 +194,7 @@ impl TryFrom<&HashMap<String, String>> for TableProperties {
TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED,
TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT,
)?,
other: props.clone(),
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other field is set to a full clone of the input props HashMap, but this includes all properties (even the ones that were already parsed into struct fields). This creates unnecessary duplication and memory overhead. Consider filtering out the explicitly parsed properties before storing in other.

Copilot uses AI. Check for mistakes.
})
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/iceberg/src/transaction/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ impl Transaction {
return Ok(self.table);
}

let table_props = self.table.metadata().table_properties()?;
let table_props = self.table.metadata().table_properties();

let backoff = Self::build_backoff(table_props)?;
let backoff = Self::build_backoff(table_props.clone())?;
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This clones the entire TableProperties struct (including the potentially large other HashMap) just to pass it to build_backoff(). Since build_backoff() only reads a few fields, passing a reference would be more efficient.

Suggested change
let backoff = Self::build_backoff(table_props.clone())?;
let backoff = Self::build_backoff(&table_props)?;

Copilot uses AI. Check for mistakes.
let tx = self;

(|mut tx: Transaction| async {
Expand Down
Loading