Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bindings/python/src/data_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ impl PyDataFile {
}

#[getter]
fn equality_ids(&self) -> &[i32] {
fn equality_ids(&self) -> Option<Vec<i32>> {
self.inner.equality_ids()
}

Expand Down
2 changes: 1 addition & 1 deletion bindings/python/tests/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,5 +138,5 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:
}
assert data_file.key_metadata is None
assert data_file.split_offsets == [4]
assert data_file.equality_ids == []
assert data_file.equality_ids is None
assert data_file.sort_order_id == 0
2 changes: 1 addition & 1 deletion crates/iceberg/src/arrow/caching_delete_file_loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ impl CachingDeleteFileLoader {
)
.await?,
sender,
equality_ids: HashSet::from_iter(task.equality_ids.clone()),
equality_ids: HashSet::from_iter(task.equality_ids.clone().unwrap()),
})
}

Expand Down
6 changes: 3 additions & 3 deletions crates/iceberg/src/arrow/delete_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,21 +311,21 @@ pub(crate) mod tests {
file_path: format!("{}/pos-del-1.parquet", table_location.to_str().unwrap()),
file_type: DataContentType::PositionDeletes,
partition_spec_id: 0,
equality_ids: vec![],
equality_ids: None,
};

let pos_del_2 = FileScanTaskDeleteFile {
file_path: format!("{}/pos-del-2.parquet", table_location.to_str().unwrap()),
file_type: DataContentType::PositionDeletes,
partition_spec_id: 0,
equality_ids: vec![],
equality_ids: None,
};

let pos_del_3 = FileScanTaskDeleteFile {
file_path: format!("{}/pos-del-3.parquet", table_location.to_str().unwrap()),
file_type: DataContentType::PositionDeletes,
partition_spec_id: 0,
equality_ids: vec![],
equality_ids: None,
};

let file_scan_tasks = vec![
Expand Down
4 changes: 2 additions & 2 deletions crates/iceberg/src/expr/visitors/expression_evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -375,7 +375,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down
12 changes: 6 additions & 6 deletions crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1996,7 +1996,7 @@ mod test {
upper_bounds: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand All @@ -2022,7 +2022,7 @@ mod test {
upper_bounds: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -2084,7 +2084,7 @@ mod test {
column_sizes: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -2115,7 +2115,7 @@ mod test {
column_sizes: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -2147,7 +2147,7 @@ mod test {
column_sizes: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -2179,7 +2179,7 @@ mod test {
column_sizes: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down
8 changes: 4 additions & 4 deletions crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ mod test {
column_sizes: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand All @@ -605,7 +605,7 @@ mod test {
upper_bounds: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand All @@ -631,7 +631,7 @@ mod test {
column_sizes: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand All @@ -658,7 +658,7 @@ mod test {
column_sizes: Default::default(),
key_metadata: None,
split_offsets: vec![],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down
4 changes: 2 additions & 2 deletions crates/iceberg/src/scan/task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,6 @@ pub struct FileScanTaskDeleteFile {
/// partition id
pub partition_spec_id: i32,

/// equality ids for equality deletes (empty for positional deletes)
pub equality_ids: Vec<i32>,
/// equality ids for equality deletes (`None` for anything other than equality deletes)
pub equality_ids: Option<Vec<i32>>,
}
17 changes: 7 additions & 10 deletions crates/iceberg/src/spec/manifest/_serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ pub(super) struct DataFileSerde {
upper_bounds: Option<Vec<BytesEntry>>,
key_metadata: Option<serde_bytes::ByteBuf>,
split_offsets: Option<Vec<i64>>,
#[serde(default)]
equality_ids: Option<Vec<i32>>,
sort_order_id: Option<i32>,
first_row_id: Option<i64>,
Expand Down Expand Up @@ -155,7 +154,7 @@ impl DataFileSerde {
upper_bounds: Some(to_bytes_entry(value.upper_bounds)?),
key_metadata: value.key_metadata.map(serde_bytes::ByteBuf::from),
split_offsets: Some(value.split_offsets),
equality_ids: Some(value.equality_ids),
equality_ids: value.equality_ids,
sort_order_id: value.sort_order_id,
first_row_id: value.first_row_id,
referenced_data_file: value.referenced_data_file,
Expand Down Expand Up @@ -224,7 +223,7 @@ impl DataFileSerde {
.unwrap_or_default(),
key_metadata: self.key_metadata.map(|v| v.to_vec()),
split_offsets: self.split_offsets.unwrap_or_default(),
equality_ids: self.equality_ids.unwrap_or_default(),
equality_ids: self.equality_ids,
sort_order_id: self.sort_order_id,
partition_spec_id,
first_row_id: self.first_row_id,
Expand Down Expand Up @@ -382,7 +381,7 @@ mod tests {
upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]),
key_metadata: None,
split_offsets: vec![4],
equality_ids: vec![],
equality_ids: None,
sort_order_id: Some(0),
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -517,9 +516,8 @@ mod tests {
"DataFileSerde should convert content 0 to DataContentType::Data"
);
assert_eq!(
v2_entry.data_file.equality_ids,
Vec::<i32>::new(),
"DataFileSerde should convert None equality_ids to empty vec"
v2_entry.data_file.equality_ids, None,
"DataFileSerde should preserve None equality_ids as None"
);

// Verify other fields are preserved during conversion
Expand Down Expand Up @@ -581,9 +579,8 @@ mod tests {
"content 0 should convert to DataContentType::Data"
);
assert_eq!(
data_file.equality_ids,
Vec::<i32>::new(),
"None equality_ids should convert to empty vec via unwrap_or_default()"
data_file.equality_ids, None,
"None equality_ids should remain as None"
);

// Verify other fields are handled correctly during conversion
Expand Down
6 changes: 3 additions & 3 deletions crates/iceberg/src/spec/manifest/data_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ pub struct DataFile {
/// otherwise. Fields with ids listed in this column must be present
/// in the delete file
#[builder(default)]
pub(crate) equality_ids: Vec<i32>,
pub(crate) equality_ids: Option<Vec<i32>>,
/// field id: 140
///
/// ID representing sort order for this file.
Expand Down Expand Up @@ -249,8 +249,8 @@ impl DataFile {
/// Get the equality ids of the data file.
/// Field ids used to determine row equality in equality delete files.
/// `None` when content is not EqualityDeletes.
pub fn equality_ids(&self) -> &[i32] {
&self.equality_ids
pub fn equality_ids(&self) -> Option<Vec<i32>> {
self.equality_ids.clone()
}
/// Get the first row id in the data file.
pub fn first_row_id(&self) -> Option<i64> {
Expand Down
24 changes: 12 additions & 12 deletions crates/iceberg/src/spec/manifest/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ mod tests {
snapshot_id: None,
sequence_number: None,
file_sequence_number: None,
data_file: DataFile {content:DataContentType::Data,file_path:"s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(),file_format:DataFileFormat::Parquet,partition:Struct::empty(),record_count:1,file_size_in_bytes:5442,column_sizes:HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]),value_counts:HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]),null_value_counts:HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]),nan_value_counts:HashMap::new(),lower_bounds:HashMap::new(),upper_bounds:HashMap::new(),key_metadata:None,split_offsets:vec![4],equality_ids:Vec::new(),sort_order_id:None, partition_spec_id: 0,first_row_id: None,referenced_data_file: None,content_offset: None,content_size_in_bytes: None }
data_file: DataFile {content:DataContentType::Data,file_path:"s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(),file_format:DataFileFormat::Parquet,partition:Struct::empty(),record_count:1,file_size_in_bytes:5442,column_sizes:HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]),value_counts:HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]),null_value_counts:HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]),nan_value_counts:HashMap::new(),lower_bounds:HashMap::new(),upper_bounds:HashMap::new(),key_metadata:None,split_offsets:vec![4],equality_ids:Some(Vec::new()),sort_order_id:None, partition_spec_id: 0,first_row_id: None,referenced_data_file: None,content_offset: None,content_size_in_bytes: None }
}
];

Expand Down Expand Up @@ -435,7 +435,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: None,
split_offsets: vec![4],
equality_ids: vec![],
equality_ids: Some(Vec::new()),
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -532,7 +532,7 @@ mod tests {
upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]),
key_metadata: None,
split_offsets: vec![4],
equality_ids: vec![],
equality_ids: None,
sort_order_id: Some(0),
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -640,7 +640,7 @@ mod tests {
]),
key_metadata: None,
split_offsets: vec![4],
equality_ids: vec![],
equality_ids: None,
sort_order_id: Some(0),
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -749,7 +749,7 @@ mod tests {
]),
key_metadata: None,
split_offsets: vec![4],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -840,7 +840,7 @@ mod tests {
]),
key_metadata: None,
split_offsets: vec![4],
equality_ids: vec![],
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -922,7 +922,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: None,
split_offsets: vec![4],
equality_ids: Vec::new(),
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -957,7 +957,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: None,
split_offsets: vec![4],
equality_ids: Vec::new(),
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -992,7 +992,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: None,
split_offsets: vec![4],
equality_ids: Vec::new(),
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -1027,7 +1027,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: None,
split_offsets: vec![4],
equality_ids: Vec::new(),
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -1182,7 +1182,7 @@ mod tests {
"upper_bounds": [],
"key_metadata": null,
"split_offsets": [],
"equality_ids": [],
"equality_ids": null,
"sort_order_id": null,
"first_row_id": null,
"referenced_data_file": null,
Expand Down Expand Up @@ -1213,7 +1213,7 @@ mod tests {
"upper_bounds": [],
"key_metadata": null,
"split_offsets": [],
"equality_ids": [],
"equality_ids": null,
"sort_order_id": null,
"first_row_id": null,
"referenced_data_file": null,
Expand Down
6 changes: 3 additions & 3 deletions crates/iceberg/src/spec/manifest/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: Some(Vec::new()),
split_offsets: vec![4],
equality_ids: Vec::new(),
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -574,7 +574,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: Some(Vec::new()),
split_offsets: vec![4],
equality_ids: Vec::new(),
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down Expand Up @@ -603,7 +603,7 @@ mod tests {
upper_bounds: HashMap::new(),
key_metadata: Some(Vec::new()),
split_offsets: vec![4],
equality_ids: Vec::new(),
equality_ids: None,
sort_order_id: None,
partition_spec_id: 0,
first_row_id: None,
Expand Down
Loading
Loading