Skip to content

Commit 667c173

Browse files
committed
feat: support write (#10)
* add arrow_struct_to_iceberg_struct * refine writer interface * support fanout partition writer * support sort_position_delete_writer * support equality delta writer * support precompute partition writer * update value convert * fix some wrong in writer * implement Display for NamespaceIdent * expose _serde::DataFile * fix FieldSummary generated from Manifest * add delete file support for transaction * fix record_batch_partition_spliter * fix day transform * fix RawLiteralEnum::Record * fix nullable field of equality delete writer * support to delete empty row file * fix decimal parse for parquet statistics --------- Co-authored-by: ZENOTME <[email protected]>
1 parent 54ef090 commit 667c173

25 files changed

+3026
-62
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,13 @@ apache-avro = "0.17"
4343
array-init = "2"
4444
arrow-arith = { version = "53" }
4545
arrow-array = { version = "53" }
46+
arrow-buffer = { version = "53" }
4647
arrow-cast = { version = "53" }
4748
arrow-ord = { version = "53" }
4849
arrow-schema = { version = "53" }
4950
arrow-select = { version = "53" }
5051
arrow-string = { version = "53" }
52+
arrow-row = { version = "53" }
5153
async-stream = "0.3.5"
5254
async-trait = "0.1"
5355
async-std = "1.12"

crates/iceberg/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,10 @@ apache-avro = { workspace = true }
4646
array-init = { workspace = true }
4747
arrow-arith = { workspace = true }
4848
arrow-array = { workspace = true }
49+
arrow-buffer = { workspace = true }
4950
arrow-cast = { workspace = true }
5051
arrow-ord = { workspace = true }
52+
arrow-row = { workspace = true }
5153
arrow-schema = { workspace = true }
5254
arrow-select = { workspace = true }
5355
arrow-string = { workspace = true }

crates/iceberg/src/arrow/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,8 @@ pub use schema::*;
2222
mod reader;
2323
pub(crate) mod record_batch_projector;
2424
pub(crate) mod record_batch_transformer;
25-
25+
mod value;
2626
pub use reader::*;
27+
pub use value::*;
28+
mod record_batch_partition_spliter;
29+
pub(crate) use record_batch_partition_spliter::*;

crates/iceberg/src/arrow/reader.rs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask, PARQUET_FI
3939
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
4040
use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
4141

42+
use super::record_batch_transformer::RecordBatchTransformer;
4243
use crate::arrow::{arrow_schema_to_schema, get_arrow_datum};
4344
use crate::error::Result;
4445
use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor};
@@ -51,8 +52,6 @@ use crate::spec::{DataContentType, Datum, PrimitiveType, Schema};
5152
use crate::utils::available_parallelism;
5253
use crate::{Error, ErrorKind};
5354

54-
use super::record_batch_transformer::RecordBatchTransformer;
55-
5655
/// Builder to create ArrowReader
5756
pub struct ArrowReaderBuilder {
5857
batch_size: Option<usize>,
@@ -345,14 +344,19 @@ impl ArrowReader {
345344
if iceberg_field.is_none() || parquet_iceberg_field.is_none() {
346345
return;
347346
}
348-
349-
if !type_promotion_is_valid(
350-
parquet_iceberg_field
351-
.unwrap()
352-
.field_type
353-
.as_primitive_type(),
354-
iceberg_field.unwrap().field_type.as_primitive_type(),
355-
) {
347+
if iceberg_field
348+
.unwrap()
349+
.field_type
350+
.as_primitive_type()
351+
.is_some()
352+
&& !type_promotion_is_valid(
353+
parquet_iceberg_field
354+
.unwrap()
355+
.field_type
356+
.as_primitive_type(),
357+
iceberg_field.unwrap().field_type.as_primitive_type(),
358+
)
359+
{
356360
return;
357361
}
358362

0 commit comments

Comments
 (0)