@@ -21,6 +21,8 @@ use std::sync::Arc;
 
 use arrow_array::{ArrayRef, BooleanArray, Int32Array, RecordBatch, StringArray};
 use futures::TryStreamExt;
+use iceberg::spec::DataFile;
+use iceberg::table::Table;
 use iceberg::transaction::{ApplyTransactionAction, Transaction};
 use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
 use iceberg::writer::file_writer::ParquetWriterBuilder;
@@ -453,3 +455,114 @@ async fn test_sequence_number_in_manifest_entry() {
         }
     }
 }
+
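+/// Appends ten data files through separate fast-append commits, rewrites them
+/// away again, and then checks that every manifest file in the final snapshot
+/// records the partition spec ID the data files were written with.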
+#[tokio::test]
+async fn test_partition_spec_id_in_manifest() {
+    let fixture = get_shared_containers();
+    let rest_catalog = RestCatalogBuilder::default()
+        .load("rest", fixture.catalog_config.clone())
+        .await
+        .unwrap();
+    let ns = random_ns().await;
+    let schema = test_schema();
+
+    let table_creation = TableCreation::builder()
+        .name("t1".to_string())
+        .schema(schema.clone())
+        .build();
+
+    let mut table = rest_catalog
+        .create_table(ns.name(), table_creation)
+        .await
+        .unwrap();
+
+    // Create the writer and write the data
+    let schema: Arc<arrow_schema::Schema> = Arc::new(
+        table
+            .metadata()
+            .current_schema()
+            .as_ref()
+            .try_into()
+            .unwrap(),
+    );
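+    // Location and file-name generators control where data files land and how they are named.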
+    let location_generator = DefaultLocationGenerator::new(table.metadata().clone()).unwrap();
+    let file_name_generator = DefaultFileNameGenerator::new(
+        "test".to_string(),
+        None,
+        iceberg::spec::DataFileFormat::Parquet,
+    );
+
+    // Data files committed in each round, collected for the rewrite step below.
+    let mut data_files_vec = Vec::default();
+
+    async fn build_data_file_f(
+        schema: Arc<arrow_schema::Schema>,
+        table: &Table,
+        location_generator: DefaultLocationGenerator,
+        file_name_generator: DefaultFileNameGenerator,
+    ) -> DataFile {
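+        // Writer stack: Parquet writer -> rolling file writer -> data file writer,
+        // which yields the DataFile metadata for each written Parquet file.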
+        let parquet_writer_builder = ParquetWriterBuilder::new(
+            WriterProperties::default(),
+            table.metadata().current_schema().clone(),
+        );
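+        // The rolling writer rolls to a new output file once the default target file size is reached.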
+        let rolling_writer_builder = RollingFileWriterBuilder::new_with_default_file_size(
+            parquet_writer_builder,
+            table.file_io().clone(),
+            location_generator.clone(),
+            file_name_generator.clone(),
+        );
+        let data_file_writer_builder = DataFileWriterBuilder::new(rolling_writer_builder);
+
+        let mut data_file_writer = data_file_writer_builder.build(None).await.unwrap();
+        let col1 = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
+        let col2 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]);
+        let col3 = BooleanArray::from(vec![Some(true), Some(false), None, Some(false)]);
+        let batch = RecordBatch::try_new(schema.clone(), vec![
+            Arc::new(col1) as ArrayRef,
+            Arc::new(col2) as ArrayRef,
+            Arc::new(col3) as ArrayRef,
+        ])
+        .unwrap();
+        data_file_writer.write(batch.clone()).await.unwrap();
+        data_file_writer.close().await.unwrap()[0].clone()
+    }
+
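+    // Ten separate fast-append commits: each round writes one data file and commits it in its own transaction.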
+    for _ in 0..10 {
+        let data_file = build_data_file_f(
+            schema.clone(),
+            &table,
+            location_generator.clone(),
+            file_name_generator.clone(),
+        )
+        .await;
+        data_files_vec.push(data_file.clone());
+        let tx = Transaction::new(&table);
+        let append_action = tx.fast_append().add_data_files(vec![data_file]);
+        let tx = append_action.apply(tx).unwrap();
+        table = tx.commit(&rest_catalog).await.unwrap();
+    }
+
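+    // All rounds used the table's default partition spec, so the last file's spec ID stands in for all of them.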
+    let last_data_files = data_files_vec.last().unwrap();
+    let partition_spec_id = last_data_files.partition_spec_id();
+
+    // Remove every data file again through rewrite_files (RewriteAction), one commit per file.
+    for data_file in &data_files_vec {
+        let tx = Transaction::new(&table);
+        let rewrite_action = tx.rewrite_files().delete_files(vec![data_file.clone()]);
+        let tx = rewrite_action.apply(tx).unwrap();
+        table = tx.commit(&rest_catalog).await.unwrap();
+    }
+
+    // TODO: test update partition spec
+
+    // Verify that the partition spec ID is correctly set on every manifest file.
+    let last_snapshot = table.metadata().current_snapshot().unwrap();
+    let manifest_list = last_snapshot
+        .load_manifest_list(table.file_io(), table.metadata())
+        .await
+        .unwrap();
+    assert_eq!(manifest_list.entries().len(), 1);
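+    // Every entry in the remaining manifest list must reference the spec ID the data files were written with.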
+    for manifest_file in manifest_list.entries() {
+        assert_eq!(manifest_file.partition_spec_id, partition_spec_id);
+    }
+}