2020use crate :: VariantArray ;
2121use arrow:: array:: { ArrayRef , BinaryViewArray , BinaryViewBuilder , NullBufferBuilder , StructArray } ;
2222use arrow_schema:: { ArrowError , DataType , Field , Fields } ;
23- use parquet_variant:: { ListBuilder , ObjectBuilder , Variant , VariantBuilder , VariantBuilderExt } ;
23+ use parquet_variant:: { ListBuilder , ObjectBuilder , Variant , VariantBuilderExt } ;
24+ use parquet_variant:: { MetadataBuilder , ParentState , ValueBuilder } ;
2425use std:: sync:: Arc ;
2526
2627/// A builder for [`VariantArray`]
@@ -72,12 +73,12 @@ use std::sync::Arc;
7273pub struct VariantArrayBuilder {
7374 /// Nulls
7475 nulls : NullBufferBuilder ,
75- /// buffer for all the metadata
76- metadata_buffer : Vec < u8 > ,
76+ /// builder for all the metadata
77+ metadata_builder : MetadataBuilder ,
7778 /// ending offset for each serialized metadata dictionary in the buffer
7879 metadata_offsets : Vec < usize > ,
79- /// buffer for values
80- value_buffer : Vec < u8 > ,
80+ /// builder for values
81+ value_builder : ValueBuilder ,
8182 /// ending offset for each serialized variant value in the buffer
8283 value_offsets : Vec < usize > ,
8384 /// The fields of the final `StructArray`
@@ -95,9 +96,9 @@ impl VariantArrayBuilder {
9596
9697 Self {
9798 nulls : NullBufferBuilder :: new ( row_capacity) ,
98- metadata_buffer : Vec :: new ( ) , // todo allocation capacity
99+ metadata_builder : MetadataBuilder :: default ( ) ,
99100 metadata_offsets : Vec :: with_capacity ( row_capacity) ,
100- value_buffer : Vec :: new ( ) ,
101+ value_builder : ValueBuilder :: new ( ) ,
101102 value_offsets : Vec :: with_capacity ( row_capacity) ,
102103 fields : Fields :: from ( vec ! [ metadata_field, value_field] ) ,
103104 }
@@ -107,15 +108,17 @@ impl VariantArrayBuilder {
107108 pub fn build ( self ) -> VariantArray {
108109 let Self {
109110 mut nulls,
110- metadata_buffer ,
111+ metadata_builder ,
111112 metadata_offsets,
112- value_buffer ,
113+ value_builder ,
113114 value_offsets,
114115 fields,
115116 } = self ;
116117
118+ let metadata_buffer = metadata_builder. into_inner ( ) ;
117119 let metadata_array = binary_view_array_from_buffers ( metadata_buffer, metadata_offsets) ;
118120
121+ let value_buffer = value_builder. into_inner ( ) ;
119122 let value_array = binary_view_array_from_buffers ( value_buffer, value_offsets) ;
120123
121124 // The build the final struct array
@@ -136,14 +139,14 @@ impl VariantArrayBuilder {
136139 pub fn append_null ( & mut self ) {
137140 self . nulls . append_null ( ) ;
138141 // The subfields are expected to be non-nullable according to the parquet variant spec.
139- self . metadata_offsets . push ( self . metadata_buffer . len ( ) ) ;
140- self . value_offsets . push ( self . value_buffer . len ( ) ) ;
142+ self . metadata_offsets . push ( self . metadata_builder . offset ( ) ) ;
143+ self . value_offsets . push ( self . value_builder . offset ( ) ) ;
141144 }
142145
143146 /// Append the [`Variant`] to the builder as the next row
144147 pub fn append_variant ( & mut self , variant : Variant ) {
145148 let mut direct_builder = self . variant_builder ( ) ;
146- direct_builder. variant_builder . append_value ( variant) ;
149+ direct_builder. append_value ( variant) ;
147150 direct_builder. finish ( )
148151 }
149152
@@ -194,32 +197,23 @@ impl VariantArrayBuilder {
194197///
195198/// See [`VariantArrayBuilder::variant_builder`] for an example
196199pub struct VariantArrayVariantBuilder < ' a > {
197- /// was finish called?
198- finished : bool ,
199- /// starting offset in the variant_builder's `metadata` buffer
200- metadata_offset : usize ,
201- /// starting offset in the variant_builder's `value` buffer
202- value_offset : usize ,
203- /// Parent array builder that this variant builder writes to. Buffers
204- /// have been moved into the variant builder, and must be returned on
205- /// drop
206- array_builder : & ' a mut VariantArrayBuilder ,
207- /// Builder for the in progress variant value, temporarily owns the buffers
208- /// from `array_builder`
209- variant_builder : VariantBuilder ,
200+ parent_state : ParentState < ' a > ,
201+ metadata_offsets : & ' a mut Vec < usize > ,
202+ value_offsets : & ' a mut Vec < usize > ,
203+ nulls : & ' a mut NullBufferBuilder ,
210204}
211205
212206impl VariantBuilderExt for VariantArrayVariantBuilder < ' _ > {
213207 fn append_value < ' m , ' v > ( & mut self , value : impl Into < Variant < ' m , ' v > > ) {
214- self . variant_builder . append_value ( value) ;
208+ ValueBuilder :: append_variant ( self . parent_state ( ) , value. into ( ) ) ;
215209 }
216210
217211 fn try_new_list ( & mut self ) -> Result < ListBuilder < ' _ > , ArrowError > {
218- Ok ( self . variant_builder . new_list ( ) )
212+ Ok ( ListBuilder :: new ( self . parent_state ( ) , false ) )
219213 }
220214
221215 fn try_new_object ( & mut self ) -> Result < ObjectBuilder < ' _ > , ArrowError > {
222- Ok ( self . variant_builder . new_object ( ) )
216+ Ok ( ObjectBuilder :: new ( self . parent_state ( ) , false ) )
223217 }
224218}
225219
@@ -228,103 +222,40 @@ impl<'a> VariantArrayVariantBuilder<'a> {
228222 ///
229223 /// Note this is not public as this is a structure that is logically
230224 /// part of the [`VariantArrayBuilder`] and relies on its internal structure
231- fn new ( array_builder : & ' a mut VariantArrayBuilder ) -> Self {
232- // append directly into the metadata and value buffers
233- let metadata_buffer = std:: mem:: take ( & mut array_builder. metadata_buffer ) ;
234- let value_buffer = std:: mem:: take ( & mut array_builder. value_buffer ) ;
235- let metadata_offset = metadata_buffer. len ( ) ;
236- let value_offset = value_buffer. len ( ) ;
225+ fn new ( builder : & ' a mut VariantArrayBuilder ) -> Self {
226+ let parent_state =
227+ ParentState :: variant ( & mut builder. value_builder , & mut builder. metadata_builder ) ;
237228 VariantArrayVariantBuilder {
238- finished : false ,
239- metadata_offset,
240- value_offset,
241- variant_builder : VariantBuilder :: new_with_buffers ( metadata_buffer, value_buffer) ,
242- array_builder,
229+ parent_state,
230+ metadata_offsets : & mut builder. metadata_offsets ,
231+ value_offsets : & mut builder. value_offsets ,
232+ nulls : & mut builder. nulls ,
243233 }
244234 }
245235
246- /// Return a reference to the underlying `VariantBuilder`
247- pub fn inner ( & self ) -> & VariantBuilder {
248- & self . variant_builder
249- }
250-
251- /// Return a mutable reference to the underlying `VariantBuilder`
252- pub fn inner_mut ( & mut self ) -> & mut VariantBuilder {
253- & mut self . variant_builder
254- }
255-
256236 /// Called to finish the in progress variant and write it to the underlying
257237 /// buffers
258238 ///
259239 /// Note if you do not call finish, on drop any changes made to the
260240 /// underlying buffers will be rolled back.
261241 pub fn finish ( mut self ) {
262- self . finished = true ;
263-
264- let metadata_offset = self . metadata_offset ;
265- let value_offset = self . value_offset ;
266- // get the buffers back from the variant builder
267- let ( metadata_buffer, value_buffer) = std:: mem:: take ( & mut self . variant_builder ) . finish ( ) ;
268-
269- // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost)
270- assert ! (
271- metadata_offset <= metadata_buffer. len( ) ,
272- "metadata length decreased unexpectedly"
273- ) ;
274- assert ! (
275- value_offset <= value_buffer. len( ) ,
276- "value length decreased unexpectedly"
277- ) ;
278-
279- // commit the changes by putting the
280- // ending offsets into the parent array builder.
281- let builder = & mut self . array_builder ;
282- builder. metadata_offsets . push ( metadata_buffer. len ( ) ) ;
283- builder. value_offsets . push ( value_buffer. len ( ) ) ;
284- builder. nulls . append_non_null ( ) ;
242+ // Record the ending offsets after finishing metadata and finish the parent state.
243+ let ( value_builder, metadata_builder) = self . parent_state . value_and_metadata_builders ( ) ;
244+ self . metadata_offsets . push ( metadata_builder. finish ( ) ) ;
245+ self . value_offsets . push ( value_builder. offset ( ) ) ;
246+ self . nulls . append_non_null ( ) ;
247+ self . parent_state . finish ( ) ;
248+ }
285249
286- // put the buffers back into the array builder
287- builder . metadata_buffer = metadata_buffer ;
288- builder . value_buffer = value_buffer ;
250+ fn parent_state ( & mut self ) -> ParentState < ' _ > {
251+ let ( value_builder , metadata_builder ) = self . parent_state . value_and_metadata_builders ( ) ;
252+ ParentState :: variant ( value_builder , metadata_builder )
289253 }
290254}
291255
256+ // Empty Drop to help with borrow checking - warns users if they forget to call finish()
292257impl Drop for VariantArrayVariantBuilder < ' _ > {
293- /// If the builder was not finished, roll back any changes made to the
294- /// underlying buffers (by truncating them)
295- fn drop ( & mut self ) {
296- if self . finished {
297- return ;
298- }
299-
300- // if the object was not finished, need to rollback any changes by
301- // truncating the buffers to the original offsets
302- let metadata_offset = self . metadata_offset ;
303- let value_offset = self . value_offset ;
304-
305- // get the buffers back from the variant builder
306- let ( mut metadata_buffer, mut value_buffer) =
307- std:: mem:: take ( & mut self . variant_builder ) . into_buffers ( ) ;
308-
309- // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost) so panic immediately
310- metadata_buffer
311- . len ( )
312- . checked_sub ( metadata_offset)
313- . expect ( "metadata length decreased unexpectedly" ) ;
314- value_buffer
315- . len ( )
316- . checked_sub ( value_offset)
317- . expect ( "value length decreased unexpectedly" ) ;
318-
319- // Note this truncate is fast because truncate doesn't free any memory:
320- // it just has to drop elements (and u8 doesn't have a destructor)
321- metadata_buffer. truncate ( metadata_offset) ;
322- value_buffer. truncate ( value_offset) ;
323-
324- // put the buffers back into the array builder
325- self . array_builder . metadata_buffer = metadata_buffer;
326- self . array_builder . value_buffer = value_buffer;
327- }
258+ fn drop ( & mut self ) { }
328259}
329260
330261fn binary_view_array_from_buffers ( buffer : Vec < u8 > , offsets : Vec < usize > ) -> BinaryViewArray {
@@ -457,12 +388,18 @@ mod test {
457388 assert_eq ! ( variant_array. len( ) , 2 ) ;
458389 assert ! ( !variant_array. is_null( 0 ) ) ;
459390 let variant = variant_array. value ( 0 ) ;
460- let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
461- assert_eq ! ( variant. get( "foo" ) . unwrap( ) , Variant :: from( 1i32 ) ) ;
391+ assert_eq ! (
392+ variant. get_object_field( "foo" ) ,
393+ Some ( Variant :: from( 1i32 ) ) ,
394+ "Expected an object with field \" foo\" , got: {variant:?}"
395+ ) ;
462396
463397 assert ! ( !variant_array. is_null( 1 ) ) ;
464398 let variant = variant_array. value ( 1 ) ;
465- let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
466- assert_eq ! ( variant. get( "baz" ) . unwrap( ) , Variant :: from( 3i32 ) ) ;
399+ assert_eq ! (
400+ variant. get_object_field( "baz" ) ,
401+ Some ( Variant :: from( 3i32 ) ) ,
402+ "Expected an object with field \" baz\" , got: {variant:?}"
403+ ) ;
467404 }
468405}
0 commit comments