@@ -39,7 +39,8 @@ impl<B: FileWriterBuilder> RollingFileWriterBuilder<B> {
3939 /// * `target_file_size` - The target size in bytes before rolling over to a new file
4040 ///
4141 /// NOTE: The `target_file_size` does not exactly reflect the final size on physical storage.
42- /// This is because the input size is based on the Arrow in-memory format, which differs from the on-disk file format.
42+ /// This is because the input size is based on the Arrow in-memory format and cannot precisely control rollover behavior.
43+ /// The actual file size on disk is expected to be slightly larger than `target_file_size`.
4344 pub fn new ( inner_builder : B , target_file_size : usize ) -> Self {
4445 Self {
4546 inner_builder,
@@ -77,30 +78,22 @@ pub struct RollingFileWriter<B: FileWriterBuilder> {
7778impl < B : FileWriterBuilder > RollingFileWriter < B > {
7879 /// Determines if the writer should roll over to a new file.
7980 ///
80- /// # Arguments
81- ///
82- /// * `input_size` - The size in bytes of the incoming data
83- ///
8481 /// # Returns
8582 ///
8683 /// `true` if a new file should be started, `false` otherwise
87- pub fn should_roll ( & self , input_size : usize ) -> bool {
88- self . current_written_size ( ) + input_size > self . target_file_size
84+ pub fn should_roll ( & self ) -> bool {
85+ self . current_written_size ( ) > self . target_file_size
8986 }
9087}
9188
9289impl < B : FileWriterBuilder > FileWriter for RollingFileWriter < B > {
9390 async fn write ( & mut self , input : & RecordBatch ) -> Result < ( ) > {
94- // The input size is estimated using the Arrow in-memory format
95- // and will differ from the final on-disk file size.
96- let input_size = input. get_array_memory_size ( ) ;
97-
9891 if self . inner . is_none ( ) {
9992 // initialize inner writer
10093 self . inner = Some ( self . inner_builder . clone ( ) . build ( ) . await ?) ;
10194 }
10295
103- if self . should_roll ( input_size ) {
96+ if self . should_roll ( ) {
10497 if let Some ( inner) = self . inner . take ( ) {
10598 // close the current writer, roll to a new file
10699 self . data_file_builders . extend ( inner. close ( ) . await ?) ;
0 commit comments