Skip to content

Commit ac22f27

Browse files
committed
conservatively rolling
1 parent 46a03ab commit ac22f27

File tree

1 file changed

+5
-12
lines changed

1 file changed

+5
-12
lines changed

crates/iceberg/src/writer/file_writer/rolling_writer.rs

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ impl<B: FileWriterBuilder> RollingFileWriterBuilder<B> {
3939
/// * `target_file_size` - The target size in bytes before rolling over to a new file
4040
///
4141
/// NOTE: The `target_file_size` does not exactly reflect the final size on physical storage.
42-
/// This is because the input size is based on the Arrow in-memory format, which differs from the on-disk file format.
42+
/// This is because the input size is based on the Arrow in-memory format and cannot precisely control rollover behavior.
43+
/// The actual file size on disk is expected to be slightly larger than `target_file_size`.
4344
pub fn new(inner_builder: B, target_file_size: usize) -> Self {
4445
Self {
4546
inner_builder,
@@ -77,30 +78,22 @@ pub struct RollingFileWriter<B: FileWriterBuilder> {
7778
impl<B: FileWriterBuilder> RollingFileWriter<B> {
7879
/// Determines if the writer should roll over to a new file.
7980
///
80-
/// # Arguments
81-
///
82-
/// * `input_size` - The size in bytes of the incoming data
83-
///
8481
/// # Returns
8582
///
8683
/// `true` if a new file should be started, `false` otherwise
87-
pub fn should_roll(&self, input_size: usize) -> bool {
88-
self.current_written_size() + input_size > self.target_file_size
84+
pub fn should_roll(&self) -> bool {
85+
self.current_written_size() > self.target_file_size
8986
}
9087
}
9188

9289
impl<B: FileWriterBuilder> FileWriter for RollingFileWriter<B> {
9390
async fn write(&mut self, input: &RecordBatch) -> Result<()> {
94-
// The input size is estimated using the Arrow in-memory format
95-
// and will differ from the final on-disk file size.
96-
let input_size = input.get_array_memory_size();
97-
9891
if self.inner.is_none() {
9992
// initialize inner writer
10093
self.inner = Some(self.inner_builder.clone().build().await?);
10194
}
10295

103-
if self.should_roll(input_size) {
96+
if self.should_roll() {
10497
if let Some(inner) = self.inner.take() {
10598
// close the current writer, roll to a new file
10699
self.data_file_builders.extend(inner.close().await?);

0 commit comments

Comments
 (0)