@@ -24,12 +24,13 @@ use std::{
24
24
path::{Path, PathBuf},
25
25
process,
26
26
sync::{Arc, Mutex, RwLock},
27
+ time::UNIX_EPOCH,
27
28
};
28
29
29
30
use arrow_array::RecordBatch;
30
31
use arrow_ipc::writer::StreamWriter;
31
32
use arrow_schema::{Field, Fields, Schema};
32
- use chrono::{NaiveDateTime, Timelike, Utc};
33
+ use chrono::{DateTime, NaiveDateTime, Timelike, Utc};
33
34
use derive_more::{Deref, DerefMut};
34
35
use itertools::Itertools;
35
36
use parquet::{
@@ -193,7 +194,7 @@ impl Stream {
193
194
/// Only includes ones starting from the previous minute
194
195
pub fn arrow_files_grouped_exclude_time(
195
196
&self,
196
- exclude: NaiveDateTime ,
197
+ exclude: DateTime<Utc> ,
197
198
shutdown_signal: bool,
198
199
) -> HashMap<PathBuf, Vec<PathBuf>> {
199
200
let mut grouped_arrow_file: HashMap<PathBuf, Vec<PathBuf>> = HashMap::new();
@@ -203,12 +204,14 @@ impl Stream {
203
204
// don't keep the ones for the current minute
204
205
if !shutdown_signal {
205
206
arrow_files.retain(|path| {
206
- !path
207
- .file_name()
208
- .unwrap()
209
- .to_str()
210
- .unwrap()
211
- .starts_with(&exclude.format("%Y%m%dT%H%M").to_string())
207
+ path.metadata()
208
+ .expect("Arrow file should exist on disk")
209
+ .created()
210
+ .expect("Creation time should be accessible")
211
+ .duration_since(UNIX_EPOCH)
212
+ .expect("Unix Timestamp Duration")
213
+ .as_millis()
214
+ < exclude.timestamp_millis() as u128
212
215
});
213
216
}
214
217
@@ -430,8 +433,7 @@ impl Stream {
430
433
) -> Result<Option<Schema>, StagingError> {
431
434
let mut schemas = Vec::new();
432
435
433
- let time = chrono::Utc::now().naive_utc();
434
- let staging_files = self.arrow_files_grouped_exclude_time(time, shutdown_signal);
436
+ let staging_files = self.arrow_files_grouped_exclude_time(Utc::now(), shutdown_signal);
435
437
if staging_files.is_empty() {
436
438
metrics::STAGING_FILES
437
439
.with_label_values(&[&self.stream_name])
0 commit comments