Skip to content

Commit 58b4200

Browse files
author
Devdutt Shenoi
committed
feat: construct path for each request as suggested by @nikhilsinhaparseable
1 parent 1ee5dd0 commit 58b4200

File tree

1 file changed

+17
-20
lines changed

1 file changed

+17
-20
lines changed

src/parseable/streams.rs

Lines changed: 17 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -119,23 +119,21 @@ impl Stream {
119119
) -> Result<(), StagingError> {
120120
let mut guard = self.writer.lock().unwrap();
121121
if self.options.mode != Mode::Query || stream_type == StreamType::Internal {
122+
let filename =
123+
self.filename_by_partition(schema_key, parsed_timestamp, custom_partition_values);
122124
match guard.disk.get_mut(schema_key) {
123125
Some(writer) => {
124126
writer.write(record)?;
125127
}
126128
None => {
127129
// entry is not present thus we create it
128-
let file_path = self.path_by_current_time(
129-
schema_key,
130-
parsed_timestamp,
131-
custom_partition_values,
132-
);
133130
std::fs::create_dir_all(&self.data_path)?;
134131

135132
let range = TimeRange::granularity_range(
136133
parsed_timestamp.and_local_timezone(Utc).unwrap(),
137134
OBJECT_STORE_DATA_GRANULARITY,
138135
);
136+
let file_path = self.data_path.join(filename);
139137
let mut writer = DiskWriter::try_new(file_path, &record.schema(), range)
140138
.expect("File and RecordBatch both are checked");
141139

@@ -150,17 +148,17 @@ impl Stream {
150148
Ok(())
151149
}
152150

153-
pub fn path_by_current_time(
151+
pub fn filename_by_partition(
154152
&self,
155153
stream_hash: &str,
156154
parsed_timestamp: NaiveDateTime,
157155
custom_partition_values: &HashMap<String, String>,
158-
) -> PathBuf {
156+
) -> String {
159157
let mut hostname = hostname::get().unwrap().into_string().unwrap();
160158
if let Some(id) = &self.ingestor_id {
161159
hostname.push_str(id);
162160
}
163-
let filename = format!(
161+
format!(
164162
"{stream_hash}.date={}.hour={:02}.minute={}.{}{hostname}.data.{ARROW_FILE_EXTENSION}",
165163
parsed_timestamp.date(),
166164
parsed_timestamp.hour(),
@@ -170,8 +168,7 @@ impl Stream {
170168
.sorted_by_key(|v| v.0)
171169
.map(|(key, value)| format!("{key}={value}."))
172170
.join("")
173-
);
174-
self.data_path.join(filename)
171+
)
175172
}
176173

177174
pub fn arrow_files(&self) -> Vec<PathBuf> {
@@ -877,18 +874,18 @@ mod tests {
877874
None,
878875
);
879876

880-
let expected_path = staging.data_path.join(format!(
877+
let expected = format!(
881878
"{stream_hash}.date={}.hour={:02}.minute={}.{}.data.{ARROW_FILE_EXTENSION}",
882879
parsed_timestamp.date(),
883880
parsed_timestamp.hour(),
884881
Minute::from(parsed_timestamp).to_slot(OBJECT_STORE_DATA_GRANULARITY),
885882
hostname::get().unwrap().into_string().unwrap()
886-
));
883+
);
887884

888-
let generated_path =
889-
staging.path_by_current_time(stream_hash, parsed_timestamp, &custom_partition_values);
885+
let generated =
886+
staging.filename_by_partition(stream_hash, parsed_timestamp, &custom_partition_values);
890887

891-
assert_eq!(generated_path, expected_path);
888+
assert_eq!(generated, expected);
892889
}
893890

894891
#[test]
@@ -911,18 +908,18 @@ mod tests {
911908
None,
912909
);
913910

914-
let expected_path = staging.data_path.join(format!(
911+
let expected = format!(
915912
"{stream_hash}.date={}.hour={:02}.minute={}.key1=value1.key2=value2.{}.data.{ARROW_FILE_EXTENSION}",
916913
parsed_timestamp.date(),
917914
parsed_timestamp.hour(),
918915
Minute::from(parsed_timestamp).to_slot(OBJECT_STORE_DATA_GRANULARITY),
919916
hostname::get().unwrap().into_string().unwrap()
920-
));
917+
);
921918

922-
let generated_path =
923-
staging.path_by_current_time(stream_hash, parsed_timestamp, &custom_partition_values);
919+
let generated =
920+
staging.filename_by_partition(stream_hash, parsed_timestamp, &custom_partition_values);
924921

925-
assert_eq!(generated_path, expected_path);
922+
assert_eq!(generated, expected);
926923
}
927924

928925
#[test]

0 commit comments

Comments (0)