@@ -26,12 +26,13 @@ use std::{
26
26
use anyhow:: { anyhow, Error as AnyError } ;
27
27
use arrow_array:: RecordBatch ;
28
28
use arrow_schema:: { DataType , Field , Schema , TimeUnit } ;
29
- use chrono:: DateTime ;
29
+ use chrono:: { DateTime , NaiveDateTime } ;
30
30
use serde:: { Deserialize , Serialize } ;
31
31
use serde_json:: Value ;
32
32
33
33
use crate :: {
34
34
metadata:: SchemaVersion ,
35
+ storage:: StreamType ,
35
36
utils:: arrow:: { get_field, get_timestamp_array, replace_columns} ,
36
37
} ;
37
38
@@ -105,15 +106,20 @@ pub trait EventFormat: Sized {
105
106
106
107
/// Decodes `data` into an Arrow `RecordBatch`, given the `schema` to decode against.
///
/// This is a required method: each implementor defines how its own
/// `Self::Data` is turned into a batch. Failures are reported as `AnyError`.
///
/// NOTE(review): presumably the returned batch's columns follow `schema` —
/// confirm against the implementors.
fn decode ( data : Self :: Data , schema : Arc < Schema > ) -> Result < RecordBatch , AnyError > ;
107
108
108
- fn into_recordbatch (
109
+ fn to_event (
109
110
self ,
111
+ stream_name : & str ,
112
+ origin_size : u64 ,
110
113
storage_schema : & HashMap < String , Arc < Field > > ,
111
114
static_schema_flag : bool ,
112
- time_partition : Option < & String > ,
115
+ parsed_timestamp : NaiveDateTime ,
116
+ time_partition : Option < String > ,
117
+ custom_partition_values : HashMap < String , String > ,
113
118
schema_version : SchemaVersion ,
114
- ) -> Result < ( RecordBatch , bool ) , AnyError > {
115
- let ( data, mut schema, is_first) =
116
- self . to_data ( storage_schema, time_partition, schema_version) ?;
119
+ stream_type : StreamType ,
120
+ ) -> Result < super :: Event , AnyError > {
121
+ let ( data, mut schema, is_first_event) =
122
+ self . to_data ( storage_schema, time_partition. as_ref ( ) , schema_version) ?;
117
123
118
124
if get_field ( & schema, DEFAULT_TIMESTAMP_KEY ) . is_some ( ) {
119
125
return Err ( anyhow ! (
@@ -137,8 +143,13 @@ pub trait EventFormat: Sized {
137
143
if !Self :: is_schema_matching ( new_schema. clone ( ) , storage_schema, static_schema_flag) {
138
144
return Err ( anyhow ! ( "Schema mismatch" ) ) ;
139
145
}
140
- new_schema =
141
- update_field_type_in_schema ( new_schema, None , time_partition, None , schema_version) ;
146
+ new_schema = update_field_type_in_schema (
147
+ new_schema,
148
+ None ,
149
+ time_partition. as_ref ( ) ,
150
+ None ,
151
+ schema_version,
152
+ ) ;
142
153
143
154
let mut rb = Self :: decode ( data, new_schema. clone ( ) ) ?;
144
155
rb = replace_columns (
@@ -148,7 +159,17 @@ pub trait EventFormat: Sized {
148
159
& [ Arc :: new ( get_timestamp_array ( rb. num_rows ( ) ) ) ] ,
149
160
) ;
150
161
151
- Ok ( ( rb, is_first) )
162
+ Ok ( super :: Event {
163
+ rb,
164
+ stream_name : stream_name. to_string ( ) ,
165
+ origin_format : "json" ,
166
+ origin_size,
167
+ is_first_event,
168
+ parsed_timestamp,
169
+ time_partition,
170
+ custom_partition_values,
171
+ stream_type,
172
+ } )
152
173
}
153
174
154
175
fn is_schema_matching (
0 commit comments