@@ -62,6 +62,7 @@ impl EventFormat for Event {
62
62
schema : & HashMap < String , Arc < Field > > ,
63
63
time_partition : Option < & String > ,
64
64
schema_version : SchemaVersion ,
65
+ static_schema_flag : bool ,
65
66
) -> Result < ( Self :: Data , Vec < Arc < Field > > , bool ) , anyhow:: Error > {
66
67
let stream_schema = schema;
67
68
@@ -111,7 +112,7 @@ impl EventFormat for Event {
111
112
112
113
if value_arr
113
114
. iter ( )
114
- . any ( |value| fields_mismatch ( & schema, value, schema_version) )
115
+ . any ( |value| fields_mismatch ( & schema, value, schema_version, static_schema_flag ) )
115
116
{
116
117
return Err ( anyhow ! (
117
118
"Could not process this event due to mismatch in datatype"
@@ -253,39 +254,64 @@ fn collect_keys<'a>(values: impl Iterator<Item = &'a Value>) -> Result<Vec<&'a s
253
254
Ok ( keys)
254
255
}
255
256
256
- fn fields_mismatch ( schema : & [ Arc < Field > ] , body : & Value , schema_version : SchemaVersion ) -> bool {
257
+ fn fields_mismatch (
258
+ schema : & [ Arc < Field > ] ,
259
+ body : & Value ,
260
+ schema_version : SchemaVersion ,
261
+ static_schema_flag : bool ,
262
+ ) -> bool {
257
263
for ( name, val) in body. as_object ( ) . expect ( "body is of object variant" ) {
258
264
if val. is_null ( ) {
259
265
continue ;
260
266
}
261
267
let Some ( field) = get_field ( schema, name) else {
262
268
return true ;
263
269
} ;
264
- if !valid_type ( field. data_type ( ) , val, schema_version) {
270
+ if !valid_type ( field, val, schema_version, static_schema_flag ) {
265
271
return true ;
266
272
}
267
273
}
268
274
false
269
275
}
270
276
271
- fn valid_type ( data_type : & DataType , value : & Value , schema_version : SchemaVersion ) -> bool {
272
- match data_type {
277
+ fn valid_type (
278
+ field : & Field ,
279
+ value : & Value ,
280
+ schema_version : SchemaVersion ,
281
+ static_schema_flag : bool ,
282
+ ) -> bool {
283
+ match field. data_type ( ) {
273
284
DataType :: Boolean => value. is_boolean ( ) ,
274
- DataType :: Int8 | DataType :: Int16 | DataType :: Int32 | DataType :: Int64 => value. is_i64 ( ) ,
285
+ DataType :: Int8 | DataType :: Int16 | DataType :: Int32 | DataType :: Int64 => {
286
+ if static_schema_flag {
287
+ if let Value :: String ( s) = value {
288
+ return s. parse :: < i64 > ( ) . is_ok ( ) ;
289
+ }
290
+ }
291
+ value. is_i64 ( )
292
+ }
275
293
DataType :: UInt8 | DataType :: UInt16 | DataType :: UInt32 | DataType :: UInt64 => value. is_u64 ( ) ,
276
294
DataType :: Float16 | DataType :: Float32 => value. is_f64 ( ) ,
277
- // All numbers can be cast as Float64 from schema version v1
278
- DataType :: Float64 if schema_version == SchemaVersion :: V1 => value. is_number ( ) ,
279
- DataType :: Float64 if schema_version != SchemaVersion :: V1 => value. is_f64 ( ) ,
295
+ DataType :: Float64 => {
296
+ if static_schema_flag {
297
+ if let Value :: String ( s) = value. clone ( ) {
298
+ return s. parse :: < f64 > ( ) . is_ok ( ) || s. parse :: < i64 > ( ) . is_ok ( ) ;
299
+ }
300
+ return value. is_number ( ) ;
301
+ }
302
+ match schema_version {
303
+ SchemaVersion :: V1 => value. is_number ( ) ,
304
+ _ => value. is_f64 ( ) ,
305
+ }
306
+ }
280
307
DataType :: Utf8 => value. is_string ( ) ,
281
308
DataType :: List ( field) => {
282
- let data_type = field. data_type ( ) ;
283
309
if let Value :: Array ( arr) = value {
284
310
for elem in arr {
285
311
if elem. is_null ( ) {
286
312
continue ;
287
313
}
288
- if !valid_type ( data_type , elem, schema_version) {
314
+ if !valid_type ( field , elem, schema_version, static_schema_flag ) {
289
315
return false ;
290
316
}
291
317
}
@@ -303,7 +329,7 @@ fn valid_type(data_type: &DataType, value: &Value, schema_version: SchemaVersion
303
329
if value. is_null ( ) {
304
330
continue ;
305
331
}
306
- if !valid_type ( field. data_type ( ) , value, schema_version) {
332
+ if !valid_type ( field, value, schema_version, static_schema_flag ) {
307
333
return false ;
308
334
}
309
335
} else {
@@ -317,7 +343,11 @@ fn valid_type(data_type: &DataType, value: &Value, schema_version: SchemaVersion
317
343
}
318
344
DataType :: Timestamp ( _, _) => value. is_string ( ) || value. is_number ( ) ,
319
345
_ => {
320
- error ! ( "Unsupported datatype {:?}, value {:?}" , data_type, value) ;
346
+ error ! (
347
+ "Unsupported datatype {:?}, value {:?}" ,
348
+ field. data_type( ) ,
349
+ value
350
+ ) ;
321
351
unreachable ! ( )
322
352
}
323
353
}
0 commit comments