@@ -23,7 +23,7 @@ use serde::{Deserialize, Serialize};
23
23
use std:: str;
24
24
25
25
use arrow_schema:: { DataType , Field , Schema , TimeUnit } ;
26
- use std:: { collections:: HashMap , sync:: Arc } ;
26
+ use std:: { collections:: { HashMap , HashSet } , sync:: Arc } ;
27
27
#[ derive( Debug , Clone , PartialEq , Serialize , Deserialize ) ]
28
28
pub struct StaticSchema {
29
29
fields : Vec < SchemaFields > ,
@@ -87,7 +87,12 @@ pub fn convert_static_schema_to_arrow_schema(
87
87
}
88
88
}
89
89
}
90
+
91
+ let mut existing_field_names: HashSet < String > = HashSet :: new ( ) ;
92
+
90
93
for mut field in static_schema. fields {
94
+
95
+ validate_field_names ( & field. name , & mut existing_field_names) ?;
91
96
if !time_partition. is_empty ( ) && field. name == time_partition {
92
97
time_partition_exists = true ;
93
98
field. data_type = "datetime" . to_string ( ) ;
@@ -139,6 +144,7 @@ pub fn convert_static_schema_to_arrow_schema(
139
144
fn add_parseable_fields_to_static_schema (
140
145
parsed_schema : ParsedSchema ,
141
146
) -> Result < Arc < Schema > , AnyError > {
147
+
142
148
let mut schema: Vec < Arc < Field > > = Vec :: new ( ) ;
143
149
for field in parsed_schema. fields . iter ( ) {
144
150
let field = Field :: new ( field. name . clone ( ) , field. data_type . clone ( ) , field. nullable ) ;
@@ -176,3 +182,16 @@ fn default_dict_id() -> i64 {
176
182
/// Supplies the fallback value for a "dictionary is ordered" flag: always `false`.
///
/// NOTE(review): the name suggests this is wired up as a serde field default
/// (`#[serde(default = "...")]`) — confirm at the use site.
fn default_dict_is_ordered() -> bool {
    false
}
185
+
186
+ fn validate_field_names ( field_name : & str , existing_fields : & mut HashSet < String > ) -> Result < ( ) , AnyError > {
187
+
188
+ if field_name. is_empty ( ) {
189
+ return Err ( anyhow ! ( "field names should not be empty" ) ) ;
190
+ }
191
+
192
+ if !existing_fields. insert ( field_name. to_string ( ) ) {
193
+ return Err ( anyhow ! ( "duplicate field name: {}" , field_name) ) ;
194
+ }
195
+
196
+ Ok ( ( ) )
197
+ }
0 commit comments