Skip to content

Commit 3cee271

Browse files
author
Devdutt Shenoi
committed
refactor: directly construct Field
1 parent 7616d47 commit 3cee271

File tree

1 file changed

+21
-64
lines changed

1 file changed

+21
-64
lines changed

src/static_schema.rs

Lines changed: 21 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@
1616
*
1717
*/
1818

19-
use std::{collections::HashMap, sync::Arc};
19+
use std::sync::Arc;
2020

2121
use arrow_schema::{DataType, Field, Schema, TimeUnit};
2222
use serde::{Deserialize, Serialize};
2323

2424
use crate::event::DEFAULT_TIMESTAMP_KEY;
2525

26+
const DEFAULT_NULLABLE: bool = true;
27+
2628
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
2729
pub struct StaticSchema {
2830
fields: Vec<SchemaFields>,
@@ -48,7 +50,7 @@ impl StaticSchema {
4850
time_partition: &str,
4951
custom_partition: Option<&String>,
5052
) -> Result<Arc<Schema>, StaticSchemaError> {
51-
let mut fields = Vec::new();
53+
let mut schema: Vec<Arc<Field>> = Vec::new();
5254
let mut time_partition_exists = false;
5355

5456
if let Some(custom_partition) = custom_partition {
@@ -68,50 +70,30 @@ impl StaticSchema {
6870
field.data_type = "datetime".to_string();
6971
}
7072

71-
let parsed_field = Fields {
72-
name: field.name.clone(),
73-
74-
data_type: {
75-
match field.data_type.as_str() {
76-
"int" => DataType::Int64,
77-
"double" | "float" => DataType::Float64,
78-
"boolean" => DataType::Boolean,
79-
"string" => DataType::Utf8,
80-
"datetime" => DataType::Timestamp(TimeUnit::Millisecond, None),
81-
"string_list" => {
82-
DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)))
83-
}
84-
"int_list" => {
85-
DataType::List(Arc::new(Field::new("item", DataType::Int64, true)))
86-
}
87-
"double_list" | "float_list" => {
88-
DataType::List(Arc::new(Field::new("item", DataType::Float64, true)))
89-
}
90-
"boolean_list" => {
91-
DataType::List(Arc::new(Field::new("item", DataType::Boolean, true)))
92-
}
93-
_ => DataType::Null,
94-
}
95-
},
96-
nullable: default_nullable(),
97-
dict_id: default_dict_id(),
98-
dict_is_ordered: default_dict_is_ordered(),
99-
metadata: HashMap::new(),
73+
let data_type = match field.data_type.as_str() {
74+
"int" => DataType::Int64,
75+
"double" | "float" => DataType::Float64,
76+
"boolean" => DataType::Boolean,
77+
"string" => DataType::Utf8,
78+
"datetime" => DataType::Timestamp(TimeUnit::Millisecond, None),
79+
"string_list" => DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))),
80+
"int_list" => DataType::List(Arc::new(Field::new("item", DataType::Int64, true))),
81+
"double_list" | "float_list" => {
82+
DataType::List(Arc::new(Field::new("item", DataType::Float64, true)))
83+
}
84+
"boolean_list" => {
85+
DataType::List(Arc::new(Field::new("item", DataType::Boolean, true)))
86+
}
87+
_ => DataType::Null,
10088
};
101-
102-
fields.push(parsed_field);
89+
let field = Field::new(&field.name, data_type, DEFAULT_NULLABLE);
90+
schema.push(Arc::new(field));
10391
}
10492

10593
if !time_partition.is_empty() && !time_partition_exists {
10694
return Err(StaticSchemaError::MissingTime(time_partition.to_owned()));
10795
}
10896

109-
let mut schema: Vec<Arc<Field>> = Vec::new();
110-
for field in fields {
111-
let field = Field::new(field.name.clone(), field.data_type.clone(), field.nullable);
112-
schema.push(Arc::new(field));
113-
}
114-
11597
// insert the p_timestamp field at index 0 of the event schema
11698
schema.insert(
11799
0,
@@ -132,28 +114,3 @@ pub struct SchemaFields {
132114
name: String,
133115
data_type: String,
134116
}
135-
136-
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
137-
#[serde(rename_all = "camelCase")]
138-
pub struct Fields {
139-
name: String,
140-
data_type: DataType,
141-
nullable: bool,
142-
dict_id: i64,
143-
dict_is_ordered: bool,
144-
metadata: HashMap<String, String>,
145-
}
146-
147-
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
148-
149-
pub struct Metadata {}
150-
151-
fn default_nullable() -> bool {
152-
true
153-
}
154-
fn default_dict_id() -> i64 {
155-
0
156-
}
157-
fn default_dict_is_ordered() -> bool {
158-
false
159-
}

0 commit comments

Comments
 (0)