Skip to content

Commit 236a6c3

Browse files
committed
Compute the correct number of days since the Unix epoch (1970-01-01) when parsing dates.
1 parent 646274c commit 236a6c3

File tree

1 file changed

+26
-36
lines changed
  • core/src/execution/datafusion/expressions

1 file changed

+26
-36
lines changed

core/src/execution/datafusion/expressions/cast.rs

Lines changed: 26 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ use arrow_array::{
3434
PrimitiveArray,
3535
};
3636
use arrow_schema::{DataType, Schema};
37-
use chrono::{Datelike, NaiveDate, TimeZone, Timelike};
37+
use chrono::{NaiveDate, TimeZone, Timelike};
3838
use datafusion::logical_expr::ColumnarValue;
3939
use datafusion_common::{internal_err, Result as DataFusionResult, ScalarValue};
4040
use datafusion_physical_expr::PhysicalExpr;
41-
use log::info;
42-
use num::{traits::CheckedNeg, CheckedSub, Integer, Num};
41+
use num::{traits::CheckedNeg, CheckedSub, Integer, Num, ToPrimitive};
42+
use once_cell::sync::Lazy;
4343
use regex::Regex;
4444

4545
use crate::{
@@ -50,6 +50,7 @@ use crate::{
5050
};
5151

5252
static TIMESTAMP_FORMAT: Option<&str> = Some("%Y-%m-%d %H:%M:%S%.f");
53+
static EPOCH: Lazy<NaiveDate> = Lazy::new(|| NaiveDate::from_ymd_opt(1970, 1, 1).unwrap());
5354
static CAST_OPTIONS: CastOptions = CastOptions {
5455
safe: true,
5556
format_options: FormatOptions::new()
@@ -110,23 +111,7 @@ macro_rules! cast_utf8_to_timestamp {
110111
result
111112
}};
112113
}
113-
macro_rules! cast_utf8_to_date {
114-
($array:expr, $eval_mode:expr, $array_type:ty, $date_parser:ident) => {{
115-
let len = $array.len();
116-
let mut cast_array = PrimitiveArray::<$array_type>::builder(len);
117-
for i in 0..len {
118-
if $array.is_null(i) {
119-
cast_array.append_null()
120-
} else if let Ok(Some(cast_value)) = $date_parser($array.value(i).trim(), $eval_mode) {
121-
cast_array.append_value(cast_value);
122-
} else {
123-
cast_array.append_null()
124-
}
125-
}
126-
let result: ArrayRef = Arc::new(cast_array.finish()) as ArrayRef;
127-
result
128-
}};
129-
}
114+
130115
macro_rules! cast_float_to_string {
131116
($from:expr, $eval_mode:expr, $type:ty, $output_type:ty, $offset_type:ty) => {{
132117

@@ -411,7 +396,7 @@ impl Cast {
411396
if string_array.is_null(i) {
412397
cast_array.append_null()
413398
} else if let Ok(Some(cast_value)) =
414-
date_parser(string_array.value(i).trim(), eval_mode)
399+
date_parser(string_array.value(i), eval_mode)
415400
{
416401
cast_array.append_value(cast_value);
417402
} else {
@@ -1009,7 +994,6 @@ fn parse_str_to_time_only_timestamp(value: &str) -> CometResult<Option<i64>> {
1009994
}
1010995

1011996
fn date_parser(value: &str, eval_mode: EvalMode) -> CometResult<Option<i32>> {
1012-
info!("Date String is {:?}", value);
1013997
let value = value.trim();
1014998
if value.is_empty() {
1015999
return Ok(None);
@@ -1041,7 +1025,6 @@ fn date_parser(value: &str, eval_mode: EvalMode) -> CometResult<Option<i32>> {
10411025
_ => None,
10421026
};
10431027

1044-
info!("Returned Date is {:?}", date);
10451028
if date.is_none() && eval_mode == EvalMode::Ansi {
10461029
return Err(CometError::CastInvalidValue {
10471030
value: value.to_string(),
@@ -1051,14 +1034,17 @@ fn date_parser(value: &str, eval_mode: EvalMode) -> CometResult<Option<i32>> {
10511034
}
10521035

10531036
match date {
1054-
Some(date) => Ok(Some(date.num_days_from_ce())),
1037+
Some(date) => {
1038+
let duration_since_epoch = date.signed_duration_since(*EPOCH).num_days();
1039+
Ok(Some(duration_since_epoch.to_i32().unwrap()))
1040+
}
10551041
None => Ok(None),
10561042
}
10571043
}
10581044

10591045
#[cfg(test)]
10601046
mod tests {
1061-
use arrow::datatypes::{Date32Type, TimestampMicrosecondType};
1047+
use arrow::datatypes::TimestampMicrosecondType;
10621048
use arrow_array::StringArray;
10631049
use arrow_schema::TimeUnit;
10641050

@@ -1133,7 +1119,7 @@ mod tests {
11331119
//test valid dates for all eval modes
11341120
for date in &["2020", "2020-01", "2020-01-01", "2020-01-01T"] {
11351121
for eval_mode in &[EvalMode::Legacy, EvalMode::Ansi, EvalMode::Try] {
1136-
assert_eq!(date_parser(*date, *eval_mode).unwrap(), Some(737425));
1122+
assert_eq!(date_parser(*date, *eval_mode).unwrap(), Some(18262));
11371123
}
11381124
}
11391125

@@ -1175,24 +1161,28 @@ mod tests {
11751161
}
11761162

11771163
#[test]
1178-
fn test_cast_string_as_date() {
1164+
fn test_cast_string_to_date() {
1165+
// Create a StringArray with various date strings
11791166
let array: ArrayRef = Arc::new(StringArray::from(vec![
11801167
Some("2020"),
11811168
Some("2020-01"),
11821169
Some("2020-01-01"),
11831170
Some("2020-01-01T"),
11841171
]));
11851172

1186-
let string_array = array
1187-
.as_any()
1188-
.downcast_ref::<GenericStringArray<i32>>()
1189-
.expect("Expected a string array");
1190-
1191-
let eval_mode = EvalMode::Legacy;
1192-
let result = cast_utf8_to_date!(&string_array, eval_mode, Date32Type, date_parser);
1173+
// Invoke cast_string_to_date
1174+
let result =
1175+
Cast::cast_string_to_date(&array, &DataType::Date32, EvalMode::Legacy).unwrap();
11931176

1194-
assert_eq!(result.data_type(), &DataType::Date32);
1195-
assert_eq!(result.len(), 4);
1177+
// Verify that each element of the result is 18262
1178+
let date32_array = result
1179+
.as_any()
1180+
.downcast_ref::<arrow::array::Date32Array>()
1181+
.unwrap();
1182+
assert_eq!(date32_array.len(), 4);
1183+
date32_array
1184+
.iter()
1185+
.for_each(|v| assert_eq!(v.unwrap(), 18262));
11961186
}
11971187

11981188
#[test]

0 commit comments

Comments
 (0)