@@ -34,12 +34,12 @@ use arrow_array::{
3434 PrimitiveArray ,
3535} ;
3636use arrow_schema:: { DataType , Schema } ;
37- use chrono:: { Datelike , NaiveDate , TimeZone , Timelike } ;
37+ use chrono:: { NaiveDate , TimeZone , Timelike } ;
3838use datafusion:: logical_expr:: ColumnarValue ;
3939use datafusion_common:: { internal_err, Result as DataFusionResult , ScalarValue } ;
4040use datafusion_physical_expr:: PhysicalExpr ;
41- use log :: info ;
42- use num :: { traits :: CheckedNeg , CheckedSub , Integer , Num } ;
41+ use num :: { traits :: CheckedNeg , CheckedSub , Integer , Num , ToPrimitive } ;
42+ use once_cell :: sync :: Lazy ;
4343use regex:: Regex ;
4444
4545use crate :: {
@@ -50,6 +50,7 @@ use crate::{
5050} ;
5151
5252static TIMESTAMP_FORMAT : Option < & str > = Some ( "%Y-%m-%d %H:%M:%S%.f" ) ;
53+ static EPOCH : Lazy < NaiveDate > = Lazy :: new ( || NaiveDate :: from_ymd_opt ( 1970 , 1 , 1 ) . unwrap ( ) ) ;
5354static CAST_OPTIONS : CastOptions = CastOptions {
5455 safe : true ,
5556 format_options : FormatOptions :: new ( )
@@ -110,23 +111,7 @@ macro_rules! cast_utf8_to_timestamp {
110111 result
111112 } } ;
112113}
113- macro_rules! cast_utf8_to_date {
114- ( $array: expr, $eval_mode: expr, $array_type: ty, $date_parser: ident) => { {
115- let len = $array. len( ) ;
116- let mut cast_array = PrimitiveArray :: <$array_type>:: builder( len) ;
117- for i in 0 ..len {
118- if $array. is_null( i) {
119- cast_array. append_null( )
120- } else if let Ok ( Some ( cast_value) ) = $date_parser( $array. value( i) . trim( ) , $eval_mode) {
121- cast_array. append_value( cast_value) ;
122- } else {
123- cast_array. append_null( )
124- }
125- }
126- let result: ArrayRef = Arc :: new( cast_array. finish( ) ) as ArrayRef ;
127- result
128- } } ;
129- }
114+
130115macro_rules! cast_float_to_string {
131116 ( $from: expr, $eval_mode: expr, $type: ty, $output_type: ty, $offset_type: ty) => { {
132117
@@ -411,7 +396,7 @@ impl Cast {
411396 if string_array. is_null ( i) {
412397 cast_array. append_null ( )
413398 } else if let Ok ( Some ( cast_value) ) =
414- date_parser ( string_array. value ( i) . trim ( ) , eval_mode)
399+ date_parser ( string_array. value ( i) , eval_mode)
415400 {
416401 cast_array. append_value ( cast_value) ;
417402 } else {
@@ -1009,7 +994,6 @@ fn parse_str_to_time_only_timestamp(value: &str) -> CometResult<Option<i64>> {
1009994}
1010995
1011996fn date_parser ( value : & str , eval_mode : EvalMode ) -> CometResult < Option < i32 > > {
1012- info ! ( "Date String is {:?}" , value) ;
1013997 let value = value. trim ( ) ;
1014998 if value. is_empty ( ) {
1015999 return Ok ( None ) ;
@@ -1041,7 +1025,6 @@ fn date_parser(value: &str, eval_mode: EvalMode) -> CometResult<Option<i32>> {
10411025 _ => None ,
10421026 } ;
10431027
1044- info ! ( "Returned Date is {:?}" , date) ;
10451028 if date. is_none ( ) && eval_mode == EvalMode :: Ansi {
10461029 return Err ( CometError :: CastInvalidValue {
10471030 value : value. to_string ( ) ,
@@ -1051,14 +1034,17 @@ fn date_parser(value: &str, eval_mode: EvalMode) -> CometResult<Option<i32>> {
10511034 }
10521035
10531036 match date {
1054- Some ( date) => Ok ( Some ( date. num_days_from_ce ( ) ) ) ,
1037+ Some ( date) => {
1038+ let duration_since_epoch = date. signed_duration_since ( * EPOCH ) . num_days ( ) ;
1039+ Ok ( Some ( duration_since_epoch. to_i32 ( ) . unwrap ( ) ) )
1040+ }
10551041 None => Ok ( None ) ,
10561042 }
10571043}
10581044
10591045#[ cfg( test) ]
10601046mod tests {
1061- use arrow:: datatypes:: { Date32Type , TimestampMicrosecondType } ;
1047+ use arrow:: datatypes:: TimestampMicrosecondType ;
10621048 use arrow_array:: StringArray ;
10631049 use arrow_schema:: TimeUnit ;
10641050
@@ -1133,7 +1119,7 @@ mod tests {
11331119 //test valid dates for all eval modes
11341120 for date in & [ "2020" , "2020-01" , "2020-01-01" , "2020-01-01T" ] {
11351121 for eval_mode in & [ EvalMode :: Legacy , EvalMode :: Ansi , EvalMode :: Try ] {
1136- assert_eq ! ( date_parser( * date, * eval_mode) . unwrap( ) , Some ( 737425 ) ) ;
1122+ assert_eq ! ( date_parser( * date, * eval_mode) . unwrap( ) , Some ( 18262 ) ) ;
11371123 }
11381124 }
11391125
@@ -1175,24 +1161,28 @@ mod tests {
11751161 }
11761162
11771163 #[ test]
1178- fn test_cast_string_as_date ( ) {
1164+ fn test_cast_string_to_date ( ) {
1165+ // Create a StringArray with various date strings
11791166 let array: ArrayRef = Arc :: new ( StringArray :: from ( vec ! [
11801167 Some ( "2020" ) ,
11811168 Some ( "2020-01" ) ,
11821169 Some ( "2020-01-01" ) ,
11831170 Some ( "2020-01-01T" ) ,
11841171 ] ) ) ;
11851172
1186- let string_array = array
1187- . as_any ( )
1188- . downcast_ref :: < GenericStringArray < i32 > > ( )
1189- . expect ( "Expected a string array" ) ;
1190-
1191- let eval_mode = EvalMode :: Legacy ;
1192- let result = cast_utf8_to_date ! ( & string_array, eval_mode, Date32Type , date_parser) ;
1173+ // Invoke cast_string_to_date
1174+ let result =
1175+ Cast :: cast_string_to_date ( & array, & DataType :: Date32 , EvalMode :: Legacy ) . unwrap ( ) ;
11931176
1194- assert_eq ! ( result. data_type( ) , & DataType :: Date32 ) ;
1195- assert_eq ! ( result. len( ) , 4 ) ;
1177+ // Verify that each element of the result is 18262
1178+ let date32_array = result
1179+ . as_any ( )
1180+ . downcast_ref :: < arrow:: array:: Date32Array > ( )
1181+ . unwrap ( ) ;
1182+ assert_eq ! ( date32_array. len( ) , 4 ) ;
1183+ date32_array
1184+ . iter ( )
1185+ . for_each ( |v| assert_eq ! ( v. unwrap( ) , 18262 ) ) ;
11961186 }
11971187
11981188 #[ test]
0 commit comments