Skip to content

Commit e2f738a

Browse files
author
Emil Ejbyfeldt
committed
Fix overflow in date_parser
The date_parser was introduced in apache#383 and is mostly a direct port of code in Spark. Since the Spark code runs on the JVM, integer overflow there is defined as wrapping. The proposed fix is to use std::num::Wrapping to get the same wrapping behavior in Rust. The overflowed value will still be discarded in a later check that uses `current_segment_digits`, so allowing the overflow does not lead to correctness issues. This resolves one of the overflows discussed in apache#481
1 parent c1cdf46 commit e2f738a

File tree

1 file changed

+11
-7
lines changed
  • core/src/execution/datafusion/expressions

1 file changed

+11
-7
lines changed

core/src/execution/datafusion/expressions/cast.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use std::{
1919
any::Any,
2020
fmt::{Debug, Display, Formatter},
2121
hash::{Hash, Hasher},
22+
num::Wrapping,
2223
sync::Arc,
2324
};
2425

@@ -1570,7 +1571,7 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> CometResult<Option<i32>>
15701571
let mut date_segments = [1, 1, 1];
15711572
let mut sign = 1;
15721573
let mut current_segment = 0;
1573-
let mut current_segment_value = 0;
1574+
let mut current_segment_value = Wrapping(0);
15741575
let mut current_segment_digits = 0;
15751576
let bytes = date_str.as_bytes();
15761577

@@ -1597,16 +1598,16 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> CometResult<Option<i32>>
15971598
return return_result(date_str, eval_mode);
15981599
}
15991600
//if valid update corresponding segment with the current segment value.
1600-
date_segments[current_segment as usize] = current_segment_value;
1601-
current_segment_value = 0;
1601+
date_segments[current_segment as usize] = current_segment_value.0;
1602+
current_segment_value = Wrapping(0);
16021603
current_segment_digits = 0;
16031604
current_segment += 1;
16041605
} else if !b.is_ascii_digit() {
16051606
return return_result(date_str, eval_mode);
16061607
} else {
16071608
//increment value of current segment by the next digit
1608-
let parsed_value = (b - b'0') as i32;
1609-
current_segment_value = current_segment_value * 10 + parsed_value;
1609+
let parsed_value = Wrapping((b - b'0') as i32);
1610+
current_segment_value = current_segment_value * Wrapping(10) + parsed_value;
16101611
current_segment_digits += 1;
16111612
}
16121613
j += 1;
@@ -1622,7 +1623,7 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> CometResult<Option<i32>>
16221623
return return_result(date_str, eval_mode);
16231624
}
16241625

1625-
date_segments[current_segment as usize] = current_segment_value;
1626+
date_segments[current_segment as usize] = current_segment_value.0;
16261627

16271628
match NaiveDate::from_ymd_opt(
16281629
sign * date_segments[0],
@@ -1836,6 +1837,8 @@ mod tests {
18361837
Some(" 202 "),
18371838
Some("\n 2020-\r8 "),
18381839
Some("2020-01-01T"),
1840+
// Overflows i32
1841+
Some("-4607172990231812908"),
18391842
]));
18401843

18411844
for eval_mode in &[EvalMode::Legacy, EvalMode::Try] {
@@ -1857,7 +1860,8 @@ mod tests {
18571860
None,
18581861
None,
18591862
None,
1860-
Some(18262)
1863+
Some(18262),
1864+
None
18611865
]
18621866
);
18631867
}

0 commit comments

Comments
 (0)