Skip to content

Commit 13c7463

Browse files
committed
awk: fix string parsing
awk: fix string grammar
1 parent 60e235d commit 13c7463

File tree

2 files changed

+63
-8
lines changed

2 files changed

+63
-8
lines changed

awk/src/compiler.rs

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -600,15 +600,16 @@ impl Compiler {
600600
))
601601
}
602602
Rule::string => {
603-
let index = self.push_constant(Constant::String(
604-
escape_string_contents(primary.as_str().trim_matches('"'))
605-
.map_err(|e| pest_error_from_span(primary.as_span(), e))?,
606-
));
603+
let span = primary.as_span();
604+
let string_line_col = primary.line_col();
605+
let str = escape_string_contents(first_child(primary).as_str())
606+
.map_err(|e| pest_error_from_span(span, e))?;
607+
let index = self.push_constant(Constant::String(str));
607608
Ok(Expr::new(
608609
ExprKind::String,
609610
Instructions::from_instructions_and_line_col(
610611
vec![OpCode::PushConstant(index)],
611-
primary.line_col(),
612+
string_line_col,
612613
),
613614
))
614615
}
@@ -1960,9 +1961,51 @@ mod test {
19601961

19611962
#[test]
19621963
fn test_compile_string() {
1964+
let (_, constants) = compile_expr(r#""""#);
1965+
assert_eq!(constants, vec!["".into()]);
1966+
19631967
let (_, constants) = compile_expr(r#""hello""#);
19641968
assert_eq!(constants, vec!["hello".into()]);
19651969

1970+
let (_, constants) = compile_expr(r#""\"""#);
1971+
assert_eq!(constants, vec![Constant::from("\"")]);
1972+
1973+
let (_, constants) = compile_expr(r#""\/""#);
1974+
assert_eq!(constants, vec![Constant::from("/")]);
1975+
1976+
let (_, constants) = compile_expr(r#""\a""#);
1977+
assert_eq!(constants, vec![Constant::from("\x07")]);
1978+
1979+
let (_, constants) = compile_expr(r#""\b""#);
1980+
assert_eq!(constants, vec![Constant::from("\x08")]);
1981+
1982+
let (_, constants) = compile_expr(r#""\f""#);
1983+
assert_eq!(constants, vec![Constant::from("\x0C")]);
1984+
1985+
let (_, constants) = compile_expr(r#""\n""#);
1986+
assert_eq!(constants, vec![Constant::from("\n")]);
1987+
1988+
let (_, constants) = compile_expr(r#""\r""#);
1989+
assert_eq!(constants, vec![Constant::from("\r")]);
1990+
1991+
let (_, constants) = compile_expr(r#""\t""#);
1992+
assert_eq!(constants, vec![Constant::from("\t")]);
1993+
1994+
let (_, constants) = compile_expr(r#""\v""#);
1995+
assert_eq!(constants, vec![Constant::from("\x0B")]);
1996+
1997+
let (_, constants) = compile_expr(r#""\\""#);
1998+
assert_eq!(constants, vec![Constant::from("\\")]);
1999+
2000+
let (_, constants) = compile_expr(r#""\7""#);
2001+
assert_eq!(constants, vec![Constant::from("\x07")]);
2002+
2003+
let (_, constants) = compile_expr(r#""\41""#);
2004+
assert_eq!(constants, vec![Constant::from("!")]);
2005+
2006+
let (_, constants) = compile_expr(r#""\142""#);
2007+
assert_eq!(constants, vec![Constant::from("b")]);
2008+
19662009
let (_, constants) = compile_expr(r#""hello\nworld""#);
19672010
assert_eq!(constants, vec![Constant::from("hello\nworld")]);
19682011

@@ -1980,6 +2023,12 @@ mod test {
19802023

19812024
let (_, constants) = compile_expr(r#""hello\141world""#);
19822025
assert_eq!(constants, vec![Constant::from("helloaworld")]);
2026+
2027+
let (_, constants) = compile_expr(r#""\\""#);
2028+
assert_eq!(constants, vec![Constant::from("\\")]);
2029+
2030+
let (_, constants) = compile_expr(r#""\"""#);
2031+
assert_eq!(constants, vec![Constant::from("\"")]);
19832032
}
19842033

19852034
#[test]

awk/src/grammar.pest

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,13 @@
1010
WHITESPACE = _{ " " | "\t" | "\r" }
1111
COMMENT = _{ "#" ~ (!"\n" ~ ANY)* ~ &"\n" }
1212

13-
string = { "\"" ~ (("\\\"") | (!("\"" | "\n") ~ ANY))* ~ "\"" }
13+
string = ${ "\"" ~ string_contents ~ "\"" }
14+
string_contents = @{ string_char* }
15+
string_char = {
16+
!("\"" | "\\") ~ ANY
17+
| "\\" ~ ("\"" | "/" | "a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\")
18+
| "\\" ~ ASCII_DIGIT{1, 3}
19+
}
1420
ere = { "/" ~ (("\\" ~ "/") | (!("/" | "\n") ~ ANY))* ~ "/" }
1521
number = @{ decimal_float | integer }
1622
digit = { ('0'..'9') }
@@ -46,7 +52,7 @@ builtin_func = _{
4652
| tolower
4753
| toupper
4854
| close
49-
| fflush
55+
| fflush
5056
| system
5157
}
5258

@@ -163,7 +169,7 @@ ut_for = { "for" ~ "(" ~ simple_statement? ~ ";" ~ expr? ~ ";" ~ simple_stat
163169
ut_foreach = { "for" ~ "(" ~ name ~ in_op ~ name ~ ")" ~ opt_newline ~ unterminated_statement }
164170

165171
terminatable_statement = _{
166-
nextfile
172+
nextfile
167173
| next
168174
| break_stmt
169175
| continue_stmt

0 commit comments

Comments
 (0)