Skip to content

Commit cef7320

Browse files
authored
Merge pull request #65 from wfdewith/string-escapes
Store unescaped string in AST node
2 parents 30c0f8b + a7945a8 commit cef7320

File tree

14 files changed

+430
-271
lines changed

14 files changed

+430
-271
lines changed

.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
- name: Install dependencies
2828
run: |
2929
python -m pip install --upgrade pip
30-
python -m pip install flake8 pytest
30+
python -m pip install flake8 pytest pytest-subtests
3131
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
3232
- name: Lint with flake8
3333
run: |

luaparser/astnodes.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -628,18 +628,21 @@ class String(Expression):
628628
"""Define the Lua string expression.
629629
630630
Attributes:
631-
s: String value.
631+
s: String value as bytes, with escape sequences already decoded.
632+
raw: String text exactly as written in the source, with escape sequences left uninterpreted.
632633
delimiter: The string delimiter
633634
"""
634635

635636
def __init__(
636637
self,
637-
s: str,
638+
s: bytes,
639+
raw: str,
638640
delimiter: StringDelimiter = StringDelimiter.SINGLE_QUOTE,
639641
**kwargs
640642
):
641643
super(String, self).__init__("String", **kwargs)
642-
self.s: str = s
644+
self.s: bytes = s
645+
self.raw: str = raw
643646
self.delimiter: StringDelimiter = delimiter
644647

645648

luaparser/builder.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from luaparser.parser.LuaLexer import LuaLexer
1111
from luaparser.parser.LuaParser import LuaParser
1212
from luaparser.parser.LuaParserVisitor import LuaParserVisitor
13+
from luaparser.utils.string_literals import unescape_lua_string
1314

1415
TNode = TypeVar("TNode", bound=Node)
1516

@@ -659,9 +660,8 @@ def visitString(self, ctx: LuaParser.StringContext):
659660
elif p.match(lua_str):
660661
lua_str = p.search(lua_str).group(1)
661662

662-
# Eval string to unescape:
663-
try:
664-
lua_str = ast.literal_eval(F'"{lua_str}"')
665-
except:
666-
pass
667-
return String(lua_str, delimiter)
663+
if delimiter == StringDelimiter.DOUBLE_QUOTE or delimiter == StringDelimiter.SINGLE_QUOTE:
664+
unescaped_str = unescape_lua_string(lua_str)
665+
else:
666+
unescaped_str = lua_str.encode("utf-8")
667+
return String(unescaped_str, lua_str, delimiter)

luaparser/parser/LuaLexer.g4

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,30 @@ fragment EscapeSequence:
102102
| UtfEscape
103103
;
104104
105-
fragment DecimalEscape: '\\' Digit | '\\' Digit Digit | '\\' [0-2] Digit Digit;
105+
fragment DecimalEscape:
106+
'\\'
107+
( Digit
108+
| Digit Digit
109+
| [0-1] Digit Digit
110+
| '2' ('5' [0-5] | [0-4] Digit)
111+
)
112+
;
106113
107114
fragment HexEscape: '\\' 'x' HexDigit HexDigit;
108115
109-
fragment UtfEscape: '\\' 'u{' HexDigit+ '}';
116+
fragment UtfEscape:
117+
'\\' 'u{'
118+
( HexDigit
119+
| HexDigit HexDigit
120+
| HexDigit HexDigit HexDigit
121+
| HexDigit HexDigit HexDigit HexDigit
122+
| HexDigit HexDigit HexDigit HexDigit HexDigit
123+
| HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit
124+
| HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit
125+
| [0-7] HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit
126+
)
127+
'}'
128+
;
110129
111130
fragment Digit: [0-9];
112131

luaparser/parser/LuaLexer.interp

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

luaparser/parser/LuaLexer.py

Lines changed: 207 additions & 189 deletions
Large diffs are not rendered by default.

luaparser/printers.py

Lines changed: 15 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ def __init__(self, indent):
2828
def visit(self, node):
2929
return repr(node)
3030

31+
@visitor(bytes)
32+
def visit(self, node):
33+
return repr(node)
34+
3135
@visitor(float)
3236
def visit(self, node):
3337
return str(node)
@@ -124,41 +128,6 @@ def visit(self, node):
124128
return res
125129

126130

127-
escape_dict = {
128-
"\a": r"\a",
129-
"\b": r"\b",
130-
"\c": r"\c",
131-
"\f": r"\f",
132-
"\n": r"\n",
133-
"\r": r"\r",
134-
"\t": r"\t",
135-
"\v": r"\v",
136-
"'": r"\'",
137-
'"': r"\"",
138-
"\0": r"\0",
139-
"\1": r"\1",
140-
"\2": r"\2",
141-
"\3": r"\3",
142-
"\4": r"\4",
143-
"\5": r"\5",
144-
"\6": r"\6",
145-
"\7": r"\7",
146-
"\8": r"\8",
147-
"\9": r"\9",
148-
}
149-
150-
151-
def raw(text):
152-
"""Returns a raw string representation of text"""
153-
new_string = ""
154-
for char in text:
155-
try:
156-
new_string += escape_dict[char]
157-
except KeyError:
158-
new_string += char
159-
return new_string
160-
161-
162131
class HTMLStyleVisitor:
163132
def __init__(self):
164133
pass
@@ -172,6 +141,12 @@ def get_xml_string(self, tree):
172141

173142
return minidom.parseString(ElementTree.tostring(doc)).toprettyxml(indent=" ")
174143

144+
@visitor(bytes)
145+
def visit(self, node):
146+
if node.startswith(b'"') and node.endswith(b'"'):
147+
node = node[1:-1]
148+
return node
149+
175150
@visitor(str)
176151
def visit(self, node):
177152
if node.startswith('"') and node.endswith('"'):
@@ -208,6 +183,8 @@ def visit(self, node):
208183
child_node = self.visit(attrValue)
209184
if type(child_node) is str:
210185
xml_attr.text = child_node
186+
elif type(child_node) is bytes:
187+
pass
211188
elif type(child_node) is list:
212189
xml_attr.extend(child_node)
213190
else:
@@ -420,11 +397,11 @@ def visit(self, node: Number) -> str:
420397
@visit.register
421398
def visit(self, node: String) -> str:
422399
if node.delimiter == StringDelimiter.SINGLE_QUOTE:
423-
return "'" + self.do_visit(node.s) + "'"
400+
return "'" + self.do_visit(node.raw) + "'"
424401
elif node.delimiter == StringDelimiter.DOUBLE_QUOTE:
425-
return '"' + self.do_visit(node.s) + '"'
402+
return '"' + self.do_visit(node.raw) + '"'
426403
else:
427-
return "[[" + self.do_visit(node.s) + "]]"
404+
return "[[" + self.do_visit(node.raw) + "]]"
428405

429406
@visit.register
430407
def visit(self, node: Table):

luaparser/tests/test_comments.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def test_comment_in_table(self):
141141
),
142142
Field(
143143
Number(1),
144-
String("foo", StringDelimiter.DOUBLE_QUOTE),
144+
String(b"foo", "foo", StringDelimiter.DOUBLE_QUOTE),
145145
comments=[Comment("-- just a value")],
146146
between_brackets=True,
147147
),

luaparser/tests/test_expressions.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -368,8 +368,8 @@ def test_string_concatenation(self):
368368
targets=[Name("str")],
369369
values=[
370370
Concat(
371-
left=String("begin", StringDelimiter.DOUBLE_QUOTE),
372-
right=String("end", StringDelimiter.DOUBLE_QUOTE),
371+
left=String(b"begin", "begin", StringDelimiter.DOUBLE_QUOTE),
372+
right=String(b"end", "end", StringDelimiter.DOUBLE_QUOTE),
373373
)
374374
],
375375
)
@@ -429,11 +429,11 @@ def test_dict(self):
429429
[
430430
Field(
431431
Name("foo"),
432-
String("bar", StringDelimiter.DOUBLE_QUOTE),
432+
String(b"bar", "bar", StringDelimiter.DOUBLE_QUOTE),
433433
),
434434
Field(
435435
Name("bar"),
436-
String("foo", StringDelimiter.DOUBLE_QUOTE),
436+
String(b"foo", "foo", StringDelimiter.DOUBLE_QUOTE),
437437
),
438438
]
439439
)
@@ -467,7 +467,7 @@ def test_nested_dict(self):
467467
[
468468
Field(
469469
Name("car"),
470-
Table([Field(Name("name"), String("bmw"))]),
470+
Table([Field(Name("name"), String(b"bmw", "bmw"))]),
471471
),
472472
Field(
473473
Name("options"),
@@ -555,15 +555,15 @@ def test_mix_dict_array(self):
555555
),
556556
Field(
557557
Number(1),
558-
String("enabled", StringDelimiter.DOUBLE_QUOTE),
558+
String(b"enabled", "enabled", StringDelimiter.DOUBLE_QUOTE),
559559
between_brackets=True,
560560
),
561561
Field(Number(2), Number(157), between_brackets=True),
562562
Field(
563563
TrueExpr(), FalseExpr(), between_brackets=True
564564
),
565565
Field(
566-
String("true"),
566+
String(b"true", "true"),
567567
TrueExpr(),
568568
between_brackets=True,
569569
),
@@ -588,7 +588,7 @@ def test_function_call_simple(self):
588588
[
589589
Call(
590590
func=Name("print"),
591-
args=[String("hello", StringDelimiter.DOUBLE_QUOTE)],
591+
args=[String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE)],
592592
)
593593
]
594594
)
@@ -602,7 +602,7 @@ def test_function_call_no_par_string(self):
602602
[
603603
Call(
604604
func=Name("print"),
605-
args=[String("hello", StringDelimiter.DOUBLE_QUOTE)],
605+
args=[String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE)],
606606
style=CallStyle.NO_PARENTHESIS,
607607
)
608608
]
@@ -630,7 +630,7 @@ def test_function_invoke(self):
630630
Invoke(
631631
source=Name("foo"),
632632
func=Name("print"),
633-
args=[String("hello", StringDelimiter.DOUBLE_QUOTE)],
633+
args=[String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE)],
634634
)
635635
]
636636
)
@@ -645,7 +645,7 @@ def test_function_nested_invoke(self):
645645
Invoke(
646646
source=Invoke(source=Name("foo"), func=Name("bar"), args=[]),
647647
func=Name("print"),
648-
args=[String("hello", StringDelimiter.DOUBLE_QUOTE)],
648+
args=[String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE)],
649649
)
650650
]
651651
)
@@ -660,7 +660,7 @@ def test_function_call_args(self):
660660
Call(
661661
func=Name("print"),
662662
args=[
663-
String("hello", StringDelimiter.DOUBLE_QUOTE),
663+
String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE),
664664
Number(n=42),
665665
],
666666
)
@@ -708,7 +708,7 @@ def test_function_exp_invoke(self):
708708
notation=IndexNotation.SQUARE
709709
),
710710
func=Name("hello"),
711-
args=[String("ok", delimiter=StringDelimiter.DOUBLE_QUOTE)],
711+
args=[String(b"ok", "ok", delimiter=StringDelimiter.DOUBLE_QUOTE)],
712712
style=CallStyle.NO_PARENTHESIS,
713713
)
714714
]

luaparser/tests/test_integration.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def test_cont_int_1(self):
3434
Call(
3535
Name("describe"),
3636
[
37-
String("", StringDelimiter.DOUBLE_QUOTE),
37+
String(b"", "", StringDelimiter.DOUBLE_QUOTE),
3838
AnonymousFunction(
3939
[],
4040
Block(
@@ -119,7 +119,7 @@ def test_cont_int_3(self):
119119
tree = ast.parse(textwrap.dedent(r"""print(x['a'])"""))
120120
exp = Chunk(Block([Call(
121121
func=Name("print"),
122-
args=[Index(idx=String("a"), value=Name("x"), notation=IndexNotation.SQUARE)],
122+
args=[Index(idx=String(b"a", "a"), value=Name("x"), notation=IndexNotation.SQUARE)],
123123
)]))
124124
self.assertEqual(exp, tree)
125125

@@ -171,9 +171,10 @@ def test_cont_int_4(self):
171171
id: 'print'
172172
args: [] 1 item
173173
0: {} 1 key
174-
String: {} 4 keys
174+
String: {} 5 keys
175175
wrapped: False
176-
s: 'hello world !'
176+
s: b'hello world !'
177+
raw: 'hello world !'
177178
delimiter: SINGLE_QUOTE
178179
style: DEFAULT
179180
1: {} 1 key
@@ -255,6 +256,7 @@ def test_cont_int_6(self):
255256
Field(
256257
Name("mykey"),
257258
String(
259+
b"myvalue",
258260
"myvalue",
259261
delimiter=StringDelimiter.DOUBLE_QUOTE,
260262
),
@@ -323,7 +325,7 @@ def test_cont_int_10(self):
323325
If(
324326
test=AndLoOp(left=GreaterThanOp(left=ULengthOP(Name("setting")), right=Number(10)),
325327
right=EqToOp(left=Name("setting_name"),
326-
right=String("user", StringDelimiter.DOUBLE_QUOTE))),
328+
right=String(b"user", "user", StringDelimiter.DOUBLE_QUOTE))),
327329
body=Block([
328330
Return([Number(100)])
329331
]),
@@ -340,7 +342,7 @@ def test_cont_int_12(self):
340342
"""))
341343
exp = Chunk(
342344
Block([
343-
Assign([Name("a")], [String("\x00\n\ta", StringDelimiter.SINGLE_QUOTE)])
345+
Assign([Name("a")], [String(b"\x00\n\ta", "\\0\\n\\ta", StringDelimiter.SINGLE_QUOTE)])
344346
])
345347
)
346348
self.assertEqual(exp, tree)

0 commit comments

Comments
 (0)