Skip to content

Commit 12e5c34

Browse files
committed
Rework setup grammar script to better handle more targets.
1 parent a002e68 commit 12e5c34

File tree

1 file changed

+92
-34
lines changed

1 file changed

+92
-34
lines changed

setup_grammar.py

Lines changed: 92 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,128 @@
1+
import datetime
12
import pathlib
23
import subprocess
34
from enum import Enum
45

56
import requests
67

7-
GRAMMAR = {
8-
'lexer': {
9-
'url': 'https://raw.githubusercontent.com/crate/crate/master/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseLexer.g4',
10-
'filename': 'SqlBaseLexer.g4'
8+
9+
class Antlr4Target(Enum):
    """
    Code-generation targets supported by this script.

    Each member's value is the language name handed to the `antlr4` command
    through its `-Dlanguage=` option.
    """
    js = 'JavaScript'
    python = 'Python3'
12+
13+
14+
# Build configuration shared by every step of this script.
build_options = {
    # Per-target package directory (relative to this script) that receives the generated code.
    'antlr4_compiled_target_output': {
        Antlr4Target.js: 'cratedb_sqlparse_js',
        Antlr4Target.python: 'cratedb_sqlparse_py'
    },

    # Directory inside the target package where antlr4 writes the generated parser.
    'antlr4_compiled_target_subdir': 'parser/generated_parser',

    # List of '.g4' files that will be built
    # `{version}` in the url is filled in with the requested CrateDB branch/tag at download time.
    # NOTE: `patch_lexer` reads the first entry as the lexer grammar, keep it first.
    'files': [
        {
            'url': 'https://raw.githubusercontent.com/crate/crate/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseLexer.g4',
            'filename': 'SqlBaseLexer.g4'
        },
        {
            'url': 'https://raw.githubusercontent.com/crate/crate/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseParser.g4',
            'filename': 'SqlBaseParser.g4'
        }
    ]
}
1834

19-
PARSER_COMPILE_PATH = pathlib.Path(__file__).parent
20-
2135

22-
class Target(Enum):
23-
js = 'JavaScript'
24-
python = 'Python3'
36+
# Directory that contains this script; the target output directories in
# `build_options` are joined onto it to build the antlr4 output paths.
PARSER_COMPILE_PATH = pathlib.Path(__file__).parent
2537

2638

27-
def download_cratedb_grammar():
39+
def download_cratedb_grammar(version='master'):
    """
    Downloads CrateDB's `version` grammar files.

    Every entry of `build_options['files']` is fetched and written next to this
    script under its `filename`, prefixed with a comment that records the
    requested version and the download time.

    Version should match a tag; for a list of tags run:
    $ curl https://api.github.com/repos/crate/crate/tags | jq -r '.[] | .name'

    Raises `requests.HTTPError` if a grammar file cannot be downloaded,
    e.g. because `version` is not an existing branch or tag.
    """
    for grammar_file in build_options['files']:
        # A timeout keeps the build from hanging forever on a stalled connection.
        response = requests.get(grammar_file['url'].format(version=version), timeout=30)

        # Without this check an error page (e.g. '404: Not Found') would be
        # written to disk as if it were the grammar.
        response.raise_for_status()

        # We annotate the CrateDB branch and date of download to the Grammar files for reference.
        text = f'/* crate_branch={version}, at={datetime.datetime.now()}, annotatedby=cratedb_sqlparse */\n' + response.text

        # Explicit encoding so the result does not depend on the platform's default.
        (PARSER_COMPILE_PATH / grammar_file['filename']).write_text(text, encoding='utf-8')
54+
55+
56+
def compile_grammar(target: Antlr4Target):
    """
    Compiles antlr4 files into `target` code.

    Runs the `antlr4` command once for every entry of `build_options['files']`
    and writes the generated sources to
    `<script dir>/<target package dir>/<antlr4_compiled_target_subdir>`.

    Raises `subprocess.CalledProcessError` if `antlr4` exits with a non-zero status.
    """
    # Made absolute so the output path stays valid while `cwd` is changed below.
    script_dir = PARSER_COMPILE_PATH.resolve()

    base_dir = build_options['antlr4_compiled_target_output'][target]
    sub_dir = build_options['antlr4_compiled_target_subdir']
    output_dir = script_dir / base_dir / sub_dir

    for grammar_file in build_options['files']:
        subprocess.run(
            [
                'antlr4', f'-Dlanguage={target.value}',
                '-o',
                str(output_dir),
                grammar_file['filename']
            ],
            # The grammar files are downloaded next to this script, so run antlr4
            # from there instead of relying on the caller's working directory.
            cwd=script_dir,
            # Stop the build on a failed compilation instead of silently continuing.
            check=True,
        )
5073

5174

52-
def patch_lexer():
75+
def patch_lexer(target: Antlr4Target):
    """
    Patches the lexer file, removing bad syntax generated by Antlr4.

    The generated lexer for `target` is read, every string listed in
    `REMOVE_LINES` is deleted from it, and the file is written back in place.
    """

    REMOVE_LINES = [
        'import io.crate.sql.AbstractSqlBaseLexer;',
    ]

    # If more targets are added, this needs to be improved.
    extension = 'py' if target == Antlr4Target.python else 'js'

    base_dir = build_options['antlr4_compiled_target_output'][target]
    sub_dir = build_options['antlr4_compiled_target_subdir']

    # The lexer grammar is the first entry of `build_options['files']`.
    # Only the suffix is swapped; a plain str.replace('g4', ...) would also
    # rewrite any other 'g4' occurring in the file name.
    grammar_name = pathlib.Path(build_options['files'][0]['filename'])
    file_name = grammar_name.with_suffix(f'.{extension}').name

    lexer_file = PARSER_COMPILE_PATH / base_dir / sub_dir / file_name

    text = lexer_file.read_text()

    for text_to_remove in REMOVE_LINES:
        text = text.replace(text_to_remove, '')

    lexer_file.write_text(text)
99+
100+
101+
def set_version(target: Antlr4Target, version: str):
    """
    Specifies the compiled version to the target package,
    depending on the package the strategy differs.

    For the Python target `__cratedb_version__ = "<version>"` is written to the
    package's `__init__.py`; for the JavaScript target
    `export const __cratedb_version__ = "<version>"` is written to `index.js`.
    Both files live in the parent directory of the generated parser.

    The file is edited directly from Python (no shell involved) and a previously
    written version line is replaced rather than duplicated, so the script can be
    re-run safely: a repeated `export const` would be a syntax error in JavaScript.
    """
    base_dir = build_options['antlr4_compiled_target_output'][target]
    sub_dir = build_options['antlr4_compiled_target_subdir']

    target_path = (PARSER_COMPILE_PATH / base_dir / sub_dir).parent

    if target == Antlr4Target.python:
        version_file = target_path / '__init__.py'
        statement_prefix = '__cratedb_version__ = '
    elif target == Antlr4Target.js:
        version_file = target_path / 'index.js'
        statement_prefix = 'export const __cratedb_version__ = '
    else:
        # No strategy is defined for other targets; nothing to do.
        return

    existing = version_file.read_text() if version_file.exists() else ''

    lines = existing.split('\n')
    if lines[-1] == '':
        # Drop the empty tail left by a trailing newline (or by an empty file).
        lines.pop()

    # Remove version lines written by earlier runs before adding the new one.
    lines = [line for line in lines if not line.startswith(statement_prefix)]
    lines.append(f'{statement_prefix}"{version}"')

    version_file.write_text('\n'.join(lines) + '\n')
65122

66123

67124
if __name__ == '__main__':
    # Defined once so that the downloaded grammar and the version written
    # to the package cannot drift apart.
    version = '5.6.4'

    # Every step must operate on the same target: previously the JavaScript
    # parser was compiled and patched while the version was written to the
    # Python package, which had not been rebuilt.
    target = Antlr4Target.js

    download_cratedb_grammar(version)
    compile_grammar(target)
    patch_lexer(target)
    set_version(target, version)

0 commit comments

Comments
 (0)