Skip to content

Commit 4872a0b

Browse files
committed
Rework setup grammar script to better handle more targets.
1 parent a002e68 commit 4872a0b

File tree

1 file changed

+95
-35
lines changed

1 file changed

+95
-35
lines changed

setup_grammar.py

Lines changed: 95 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,130 @@
1+
import datetime
12
import pathlib
23
import subprocess
34
from enum import Enum
45

56
import requests
67

7-
GRAMMAR = {
8-
'lexer': {
9-
'url': 'https://raw.githubusercontent.com/crate/crate/master/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseLexer.g4',
10-
'filename': 'SqlBaseLexer.g4'
8+
9+
class Antlr4Target(Enum):
10+
js = 'JavaScript'
11+
python = 'Python3'
12+
13+
14+
build_options = {
15+
'antlr4_compiled_target_output': {
16+
Antlr4Target.js: 'cratedb_sqlparse_js',
17+
Antlr4Target.python: 'cratedb_sqlparse_py'
1118
},
12-
'parser': {
13-
'url': 'https://raw.githubusercontent.com/crate/crate/master/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseParser.g4',
14-
'filename': 'SqlBaseParser.g4'
15-
}
1619

20+
'antlr4_compiled_target_subdir': 'parser/generated_parser',
21+
22+
# List of '.g4' files that will be built
23+
'files': [
24+
{
25+
'url': 'https://github.com/crate/crate/raw/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseLexer.g4',
26+
'filename': 'SqlBaseLexer.g4'
27+
},
28+
{
29+
'url': 'https://github.com/crate/crate/raw/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseParser.g4',
30+
'filename': 'SqlBaseParser.g4'
31+
}
32+
]
1733
}
1834

1935
PARSER_COMPILE_PATH = pathlib.Path(__file__).parent
2036

2137

22-
class Target(Enum):
23-
js = 'JavaScript'
24-
python = 'Python3'
25-
26-
27-
def download_cratedb_grammar():
38+
def download_cratedb_grammar(version='master'):
2839
"""
29-
Downloads CrateDB's grammar files.
40+
Downloads the CrateDB grammar files for the given `version`.
41+
42+
Version should match a tag or branch name (default: `master`); for a list of tags run:
43+
$ curl https://api.github.com/repos/crate/crate/tags | jq -r '.[] | .name'
3044
"""
31-
for file in GRAMMAR.values():
32-
response = requests.get(file['url'])
33-
with open(str(pathlib.Path(__file__).parent / file['filename']),
34-
'w') as f:
35-
f.write(response.text)
45+
for file in build_options['files']:
46+
response = requests.get(file['url'].format(version=version))
47+
48+
# Prepend the CrateDB branch and download date to each grammar file, for reference.
49+
text = f'/* crate_branch={version}, at={datetime.datetime.now()}, annotatedby=cratedb_sqlparse */\n' + response.text
3650

51+
with open(str(PARSER_COMPILE_PATH.parent / file['filename']), 'w') as f:
52+
f.write(text)
3753

38-
def compile_grammar(target: Target, path: str):
54+
55+
def compile_grammar(target: Antlr4Target):
3956
"""
40-
Compiles antlr4 files into Python code.
57+
Compiles antlr4 files into `target` code.
4158
"""
42-
for file in GRAMMAR.values():
59+
60+
base_dir = build_options['antlr4_compiled_target_output'][target]
61+
sub_dir = build_options['antlr4_compiled_target_subdir']
62+
63+
for file in build_options['files']:
4364
subprocess.run(
4465
[
4566
'antlr4', f'-Dlanguage={target.value}',
46-
'-o', str(PARSER_COMPILE_PATH / path),
67+
'-o',
68+
str(PARSER_COMPILE_PATH / base_dir / sub_dir),
4769
file['filename']
4870
]
4971
)
5072

5173

52-
def patch_lexer():
74+
def patch_lexer(target: Antlr4Target):
75+
"""
76+
Patches the lexer file, removing bad syntax generated by Antlr4.
77+
"""
78+
5379
REMOVE_LINES = [
5480
'import io.crate.sql.AbstractSqlBaseLexer;',
5581
]
56-
sqlbaselexer_pyfile = PARSER_COMPILE_PATH / GRAMMAR['lexer']['filename'].replace('g4', 'py')
57-
text = pathlib.Path(sqlbaselexer_pyfile).read_text()
5882

59-
# We remove lines that do not properly work.
83+
# If more targets are added, this needs to be improved.
84+
extension = 'py' if target == Antlr4Target.python else 'js'
85+
86+
base_dir = build_options['antlr4_compiled_target_output'][target]
87+
sub_dir = build_options['antlr4_compiled_target_subdir']
88+
file_name = build_options['files'][0]['filename'].replace('g4', extension)
89+
90+
lexer_file = PARSER_COMPILE_PATH / base_dir / sub_dir / file_name
91+
92+
text = pathlib.Path(lexer_file).read_text()
93+
6094
for text_to_remove in REMOVE_LINES:
61-
text = text.replace(text_to_remove,
62-
'# Code removed by cratedb_sqlparse.setup_grammar.patch_lexer')
95+
text = text.replace(text_to_remove, '')
96+
97+
pathlib.Path(lexer_file).write_text(text)
98+
99+
100+
def set_version(target: Antlr4Target, version: str):
101+
"""
102+
Records the compiled CrateDB version in the target package.
103+
The index file and variable that are written depend on the target.
104+
"""
105+
base_dir = build_options['antlr4_compiled_target_output'][target]
106+
sub_dir = build_options['antlr4_compiled_target_subdir']
107+
108+
target_path = (PARSER_COMPILE_PATH / base_dir / sub_dir).parent
109+
110+
version = f'"{version}"' # Wrap in double quotes so the value written to the index file is a string literal.
111+
112+
index_file = ''
113+
variable = ''
114+
115+
if target == Antlr4Target.python:
116+
index_file = '__init__.py'
117+
variable = '__cratedb_version__'
63118

64-
pathlib.Path(sqlbaselexer_pyfile).write_text(text)
119+
if target == Antlr4Target.js:
120+
index_file = 'index.js'
121+
variable = 'export const __cratedb_version__'
65122

123+
with open(target_path / index_file, "a") as f:
124+
f.write(f"{variable} = {version}\n")
66125

67-
if __name__ == '__main__':
68-
download_cratedb_grammar()
69-
compile_grammar(Target.js, 'cratedb_sqlparse_js/parser/generated_parser')
70-
patch_lexer()
126+
# if __name__ == '__main__':
127+
# download_cratedb_grammar('5.6.4')
128+
# compile_grammar(Antlr4Target.js)
129+
# patch_lexer(Antlr4Target.js)
130+
set_version(Antlr4Target.js, '5.45.4')

0 commit comments

Comments
 (0)