|
import datetime
import pathlib
import subprocess
from enum import Enum

import requests
6 | 7 |
|
class Antlr4Target(Enum):
    """
    Languages that the grammar can be compiled to.

    The value of each member is the language name handed to the `antlr4`
    command through its `-Dlanguage=` option.
    """
    js = 'JavaScript'
    python = 'Python3'
| 12 | + |
| 13 | + |
# Settings shared by every build step of this script.
build_options = {
    # Top-level output directory of each target, joined onto PARSER_COMPILE_PATH.
    'antlr4_compiled_target_output': {
        Antlr4Target.js: 'cratedb_sqlparse_js',
        Antlr4Target.python: 'cratedb_sqlparse_py',
    },

    # Directory, inside a target's output directory, that receives the generated code.
    'antlr4_compiled_target_subdir': 'parser/generated_parser',

    # List of '.g4' files that will be built; `{version}` in the url is filled
    # in with the requested CrateDB tag or branch.
    # NOTE: `patch_lexer` reads the first entry, so the lexer grammar has to stay first.
    'files': [
        {
            'url': 'https://github.com/crate/crate/raw/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseLexer.g4',
            'filename': 'SqlBaseLexer.g4',
        },
        {
            'url': 'https://github.com/crate/crate/raw/{version}/libs/sql-parser/src/main/antlr/io/crate/sql/parser/antlr/SqlBaseParser.g4',
            'filename': 'SqlBaseParser.g4',
        },
    ],
}
18 | 34 |
|
# Directory that contains this script; the per-target output directories are
# resolved against it.
PARSER_COMPILE_PATH = pathlib.Path(__file__).parent
20 | 36 |
|
21 | 37 |
|
def download_cratedb_grammar(version='master'):
    """
    Downloads CrateDB's `version` grammar files.

    Version should match a tag; for a list of tags run:
    $ curl https://api.github.com/repos/crate/crate/tags | jq -r '.[] | .name'

    Every file listed in `build_options['files']` is fetched and written,
    prefixed with a one-line annotation comment, next to
    `PARSER_COMPILE_PATH.parent`.

    Raises `requests.HTTPError` when a file cannot be fetched, e.g. because
    `version` does not exist.
    """
    # NOTE(review): the files land in the parent of this script's directory,
    # while `compile_grammar` passes the bare filenames to antlr4, which
    # resolves them against the current working directory. Confirm both agree.
    for file in build_options['files']:
        response = requests.get(file['url'].format(version=version), timeout=30)

        # Without this, an unknown tag would silently store the HTTP error
        # page as if it were the grammar.
        response.raise_for_status()

        # We annotate the CrateDB branch and date of download to the Grammar files for reference.
        # The timestamp is taken in UTC so that it is unambiguous across machines.
        downloaded_at = datetime.datetime.now(datetime.timezone.utc)
        text = f'/* crate_branch={version}, at={downloaded_at}, annotatedby=cratedb_sqlparse */\n' + response.text

        with open(str(PARSER_COMPILE_PATH.parent / file['filename']), 'w') as f:
            f.write(text)
37 | 53 |
|
def compile_grammar(target: Antlr4Target):
    """
    Compiles antlr4 files into `target` code.

    Runs the `antlr4` command once per file in `build_options['files']` and
    places the generated code in the target's output directory.

    Raises `subprocess.CalledProcessError` when antlr4 exits with a non-zero
    status, and `FileNotFoundError` when the `antlr4` command is not available.
    """

    base_dir = build_options['antlr4_compiled_target_output'][target]
    sub_dir = build_options['antlr4_compiled_target_subdir']

    for file in build_options['files']:
        subprocess.run(
            [
                'antlr4', f'-Dlanguage={target.value}',
                '-o',
                str(PARSER_COMPILE_PATH / base_dir / sub_dir),
                file['filename']
            ],
            # Stop the build on the first failing grammar instead of carrying
            # on with missing or stale generated code.
            check=True,
        )
50 | 72 |
|
51 | 73 |
|
def patch_lexer(target: Antlr4Target):
    """
    Patches the lexer file, removing bad syntax generated by Antlr4.

    The generated lexer of `target` is read, every occurrence of the entries
    in `REMOVE_LINES` is deleted from it, and the file is written back.
    """

    REMOVE_LINES = [
        'import io.crate.sql.AbstractSqlBaseLexer;',
    ]

    # If more targets are added, this needs to be improved.
    extension = 'py' if target == Antlr4Target.python else 'js'

    base_dir = build_options['antlr4_compiled_target_output'][target]
    sub_dir = build_options['antlr4_compiled_target_subdir']

    # The lexer grammar is the first entry of `files`. Only the suffix is
    # swapped, so a 'g4' elsewhere in the name would be left alone.
    grammar_name = pathlib.PurePath(build_options['files'][0]['filename'])
    file_name = grammar_name.with_suffix(f'.{extension}').name

    lexer_file = PARSER_COMPILE_PATH / base_dir / sub_dir / file_name

    text = lexer_file.read_text()

    for text_to_remove in REMOVE_LINES:
        text = text.replace(text_to_remove, '')

    lexer_file.write_text(text)
| 98 | + |
| 99 | + |
def set_version(target: Antlr4Target, version: str):
    """
    Specifies the compiled version to the target package,
    depending on the package the strategy differs.

    For Python, `__cratedb_version__ = "<version>"` is stored in `__init__.py`;
    for JavaScript, `export const __cratedb_version__ = "<version>"` is stored
    in `index.js`. The file lives in the parent of the generated parser
    directory and is created when it does not exist yet.

    Calling this again replaces the previously stored declaration rather than
    adding a second one (a repeated `export const` is a syntax error in
    JavaScript).

    Raises `ValueError` for a target that has no versioning strategy.
    """
    base_dir = build_options['antlr4_compiled_target_output'][target]
    sub_dir = build_options['antlr4_compiled_target_subdir']

    target_path = (PARSER_COMPILE_PATH / base_dir / sub_dir).parent

    if target == Antlr4Target.python:
        index_file = '__init__.py'
        variable = '__cratedb_version__'
    elif target == Antlr4Target.js:
        index_file = 'index.js'
        variable = 'export const __cratedb_version__'
    else:
        raise ValueError(f'No versioning strategy for target {target!r}')

    index_path = target_path / index_file
    declaration = f'{variable} = '

    existing = index_path.read_text() if index_path.exists() else ''

    # Keep every line of the file untouched except an earlier declaration.
    kept_lines = [
        line for line in existing.splitlines(keepends=True)
        if not line.startswith(declaration)
    ]
    content = ''.join(kept_lines)

    # Make sure the declaration starts on a line of its own.
    if content and not content.endswith('\n'):
        content += '\n'

    content += f'{declaration}"{version}"\n'
    index_path.write_text(content)
66 | 125 |
|
if __name__ == '__main__':
    # Run the build only when executed as a script, so that importing this
    # module never touches the package files.
    # A single version drives every step, which keeps the version stored in
    # the package equal to the grammar that was downloaded and compiled.
    cratedb_version = '5.6.4'

    download_cratedb_grammar(cratedb_version)
    compile_grammar(Antlr4Target.js)
    patch_lexer(Antlr4Target.js)
    set_version(Antlr4Target.js, cratedb_version)
0 commit comments