Skip to content

Commit e721fc0

Browse files
committed
Bulk generator: Prepare for adding DCA support. This commit just generalizes the existing functionality to be independent of Rust and instead depend on the configuration file and the command-line arguments.
1 parent 6ff2beb commit e721fc0

File tree

2 files changed

+68
-40
lines changed

2 files changed

+68
-40
lines changed

misc/scripts/models-as-data/rust_bulk_generate_mad.py

Lines changed: 63 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,10 @@
2626
)
2727
build_dir = os.path.join(gitroot, "mad-generation-build")
2828

29-
30-
def path_to_mad_directory(language: str, name: str) -> str:
31-
return os.path.join(gitroot, f"{language}/ql/lib/ext/generated/{name}")
32-
33-
3429
# A project to generate models for
3530
class Project(TypedDict):
3631
"""
37-
Type definition for Rust projects to model.
32+
Type definition for projects (acquired via a GitHub repo) to model.
3833
3934
Attributes:
4035
name: The name of the project
@@ -139,13 +134,15 @@ def clone_projects(projects: List[Project]) -> List[tuple[Project, str]]:
139134
return project_dirs
140135

141136

142-
def build_database(project: Project, project_dir: str) -> str | None:
137+
def build_database(language: str, extractor_options, project: Project, project_dir: str) -> str | None:
143138
"""
144139
Build a CodeQL database for a project.
145140
146141
Args:
142+
language: The language for which to build the database (e.g., "rust").
143+
extractor_options: Additional options for the extractor.
147144
project: A dictionary containing project information with 'name' and 'git_repo' keys.
148-
project_dir: The directory containing the project source code.
145+
project_dir: Path to the CodeQL database.
149146
150147
Returns:
151148
The path to the created database directory.
@@ -158,17 +155,17 @@ def build_database(project: Project, project_dir: str) -> str | None:
158155
# Only build the database if it doesn't already exist
159156
if not os.path.exists(database_dir):
160157
print(f"Building CodeQL database for {name}...")
158+
extractor_options = [option for x in extractor_options for option in ("-O", x)]
161159
try:
162160
subprocess.check_call(
163161
[
164162
"codeql",
165163
"database",
166164
"create",
167-
"--language=rust",
165+
f"--language={language}",
168166
"--source-root=" + project_dir,
169167
"--overwrite",
170-
"-O",
171-
"cargo_features='*'",
168+
*extractor_options,
172169
"--",
173170
database_dir,
174171
]
@@ -184,40 +181,72 @@ def build_database(project: Project, project_dir: str) -> str | None:
184181

185182
return database_dir
186183

187-
188-
def generate_models(project: Project, database_dir: str) -> None:
184+
def generate_models(args, name: str, database_dir: str) -> None:
189185
"""
190186
Generate models for a project.
191187
192188
Args:
193-
project: A dictionary containing project information with 'name' and 'git_repo' keys.
194-
project_dir: The directory containing the project source code.
189+
args: Command line arguments passed to this script.
190+
name: The name of the project.
191+
database_dir: Path to the CodeQL database.
195192
"""
196-
name = project["name"]
197193

198-
generator = mad.Generator("rust")
199-
generator.generateSinks = True
200-
generator.generateSources = True
201-
generator.generateSummaries = True
194+
generator = mad.Generator(args.lang)
195+
generator.generateSinks = args.with_sinks
196+
generator.generateSources = args.with_sources
197+
generator.generateSummaries = args.with_summaries
202198
generator.setenvironment(database=database_dir, folder=name)
203199
generator.run()
204200

201+
def build_databases_from_projects(language: str, extractor_options, projects: List[Project]) -> List[tuple[str, str | None]]:
    """
    Clone the given projects and build a CodeQL database for each clone.

    The databases are built one after another, in the order in which
    clone_projects returns the cloned projects.

    Args:
        language: The language handed to build_database (e.g., "rust").
        extractor_options: Additional extractor options, handed to build_database unchanged.
        projects: The projects to clone and build databases for.

    Returns:
        One (project_name, database_dir) pair per cloned project, where
        database_dir is the value returned by build_database (None when no
        database was produced).
    """
    # Phase 1: obtain a local checkout of every project.
    print("=== Phase 1: Cloning projects ===")
    cloned_projects = clone_projects(projects)

    # Phase 2: build one database per checkout.
    print("\n=== Phase 2: Building databases ===")
    results = []
    for project, project_dir in cloned_projects:
        # Read the name before building so a malformed project entry fails
        # before any database build is started for it.
        project_name = project["name"]
        database_dir = build_database(language, extractor_options, project, project_dir)
        results.append((project_name, database_dir))
    return results
224+
225+
def get_destination_for_project(config, name: str) -> str:
    """
    Return the directory path for the project called `name`.

    The path is the configuration's "destination" entry joined with the
    project name.

    Args:
        config: Configuration mapping; must contain a "destination" key.
        name: The name of the project.
    """
    destination_root = config["destination"]
    return os.path.join(destination_root, name)
227+
228+
def get_strategy(config) -> str:
    """
    Return the configuration's "strategy" entry in lower case.

    Args:
        config: Configuration mapping; must contain a "strategy" key.
    """
    configured_strategy = config["strategy"]
    return configured_strategy.lower()
205230

206-
def main() -> None:
231+
def main(config, args) -> None:
207232
"""
208-
Process all projects in three distinct phases:
209-
1. Clone projects (in parallel)
210-
2. Build databases for projects
211-
3. Generate models for successful database builds
233+
Main function to handle the bulk generation of MaD models.
234+
Args:
235+
config: Configuration dictionary containing project details and other settings.
236+
args: Command line arguments passed to this script.
212237
"""
213238

239+
projects = config["targets"]
240+
destination = config["destination"]
241+
language = args.lang
242+
214243
# Create build directory if it doesn't exist
215244
if not os.path.exists(build_dir):
216245
os.makedirs(build_dir)
217246

218247
# Check if any of the MaD directories contain working directory changes in git
219248
for project in projects:
220-
mad_dir = path_to_mad_directory("rust", project["name"])
249+
mad_dir = get_destination_for_project(config, project["name"])
221250
if os.path.exists(mad_dir):
222251
git_status_output = subprocess.check_output(
223252
["git", "status", "-s", mad_dir], text=True
@@ -232,22 +261,17 @@ def main() -> None:
232261
)
233262
sys.exit(1)
234263

235-
# Phase 1: Clone projects in parallel
236-
print("=== Phase 1: Cloning projects ===")
237-
project_dirs = clone_projects(projects)
238-
239-
# Phase 2: Build databases for all projects
240-
print("\n=== Phase 2: Building databases ===")
241-
database_results = [
242-
(project, build_database(project, project_dir))
243-
for project, project_dir in project_dirs
244-
]
264+
database_results = []
265+
match get_strategy(config):
266+
case "repo":
267+
extractor_options = config.get("extractor_options", [])
268+
database_results = build_databases_from_projects(language, extractor_options, projects)
245269

246270
# Phase 3: Generate models for all projects
247271
print("\n=== Phase 3: Generating models ===")
248272

249273
failed_builds = [
250-
project["name"] for project, db_dir in database_results if db_dir is None
274+
project for project, db_dir in database_results if db_dir is None
251275
]
252276
if failed_builds:
253277
print(
@@ -257,15 +281,14 @@ def main() -> None:
257281

258282
# Delete the MaD directory for each project
259283
for project, database_dir in database_results:
260-
mad_dir = path_to_mad_directory("rust", project["name"])
284+
mad_dir = get_destination_for_project(config, project)
261285
if os.path.exists(mad_dir):
262286
print(f"Deleting existing MaD directory at {mad_dir}")
263287
subprocess.check_call(["rm", "-rf", mad_dir])
264288

265289
for project, database_dir in database_results:
266290
if database_dir is not None:
267-
generate_models(project, database_dir)
268-
291+
generate_models(args, project, database_dir)
269292

270293
if __name__ == "__main__":
271294
parser = argparse.ArgumentParser()

rust/misc/bulk_generation_targets.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2+
"strategy": "repo",
23
"targets": [
34
{
45
"name": "libc",
@@ -65,5 +66,9 @@
6566
"git_repo": "https://github.com/clap-rs/clap",
6667
"git_tag": "v4.5.38"
6768
}
69+
],
70+
"destination": "rust/ql/lib/ext/generated",
71+
"extractor_options": [
72+
"cargo_features='*'"
6873
]
6974
}

0 commit comments

Comments
 (0)