26
26
)
27
27
build_dir = os .path .join (gitroot , "mad-generation-build" )
28
28
29
-
30
- def path_to_mad_directory (language : str , name : str ) -> str :
31
- return os .path .join (gitroot , f"{ language } /ql/lib/ext/generated/{ name } " )
32
-
33
-
34
29
# A project to generate models for
35
30
class Project (TypedDict ):
36
31
"""
37
- Type definition for Rust projects to model.
32
+ Type definition for projects (acquired via a GitHub repo) to model.
38
33
39
34
Attributes:
40
35
name: The name of the project
@@ -139,13 +134,15 @@ def clone_projects(projects: List[Project]) -> List[tuple[Project, str]]:
139
134
return project_dirs
140
135
141
136
142
- def build_database (project : Project , project_dir : str ) -> str | None :
137
+ def build_database (language : str , extractor_options , project : Project , project_dir : str ) -> str | None :
143
138
"""
144
139
Build a CodeQL database for a project.
145
140
146
141
Args:
142
+ language: The language for which to build the database (e.g., "rust").
143
+ extractor_options: Additional options for the extractor.
147
144
project: A dictionary containing project information with 'name' and 'git_repo' keys.
148
- project_dir: The directory containing the project source code .
145
+ project_dir: The directory containing the project source code (passed to CodeQL as the source root).
149
146
150
147
Returns:
151
148
The path to the created database directory.
@@ -158,17 +155,17 @@ def build_database(project: Project, project_dir: str) -> str | None:
158
155
# Only build the database if it doesn't already exist
159
156
if not os .path .exists (database_dir ):
160
157
print (f"Building CodeQL database for { name } ..." )
158
+ extractor_options = [option for x in extractor_options for option in ("-O" , x )]
161
159
try :
162
160
subprocess .check_call (
163
161
[
164
162
"codeql" ,
165
163
"database" ,
166
164
"create" ,
167
- "--language=rust " ,
165
+ f "--language={ language } " ,
168
166
"--source-root=" + project_dir ,
169
167
"--overwrite" ,
170
- "-O" ,
171
- "cargo_features='*'" ,
168
+ * extractor_options ,
172
169
"--" ,
173
170
database_dir ,
174
171
]
@@ -184,40 +181,72 @@ def build_database(project: Project, project_dir: str) -> str | None:
184
181
185
182
return database_dir
186
183
187
-
188
def generate_models(args, name: str, database_dir: str) -> None:
    """
    Run the MaD model generator against one project's CodeQL database.

    Args:
        args: Command line arguments passed to this script. The attributes
            read here are lang, with_sinks, with_sources and with_summaries.
        name: The name of the project; passed to the generator as its folder.
        database_dir: Path to the CodeQL database.
    """
    model_generator = mad.Generator(args.lang)

    # Which kinds of models get generated is controlled from the command line.
    model_generator.generateSinks = args.with_sinks
    model_generator.generateSources = args.with_sources
    model_generator.generateSummaries = args.with_summaries

    model_generator.setenvironment(database=database_dir, folder=name)
    model_generator.run()
204
200
201
def build_databases_from_projects(language: str, extractor_options, projects: List[Project]) -> List[tuple[str, str | None]]:
    """
    Clone the given projects and build a CodeQL database for each of them.

    Cloning is delegated to clone_projects. The databases are then built one
    project at a time, in the order clone_projects returned them.

    Args:
        language: The language for which to build the databases (e.g., "rust").
        extractor_options: Additional options for the extractor.
        projects: List of projects to build databases for.

    Returns:
        List of (project_name, database_dir) pairs, where database_dir is None if the build failed.
    """
    # Phase 1: Clone projects (handled by clone_projects)
    print("=== Phase 1: Cloning projects ===")
    cloned_projects = clone_projects(projects)

    # Phase 2: Build a database for every cloned project
    print("\n === Phase 2: Building databases ===")
    results = []
    for project, project_dir in cloned_projects:
        # Read the name before building so a malformed project fails up front.
        project_name = project["name"]
        database_dir = build_database(language, extractor_options, project, project_dir)
        results.append((project_name, database_dir))
    return results
224
+
225
def get_destination_for_project(config, name: str) -> str:
    """
    Return the output directory for a single project's generated models.

    Args:
        config: Configuration mapping; its "destination" entry is the root directory.
        name: The name of the project, used as the final path component.

    Returns:
        The "destination" root joined with the project name.
    """
    destination_root = config["destination"]
    return os.path.join(destination_root, name)
227
+
228
def get_strategy(config) -> str:
    """
    Return the configured strategy name, normalised to lower case.

    Args:
        config: Configuration mapping; must contain a "strategy" entry.

    Returns:
        The value of config["strategy"] converted to lower case.
    """
    strategy = config["strategy"]
    return strategy.lower()
205
230
206
- def main () -> None :
231
+ def main (config , args ) -> None :
207
232
"""
208
- Process all projects in three distinct phases:
209
- 1. Clone projects (in parallel)
210
- 2. Build databases for projects
211
- 3. Generate models for successful database builds
233
+ Main function to handle the bulk generation of MaD models.
234
+ Args:
235
+ config: Configuration dictionary containing project details and other settings.
236
+ args: Command line arguments passed to this script.
212
237
"""
213
238
239
+ projects = config ["targets" ]
240
+ destination = config ["destination" ]
241
+ language = args .lang
242
+
214
243
# Create build directory if it doesn't exist
215
244
if not os .path .exists (build_dir ):
216
245
os .makedirs (build_dir )
217
246
218
247
# Check if any of the MaD directories contain working directory changes in git
219
248
for project in projects :
220
- mad_dir = path_to_mad_directory ( "rust" , project ["name" ])
249
+ mad_dir = get_destination_for_project ( config , project ["name" ])
221
250
if os .path .exists (mad_dir ):
222
251
git_status_output = subprocess .check_output (
223
252
["git" , "status" , "-s" , mad_dir ], text = True
@@ -232,22 +261,17 @@ def main() -> None:
232
261
)
233
262
sys .exit (1 )
234
263
235
- # Phase 1: Clone projects in parallel
236
- print ("=== Phase 1: Cloning projects ===" )
237
- project_dirs = clone_projects (projects )
238
-
239
- # Phase 2: Build databases for all projects
240
- print ("\n === Phase 2: Building databases ===" )
241
- database_results = [
242
- (project , build_database (project , project_dir ))
243
- for project , project_dir in project_dirs
244
- ]
264
+ database_results = []
265
+ match get_strategy (config ):
266
+ case "repo" :
267
+ extractor_options = config .get ("extractor_options" , [])
268
+ database_results = build_databases_from_projects (language , extractor_options , projects )
245
269
246
270
# Phase 3: Generate models for all projects
247
271
print ("\n === Phase 3: Generating models ===" )
248
272
249
273
failed_builds = [
250
- project [ "name" ] for project , db_dir in database_results if db_dir is None
274
+ project for project , db_dir in database_results if db_dir is None
251
275
]
252
276
if failed_builds :
253
277
print (
@@ -257,15 +281,14 @@ def main() -> None:
257
281
258
282
# Delete the MaD directory for each project
259
283
for project , database_dir in database_results :
260
- mad_dir = path_to_mad_directory ( "rust" , project [ "name" ] )
284
+ mad_dir = get_destination_for_project ( config , project )
261
285
if os .path .exists (mad_dir ):
262
286
print (f"Deleting existing MaD directory at { mad_dir } " )
263
287
subprocess .check_call (["rm" , "-rf" , mad_dir ])
264
288
265
289
for project , database_dir in database_results :
266
290
if database_dir is not None :
267
- generate_models (project , database_dir )
268
-
291
+ generate_models (args , project , database_dir )
269
292
270
293
if __name__ == "__main__" :
271
294
parser = argparse .ArgumentParser ()
0 commit comments