@@ -185,35 +185,40 @@ def _extract_rule_ids(self, path: str, target_files: set[str]) -> set[str]:
185
185
If any Semgrep rule file could not be safely loaded, or if their format was not in the expected Semgrep
186
186
format, or if there were any files in 'target_files' not found when searching in 'path'.
187
187
"""
188
- path_tree = glob .glob (os .path .join (path , "**" , "*" ), recursive = True )
189
- all_file_names = {os .path .basename (file ) for file in path_tree if os .path .isfile (file )}
190
- if not target_files .issubset (all_file_names ):
191
- error_msg = f"The following semgrep files were not found in { path } : { target_files - all_file_names } "
188
+ # We keep a record of any file paths we couldn't find to provide a more useful error message, rather than raising
189
+ # an error on the first missing file we see.
190
+ missing_files : list [str ] = []
191
+ target_file_paths : list [str ] = []
192
+ rule_ids : set [str ] = set ()
193
+
194
+ for target_file in target_files :
195
+ file_paths = glob .glob (os .path .join (path , "**" , target_file ), recursive = True )
196
+ if not file_paths :
197
+ missing_files .append (target_file )
198
+ target_file_paths .extend (file_paths )
199
+
200
+ if missing_files :
201
+ error_msg = f"The following semgrep files were not found in { path } : { missing_files } "
192
202
logger .debug (error_msg )
193
203
raise ConfigurationError (error_msg )
194
204
195
- rule_ids = set ()
196
- for root , _ , files in os .walk (path ):
197
- files_found = set .intersection (target_files , set (files ))
198
- for filename in files_found :
199
- semgrep_ruleset_file = os .path .join (root , filename )
200
-
201
- try :
202
- with open (semgrep_ruleset_file , encoding = "utf-8" ) as file :
203
- semgrep_ruleset : dict [str , list ] = yaml .safe_load (file .read ())
204
- except yaml .YAMLError as yaml_error :
205
- error_msg = f"Unable to open semgrep rule file { semgrep_ruleset_file } : { yaml_error } ."
206
- logger .debug (error_msg )
207
- raise ConfigurationError (error_msg ) from yaml_error
208
-
209
- # should be a top-level key "rules", and then a list of rules (dictionaries) with "id" entries
210
- try :
211
- for semgrep_rule in semgrep_ruleset ["rules" ]:
212
- rule_ids .add (semgrep_rule ["id" ])
213
- except (KeyError , TypeError ) as format_error :
214
- error_msg = f"Invalid semgrep rule format for { semgrep_ruleset_file } : { format_error } ."
215
- logger .debug (error_msg )
216
- raise ConfigurationError (error_msg ) from format_error
205
+ for file_path in target_file_paths :
206
+ try :
207
+ with open (file_path , encoding = "utf-8" ) as file :
208
+ semgrep_ruleset : dict [str , list ] = yaml .safe_load (file .read ())
209
+ except yaml .YAMLError as yaml_error :
210
+ error_msg = f"Unable to open semgrep rule file { file_path } : { yaml_error } ."
211
+ logger .debug (error_msg )
212
+ raise ConfigurationError (error_msg ) from yaml_error
213
+
214
+ # should be a top-level key "rules", and then a list of rules (dictionaries) with "id" entries
215
+ try :
216
+ for semgrep_rule in semgrep_ruleset ["rules" ]:
217
+ rule_ids .add (semgrep_rule ["id" ])
218
+ except (KeyError , TypeError ) as format_error :
219
+ error_msg = f"Invalid semgrep rule format for { file_path } : { format_error } ."
220
+ logger .debug (error_msg )
221
+ raise ConfigurationError (error_msg ) from format_error
217
222
218
223
return rule_ids
219
224
@@ -306,7 +311,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
306
311
# e.g. rule_id = src.macaron.resources.pypi_malware_rules.obfuscation_decode-and-execute, which comes from
307
312
# the rule ID 'obfuscation_decode-and-execute' inside 'obfuscation.yaml'.
308
313
if rule_id .split ("." )[- 1 ] in self .disabled_rule_ids :
309
- if rule_id not in self . disabled_rule_ids :
314
+ if rule_id not in disabled_results :
310
315
disabled_results [rule_id ] = {"message" : message , "detections" : []}
311
316
disabled_results [rule_id ]["detections" ].append ({"file" : file , "start" : start , "end" : end })
312
317
0 commit comments