@@ -238,10 +238,11 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
238
238
if there is no source code available.
239
239
"""
240
240
analysis_result : dict = {}
241
- disabled_results : dict = (
242
- {}
243
- ) # since we have to run them anyway, return disabled rule findings for debug information
244
- # only run semgrep open-source features, and disable 'nosemgrep' ignoring so this does not bypass our scan
241
+ # since we have to run them anyway, return disabled rule findings for debug information
242
+ disabled_results : dict = {}
243
+ # Here, we disable 'nosemgrep' ignoring so that it cannot be used to evade our scan (i.e. malware includes
244
+ # 'nosemgrep' comments to prevent our scan from detecting those code lines). Read more about the 'nosemgrep' feature
245
+ # here: https://semgrep.dev/docs/ignoring-files-folders-code
245
246
semgrep_commands : list [str ] = ["semgrep" , "scan" , "--oss-only" , "--disable-nosem" ]
246
247
result : HeuristicResult = HeuristicResult .PASS
247
248
@@ -302,6 +303,8 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
302
303
# only work if `--experimental` is also supplied to enable experimental features, which we do not use.
303
304
# Semgrep provides a relative path separated by '.' to the rule ID, where the rule ID is always the
304
305
# final element in that path, so we use that to match our rule IDs.
306
+ # e.g. rule_id = src.macaron.resources.pypi_malware_rules.obfuscation_decode-and-execute, which comes from
307
+ # the rule ID 'obfuscation_decode-and-execute' inside 'obfuscation.yaml'.
305
308
if rule_id .split ("." )[- 1 ] in self .disabled_rule_ids :
306
309
if rule_id not in self .disabled_rule_ids :
307
310
disabled_results [rule_id ] = {"message" : message , "detections" : []}
0 commit comments