Skip to content

Commit 2510d7d

Browse files
authored
refactor: replace unreachable project links heuristic with source code repo heuristic (#983)
replaced unreachable project links heuristic with source code repo heuristic, which checks if the repository component has been found by macaron.
1 parent a6460d1 commit 2510d7d

File tree

7 files changed

+67
-153
lines changed

7 files changed

+67
-153
lines changed

docs/source/pages/developers_guide/apidoc/macaron.malware_analyzer.pypi_heuristics.metadata.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,18 @@ macaron.malware\_analyzer.pypi\_heuristics.metadata.one\_release module
4949
:undoc-members:
5050
:show-inheritance:
5151

52-
macaron.malware\_analyzer.pypi\_heuristics.metadata.unchanged\_release module
52+
macaron.malware\_analyzer.pypi\_heuristics.metadata.source\_code\_repo module
5353
-----------------------------------------------------------------------------
5454

55-
.. automodule:: macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release
55+
.. automodule:: macaron.malware_analyzer.pypi_heuristics.metadata.source_code_repo
5656
:members:
5757
:undoc-members:
5858
:show-inheritance:
5959

60-
macaron.malware\_analyzer.pypi\_heuristics.metadata.unreachable\_project\_links module
61-
--------------------------------------------------------------------------------------
60+
macaron.malware\_analyzer.pypi\_heuristics.metadata.unchanged\_release module
61+
-----------------------------------------------------------------------------
6262

63-
.. automodule:: macaron.malware_analyzer.pypi_heuristics.metadata.unreachable_project_links
63+
.. automodule:: macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release
6464
:members:
6565
:undoc-members:
6666
:show-inheritance:

src/macaron/malware_analyzer/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ When a heuristic fails, with `HeuristicResult.FAIL`, then that is an indicator b
1111
Repositories). Many malicious packages do not include any project links.
1212
- **Rule**: Return `HeuristicResult.FAIL` when there are no project links; otherwise, return `HeuristicResult.PASS`.
1313

14-
2. **Unreachable Project Links**
15-
- **Description**: Checks the accessibility of the project links. This is considered an auxiliary
16-
heuristic since no cases have met this heuristic.
17-
- **Rule**: Return `HeuristicResult.FAIL` if all project links are unreachable; otherwise, return `HeuristicResult.PASS`.
14+
2. **Source Code Repo**
15+
- **Description**: Checks if Macaron was able to find a repository containing this package's source code (i.e., checks if
16+
one exists).
17+
- **Rule**: Return `HeuristicResult.FAIL` if no repository was found; otherwise, return `HeuristicResult.PASS`.
1818
- **Dependency**: Will be run if the Empty Project Link heuristic passes.
1919

2020
3. **One Release**

src/macaron/malware_analyzer/pypi_heuristics/heuristics.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ class Heuristics(str, Enum):
1212
#: Indicates that the package does not contain any project links (such as documentation or Git repository pages).
1313
EMPTY_PROJECT_LINK = "empty_project_link"
1414

15-
#: Indicates that the package contains project links, but all of them are unreachable.
16-
UNREACHABLE_PROJECT_LINKS = "unreachable_project_links"
15+
#: Indicates that the source code repository for the package was not found.
16+
SOURCE_CODE_REPO = "source_code_repo"
1717

1818
#: Indicates that the package contains only one release.
1919
ONE_RELEASE = "one_release"
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
1-
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

4-
"""The heuristic analyzer to check the project links."""
4+
"""The heuristic analyzer to check if a source code repo was found."""
55

66
import logging
77

8-
import requests
9-
108
from macaron.json_tools import JsonType
119
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
1210
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
@@ -15,17 +13,17 @@
1513
logger: logging.Logger = logging.getLogger(__name__)
1614

1715

18-
class UnreachableProjectLinksAnalyzer(BaseHeuristicAnalyzer):
16+
class SourceCodeRepoAnalyzer(BaseHeuristicAnalyzer):
1917
"""
20-
Analyze the accessibility of the project links.
18+
Analyze whether a source code repository was found for the package.
2119
22-
If >= 1 project links are reachable, the analyzer consider the package as benign.
20+
Passes if a repository was found and validated by the repo finder, otherwise fails.
2321
"""
2422

2523
def __init__(self) -> None:
2624
super().__init__(
27-
name="unreachable_project_links_analyzer",
28-
heuristic=Heuristics.UNREACHABLE_PROJECT_LINKS,
25+
name="source_code_repo_analyzer",
26+
heuristic=Heuristics.SOURCE_CODE_REPO,
2927
depends_on=[(Heuristics.EMPTY_PROJECT_LINK, HeuristicResult.PASS)],
3028
)
3129

@@ -42,18 +40,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
4240
tuple[HeuristicResult, dict[str, JsonType]]:
4341
The result and related information collected during the analysis.
4442
"""
45-
project_links: dict | None = pypi_package_json.get_project_links()
46-
47-
if project_links is None:
48-
return HeuristicResult.SKIP, {}
49-
50-
for link in project_links.values():
51-
try:
52-
response = requests.head(link, timeout=3)
53-
if response.status_code < 400:
54-
return HeuristicResult.PASS, {}
55-
except requests.exceptions.RequestException as error:
56-
logger.debug(error)
57-
continue
58-
59-
return HeuristicResult.FAIL, {}
43+
# If a source code repo exists, it will already have been validated by the repo finder.
44+
if not pypi_package_json.component.repository:
45+
return HeuristicResult.FAIL, {}
46+
return HeuristicResult.PASS, {}

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
from macaron.malware_analyzer.pypi_heuristics.metadata.empty_project_link import EmptyProjectLinkAnalyzer
2121
from macaron.malware_analyzer.pypi_heuristics.metadata.high_release_frequency import HighReleaseFrequencyAnalyzer
2222
from macaron.malware_analyzer.pypi_heuristics.metadata.one_release import OneReleaseAnalyzer
23+
from macaron.malware_analyzer.pypi_heuristics.metadata.source_code_repo import SourceCodeRepoAnalyzer
2324
from macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release import UnchangedReleaseAnalyzer
24-
from macaron.malware_analyzer.pypi_heuristics.metadata.unreachable_project_links import UnreachableProjectLinksAnalyzer
2525
from macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence import WheelAbsenceAnalyzer
2626
from macaron.malware_analyzer.pypi_heuristics.pypi_sourcecode_analyzer import PyPISourcecodeAnalyzer
2727
from macaron.malware_analyzer.pypi_heuristics.sourcecode.suspicious_setup import SuspiciousSetupAnalyzer
@@ -69,7 +69,7 @@ class MaliciousMetadataFacts(CheckFacts):
6969
# When implementing new analyzer, appending the classes to this list
7070
ANALYZERS: list = [
7171
EmptyProjectLinkAnalyzer,
72-
UnreachableProjectLinksAnalyzer,
72+
SourceCodeRepoAnalyzer,
7373
OneReleaseAnalyzer,
7474
HighReleaseFrequencyAnalyzer,
7575
UnchangedReleaseAnalyzer,
@@ -97,7 +97,7 @@ class MaliciousMetadataFacts(CheckFacts):
9797
] = {
9898
(
9999
HeuristicResult.FAIL, # Empty Project
100-
HeuristicResult.SKIP, # Unreachable Project Links
100+
HeuristicResult.SKIP, # Source Code Repo
101101
HeuristicResult.FAIL, # One Release
102102
HeuristicResult.SKIP, # High Release Frequency
103103
HeuristicResult.SKIP, # Unchanged Release
@@ -112,7 +112,7 @@ class MaliciousMetadataFacts(CheckFacts):
112112
): Confidence.HIGH,
113113
(
114114
HeuristicResult.FAIL, # Empty Project
115-
HeuristicResult.SKIP, # Unreachable Project Links
115+
HeuristicResult.SKIP, # Source Code Repo
116116
HeuristicResult.FAIL, # One Release
117117
HeuristicResult.SKIP, # High Release Frequency
118118
HeuristicResult.SKIP, # Unchanged Release
@@ -127,7 +127,7 @@ class MaliciousMetadataFacts(CheckFacts):
127127
): Confidence.HIGH,
128128
(
129129
HeuristicResult.FAIL, # Empty Project
130-
HeuristicResult.SKIP, # Unreachable Project Links
130+
HeuristicResult.SKIP, # Source Code Repo
131131
HeuristicResult.PASS, # One Release
132132
HeuristicResult.FAIL, # High Release Frequency
133133
HeuristicResult.FAIL, # Unchanged Release
@@ -141,7 +141,7 @@ class MaliciousMetadataFacts(CheckFacts):
141141
): Confidence.HIGH,
142142
(
143143
HeuristicResult.FAIL, # Empty Project
144-
HeuristicResult.SKIP, # Unreachable Project Links
144+
HeuristicResult.SKIP, # Source Code Repo
145145
HeuristicResult.PASS, # One Release
146146
HeuristicResult.FAIL, # High Release Frequency
147147
HeuristicResult.PASS, # Unchanged Release
@@ -155,7 +155,7 @@ class MaliciousMetadataFacts(CheckFacts):
155155
): Confidence.HIGH,
156156
(
157157
HeuristicResult.FAIL, # Empty Project
158-
HeuristicResult.SKIP, # Unreachable Project Links
158+
HeuristicResult.SKIP, # Source Code Repo
159159
HeuristicResult.PASS, # One Release
160160
HeuristicResult.FAIL, # High Release Frequency
161161
HeuristicResult.FAIL, # Unchanged Release
@@ -169,7 +169,7 @@ class MaliciousMetadataFacts(CheckFacts):
169169
): Confidence.MEDIUM,
170170
(
171171
HeuristicResult.FAIL, # Empty Project
172-
HeuristicResult.SKIP, # Unreachable Project Links
172+
HeuristicResult.SKIP, # Source Code Repo
173173
HeuristicResult.PASS, # One Release
174174
HeuristicResult.FAIL, # High Release Frequency
175175
HeuristicResult.FAIL, # Unchanged Release
@@ -183,21 +183,21 @@ class MaliciousMetadataFacts(CheckFacts):
183183
): Confidence.MEDIUM,
184184
(
185185
HeuristicResult.PASS, # Empty Project
186-
HeuristicResult.FAIL, # Unreachable Project Links
186+
HeuristicResult.FAIL, # Source Code Repo
187187
HeuristicResult.PASS, # One Release
188188
HeuristicResult.FAIL, # High Release Frequency
189189
HeuristicResult.PASS, # Unchanged Release
190190
HeuristicResult.FAIL, # Closer Release Join Date
191191
HeuristicResult.FAIL, # Suspicious Setup
192192
HeuristicResult.FAIL, # Wheel Absence
193193
HeuristicResult.SKIP, # Anomalous Version
194-
# All project links are unreachable, frequent releases of multiple versions,
194+
# No source code repo, frequent releases of multiple versions,
195195
# and the maintainer released it shortly after account registration.
196196
# The setup.py file contains suspicious imports and .whl file isn't present.
197197
): Confidence.HIGH,
198198
(
199199
HeuristicResult.FAIL, # Empty Project
200-
HeuristicResult.SKIP, # Unreachable Project Links
200+
HeuristicResult.SKIP, # Source Code Repo
201201
HeuristicResult.FAIL, # One Release
202202
HeuristicResult.SKIP, # High Release Frequency
203203
HeuristicResult.SKIP, # Unchanged Release
@@ -212,7 +212,7 @@ class MaliciousMetadataFacts(CheckFacts):
212212
): Confidence.MEDIUM,
213213
(
214214
HeuristicResult.FAIL, # Empty Project
215-
HeuristicResult.SKIP, # Unreachable Project Links
215+
HeuristicResult.SKIP, # Source Code Repo
216216
HeuristicResult.FAIL, # One Release
217217
HeuristicResult.SKIP, # High Release Frequency
218218
HeuristicResult.SKIP, # Unchanged Release
@@ -227,7 +227,7 @@ class MaliciousMetadataFacts(CheckFacts):
227227
): Confidence.MEDIUM,
228228
(
229229
HeuristicResult.FAIL, # Empty Project
230-
HeuristicResult.SKIP, # Unreachable Project Links
230+
HeuristicResult.SKIP, # Source Code Repo
231231
HeuristicResult.FAIL, # One Release
232232
HeuristicResult.SKIP, # High Release Frequency
233233
HeuristicResult.SKIP, # Unchanged Release
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""Tests for the heuristic that checks whether a source code repository was found for a PyPI package."""
5+
6+
from unittest.mock import MagicMock
7+
8+
import pytest
9+
10+
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult
11+
from macaron.malware_analyzer.pypi_heuristics.metadata.source_code_repo import SourceCodeRepoAnalyzer
12+
13+
14+
@pytest.mark.parametrize(
15+
("repository", "expected_result"),
16+
[
17+
pytest.param(None, HeuristicResult.FAIL, id="test_no_repo"),
18+
pytest.param(
19+
MagicMock(),
20+
HeuristicResult.PASS,
21+
id="test_valid_repo",
22+
),
23+
],
24+
)
25+
def test_repo_existence(
26+
pypi_package_json: MagicMock, repository: MagicMock | None, expected_result: HeuristicResult
27+
) -> None:
28+
"""Test if the source code repo exists."""
29+
pypi_package_json.component.repository = repository
30+
analyzer = SourceCodeRepoAnalyzer()
31+
result, _ = analyzer.analyze(pypi_package_json)
32+
assert result == expected_result

tests/malware_analyzer/pypi/test_unreachable_project_links_analyzer.py

Lines changed: 0 additions & 105 deletions
This file was deleted.

0 commit comments

Comments
 (0)