From 52ff60e06f8e3a5b3db23942247a25790c98d165 Mon Sep 17 00:00:00 2001 From: Facundo Tuesca Date: Wed, 14 Aug 2024 17:46:47 +0200 Subject: [PATCH 1/6] Verify URLs that link to the project page on PyPI --- tests/unit/forklift/test_legacy.py | 119 ++++++++++++++++++++++++++++- warehouse/forklift/legacy.py | 36 ++++++++- 2 files changed, 151 insertions(+), 4 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 6bd87049aa22..8ebe6b4fe17c 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -4871,5 +4871,120 @@ def test_missing_trailing_slash_redirect(pyramid_request): ), ], ) -def test_verify_url(url, publisher_url, expected): - assert legacy._verify_url(url, publisher_url) == expected +def test_verify_url_with_trusted_publisher(url, publisher_url, expected): + assert legacy._verify_url_with_trusted_publisher(url, publisher_url) == expected + + +@pytest.mark.parametrize( + ("url", "project_name", "project_normalized_name", "expected"), + [ + ( # PyPI /project/ case + "https://pypi.org/project/myproject", + "myproject", + "myproject", + True, + ), + ( # PyPI /p/ case + "https://pypi.org/p/myproject", + "myproject", + "myproject", + True, + ), + ( # pypi.python.org /project/ case + "https://pypi.python.org/project/myproject", + "myproject", + "myproject", + True, + ), + ( # pypi.python.org /p/ case + "https://pypi.python.org/p/myproject", + "myproject", + "myproject", + True, + ), + ( # python.org/pypi/ case + "https://python.org/pypi/myproject", + "myproject", + "myproject", + True, + ), + ( # PyPI /project/ case + "https://pypi.org/project/myproject", + "myproject", + "myproject", + True, + ), + ( # Normalized name differs from URL + "https://pypi.org/project/my_project", + "my_project", + "my-project", + True, + ), + ( # Normalized name same as URL + "https://pypi.org/project/my-project", + "my_project", + "my-project", + True, + ), + ( # Trailing slash + "https://pypi.org/project/myproject/", + "myproject", + "myproject", + True, + ), + ( # Domains are case insensitive + "https://PyPI.org/project/myproject", + "myproject", + "myproject", + True, + ), + ( # Paths are case-sensitive + "https://pypi.org/Project/myproject", + "myproject", + "myproject", + False, + ), + ( # Wrong domain + "https://example.com/project/myproject", + "myproject", + "myproject", + False, + ), + ( # Wrong path + "https://pypi.org/something/myproject", + "myproject", + "myproject", + False, + ), + ( # Path has extra components + "https://pypi.org/something/myproject/something", + "myproject", + "myproject", + False, + ), + ( # Wrong package name + "https://pypi.org/project/otherproject", + "myproject", + "myproject", + False, + ), + ( # Similar package name + "https://pypi.org/project/myproject", + "myproject2", + "myproject2", + False, + ), + ( # Similar package name + "https://pypi.org/project/myproject2", + "myproject", + "myproject", + False, + ), + ], +) +def test_verify_url_with_trusted_publisher( + url, project_name, project_normalized_name, expected +): + assert ( + legacy._verify_url_pypi(url, project_name, project_normalized_name) == expected + ) diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index 0ee97af807c1..98515d4c9587 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -457,7 +457,31 @@ def _process_attestations(request, distribution: Distribution): metrics.increment("warehouse.upload.attestations.ok") -def _verify_url(url: str, publisher_url: str | None) -> bool: +_pypi_project_urls = [ + "https://pypi.org/project/", + "https://pypi.org/p/", + "https://pypi.python.org/project/", + "https://pypi.python.org/p/", + "https://python.org/pypi/", +] + + +def _verify_url_pypi(url: str, project_name: str, project_normalized_name: str) -> bool: + candidate_urls = ( + f"{pypi_project_url}{name}{optional_slash}" + for pypi_project_url in _pypi_project_urls + for name in {project_name, project_normalized_name} + for optional_slash in ["/", ""] + ) + + user_uri = rfc3986.api.uri_reference(url).normalize() + return any( + user_uri == rfc3986.api.uri_reference(candidate_url).normalize() + for candidate_url in candidate_urls + ) + + +def _verify_url_with_trusted_publisher(url: str, publisher_url: str | None) -> bool: """ Verify a given URL against a Trusted Publisher URL @@ -866,7 +890,15 @@ def file_upload(request): else { name: { "url": url, - "verified": _verify_url(url=url, publisher_url=publisher_base_url), + "verified": _verify_url_with_trusted_publisher( + url=url, + publisher_url=publisher_base_url, + ) + or _verify_url_pypi( + url=url, + project_name=project.name, + project_normalized_name=project.normalized_name, + ), } for name, url in meta.project_urls.items() } From 2977abc1cd7868102ce4459f59ab89893d146762 Mon Sep 17 00:00:00 2001 From: Facundo Tuesca Date: Wed, 14 Aug 2024 18:00:34 +0200 Subject: [PATCH 2/6] fix test name --- tests/unit/forklift/test_legacy.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 8ebe6b4fe17c..80a2c41621fe 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -4982,9 +4982,7 @@ def test_verify_url_with_trusted_publisher(url, publisher_url, expected): ), ], ) -def test_verify_url_with_trusted_publisher( - url, project_name, project_normalized_name, expected -): +def test_verify_url_pypi(url, project_name, project_normalized_name, expected): assert ( legacy._verify_url_pypi(url, project_name, project_normalized_name) == expected ) From 386e93c1074d0e663e2420150aeb8b5a7404e5ba Mon Sep 17 00:00:00 2001 From: Facundo Tuesca Date: Wed, 14 Aug 2024 19:03:35 +0200 Subject: [PATCH 3/6] use single helper function to verify urls --- tests/unit/forklift/test_legacy.py | 5 ----- warehouse/forklift/legacy.py | 27 +++++++++++++++++++-------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 80a2c41621fe..0162672f7d7e 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -4864,11 +4864,6 @@ def test_missing_trailing_slash_redirect(pyramid_request): "https://github.com", False, ), - ( # Publisher URL is None - "https://github.com/owner/project", - None, - False, - ), ], ) def test_verify_url_with_trusted_publisher(url, publisher_url, expected): diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index 98515d4c9587..6f1daf695fef 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -481,7 +481,7 @@ def _verify_url_pypi(url: str, project_name: str, project_normalized_name: str) ) -def _verify_url_with_trusted_publisher(url: str, publisher_url: str | None) -> bool: +def _verify_url_with_trusted_publisher(url: str, publisher_url: str) -> bool: """ Verify a given URL against a Trusted Publisher URL @@ -497,9 +497,6 @@ def _verify_url_with_trusted_publisher(url: str, publisher_url: str | None) -> b the authority includes the host, and in practice neither URL should have user nor port information. """ - if not publisher_url: - return False - publisher_uri = rfc3986.api.uri_reference(publisher_url).normalize() user_uri = rfc3986.api.uri_reference(url).normalize() if publisher_uri.path is None: @@ -520,6 +517,23 @@ def _verify_url_with_trusted_publisher(url: str, publisher_url: str | None) -> b ) +def _verify_url( + url: str, publisher_url: str | None, project_name: str, project_normalized_name: str +) -> bool: + is_verified_pypi = _verify_url_pypi( + url=url, + project_name=project_name, + project_normalized_name=project_normalized_name, + ) + + if publisher_url: + return is_verified_pypi or _verify_url_with_trusted_publisher( + url=url, publisher_url=publisher_url + ) + else: + return is_verified_pypi + + def _sort_releases(request: Request, project: Project): releases = ( request.db.query(Release) @@ -890,12 +904,9 @@ def file_upload(request): else { name: { "url": url, - "verified": _verify_url_with_trusted_publisher( + "verified": _verify_url( url=url, publisher_url=publisher_base_url, - ) - or _verify_url_pypi( - url=url, project_name=project.name, project_normalized_name=project.normalized_name, ), From 097cc9193f6453a4c22e98336c8280d4566dbafa Mon Sep 17 00:00:00 2001 From: Facundo Tuesca Date: Wed, 14 Aug 2024 19:53:59 +0200 Subject: [PATCH 4/6] Update warehouse/forklift/legacy.py Co-authored-by: William Woodruff --- warehouse/forklift/legacy.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index 6f1daf695fef..a072be62ada5 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -520,18 +520,19 @@ def _verify_url_with_trusted_publisher(url: str, publisher_url: str) -> bool: def _verify_url( url: str, publisher_url: str | None, project_name: str, project_normalized_name: str ) -> bool: - is_verified_pypi = _verify_url_pypi( + if _verify_url_pypi( url=url, project_name=project_name, project_normalized_name=project_normalized_name, - ) + ): + return True + + if not publisher_url: + return False - if publisher_url: - return is_verified_pypi or _verify_url_with_trusted_publisher( - url=url, publisher_url=publisher_url - ) - else: - return is_verified_pypi + return _verify_url_with_trusted_publisher( + url=url, publisher_url=publisher_url + ) def _sort_releases(request: Request, project: Project): From bf1226e90e8e72a5e3e3ee9474bf6be1fa5c92a5 Mon Sep 17 00:00:00 2001 From: Facundo Tuesca Date: Wed, 14 Aug 2024 19:55:31 +0200 Subject: [PATCH 5/6] lint --- warehouse/forklift/legacy.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index a072be62ada5..46aee9f58091 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -526,13 +526,11 @@ def _verify_url( project_normalized_name=project_normalized_name, ): return True - + if not publisher_url: return False - return _verify_url_with_trusted_publisher( - url=url, publisher_url=publisher_url - ) + return _verify_url_with_trusted_publisher(url=url, publisher_url=publisher_url) def _sort_releases(request: Request, project: Project): From 646e723193ea56173c1d6860dda30dc7d73cf394 Mon Sep 17 00:00:00 2001 From: Facundo Tuesca Date: Wed, 14 Aug 2024 20:14:28 +0200 Subject: [PATCH 6/6] fix missing coverage --- tests/unit/forklift/test_legacy.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 0162672f7d7e..b07ff00347a6 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -4981,3 +4981,28 @@ def test_verify_url_pypi(url, project_name, project_normalized_name, expected): assert ( legacy._verify_url_pypi(url, project_name, project_normalized_name) == expected ) + + +def test_verify_url(): + # `_verify_url` is just a helper function that calls `_verify_url_pypi` and + # `_verify_url_with_trusted_publisher`, where the actual verification logic lives. + assert legacy._verify_url( + url="https://pypi.org/project/myproject/", + publisher_url=None, + project_name="myproject", + project_normalized_name="myproject", + ) + + assert legacy._verify_url( + url="https://github.com/org/myproject/issues", + publisher_url="https://github.com/org/myproject", + project_name="myproject", + project_normalized_name="myproject", + ) + + assert not legacy._verify_url( + url="example.com", + publisher_url="https://github.com/or/myproject", + project_name="myproject", + project_normalized_name="myproject", + )