diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index ec7f4caf7bd8..d5c596352021 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -4799,84 +4799,6 @@ def test_missing_trailing_slash_redirect(pyramid_request): assert resp.headers["Location"] == "/legacy/" -@pytest.mark.parametrize( - ("url", "publisher_url", "expected"), - [ - ( # GitHub trivial case - "https://github.com/owner/project", - "https://github.com/owner/project", - True, - ), - ( # ActiveState trivial case - "https://platform.activestate.com/owner/project", - "https://platform.activestate.com/owner/project", - True, - ), - ( # GitLab trivial case - "https://gitlab.com/owner/project", - "https://gitlab.com/owner/project", - True, - ), - ( # URL is a sub-path of the TP URL - "https://github.com/owner/project/issues", - "https://github.com/owner/project", - True, - ), - ( # Normalization - "https://GiThUB.com/owner/project/", - "https://github.com/owner/project", - True, - ), - ( # TP URL is a prefix, but not a parent of the URL - "https://github.com/owner/project22", - "https://github.com/owner/project", - False, - ), - ( # URL is a parent of the TP URL - "https://github.com/owner", - "https://github.com/owner/project", - False, - ), - ( # Scheme component does not match - "http://github.com/owner/project", - "https://github.com/owner/project", - False, - ), - ( # Host component does not match - "https://gitlab.com/owner/project", - "https://github.com/owner/project", - False, - ), - ( # Host component matches, but contains user and port info - "https://user@github.com:443/owner/project", - "https://github.com/owner/project", - False, - ), - ( # URL path component is empty - "https://github.com", - "https://github.com/owner/project", - False, - ), - ( # TP URL path component is empty - # (currently no TPs have an empty path, so even if the given URL is a - # sub-path of the TP URL, we fail the verification) - "https://github.com/owner/project", - "https://github.com", - False, - ), - ( # Both path components are empty - # (currently no TPs have an empty path, so even if the given URL is the - # same as the TP URL, we fail the verification) - "https://github.com", - "https://github.com", - False, - ), - ], -) -def test_verify_url_with_trusted_publisher(url, publisher_url, expected): - assert legacy._verify_url_with_trusted_publisher(url, publisher_url) == expected - - @pytest.mark.parametrize( ("url", "project_name", "project_normalized_name", "expected"), [ @@ -4992,24 +4914,27 @@ def test_verify_url_pypi(url, project_name, project_normalized_name, expected): def test_verify_url(): # `_verify_url` is just a helper function that calls `_verify_url_pypi` and - # `_verify_url_with_trusted_publisher`, where the actual verification logic lives. + # `OIDCPublisher.verify_url`, where the actual verification logic lives. + publisher_verifies = pretend.stub(verify_url=lambda url: True) + publisher_fails = pretend.stub(verify_url=lambda url: False) + assert legacy._verify_url( url="https://pypi.org/project/myproject/", - publisher_url=None, + publisher=None, project_name="myproject", project_normalized_name="myproject", ) assert legacy._verify_url( url="https://github.com/org/myproject/issues", - publisher_url="https://github.com/org/myproject", + publisher=publisher_verifies, project_name="myproject", project_normalized_name="myproject", ) assert not legacy._verify_url( url="example.com", - publisher_url="https://github.com/or/myproject", + publisher=publisher_fails, project_name="myproject", project_normalized_name="myproject", ) diff --git a/tests/unit/oidc/models/test_core.py b/tests/unit/oidc/models/test_core.py index fc888c380478..2daeb5468948 100644 --- a/tests/unit/oidc/models/test_core.py +++ b/tests/unit/oidc/models/test_core.py @@ -54,6 +54,89 @@ def test_oidc_publisher_not_default_verifiable(self): publisher.verify_claims(signed_claims={}, publisher_service=pretend.stub()) assert str(e.value) == "No required verifiable claims" + @pytest.mark.parametrize( + ("url", "publisher_url", "expected"), + [ + ( # GitHub trivial case + "https://github.com/owner/project", + "https://github.com/owner/project", + True, + ), + ( # ActiveState trivial case + "https://platform.activestate.com/owner/project", + "https://platform.activestate.com/owner/project", + True, + ), + ( # GitLab trivial case + "https://gitlab.com/owner/project", + "https://gitlab.com/owner/project", + True, + ), + ( # URL is a sub-path of the TP URL + "https://github.com/owner/project/issues", + "https://github.com/owner/project", + True, + ), + ( # Normalization + "https://GiThUB.com/owner/project/", + "https://github.com/owner/project", + True, + ), + ( # TP URL is a prefix, but not a parent of the URL + "https://github.com/owner/project22", + "https://github.com/owner/project", + False, + ), + ( # URL is a parent of the TP URL + "https://github.com/owner", + "https://github.com/owner/project", + False, + ), + ( # Scheme component does not match + "http://github.com/owner/project", + "https://github.com/owner/project", + False, + ), + ( # Host component does not match + "https://gitlab.com/owner/project", + "https://github.com/owner/project", + False, + ), + ( # Host component matches, but contains user and port info + "https://user@github.com:443/owner/project", + "https://github.com/owner/project", + False, + ), + ( # URL path component is empty + "https://github.com", + "https://github.com/owner/project", + False, + ), + ( # TP URL path component is empty + # (currently no TPs have an empty path, so even if the given URL is a + # sub-path of the TP URL, we fail the verification) + "https://github.com/owner/project", + "https://github.com", + False, + ), + ( # Both path components are empty + # (currently no TPs have an empty path, so even if the given URL is the + # same as the TP URL, we fail the verification) + "https://github.com", + "https://github.com", + False, + ), + ], + ) + def test_verify_url(self, monkeypatch, url, publisher_url, expected): + class ConcretePublisher(_core.OIDCPublisher): + @property + def publisher_base_url(self): + return publisher_url + + publisher = ConcretePublisher() + assert publisher.verify_url(url) == expected + def test_check_existing_jti(): publisher = pretend.stub( diff --git a/tests/unit/oidc/models/test_github.py b/tests/unit/oidc/models/test_github.py index 7a7865ac9de1..9a195541a218 100644 --- a/tests/unit/oidc/models/test_github.py +++ b/tests/unit/oidc/models/test_github.py @@ -656,6 +656,27 @@ def test_github_publisher_duplicates_cant_be_created(self, db_request): db_request.db.add(publisher2) db_request.db.commit() + @pytest.mark.parametrize( + ("url", "expected"), + [ + ("https://repository_owner.github.io/repository_name/", True), + ("https://repository_owner.github.io/repository_name", True), + ("https://repository_owner.github.io/repository_name/subpage", True), + ("https://repository_owner.github.io/repository_name/../malicious", False), + ("https://repository_owner.github.io/", False), + ("https://repository_owner.github.io/unrelated_name/", False), + ], + ) + def test_github_publisher_verify_url(self, url, expected): + publisher = github.GitHubPublisher( + repository_name="repository_name", + repository_owner="repository_owner", + repository_owner_id="666", + workflow_filename="workflow_filename.yml", + environment="", + ) + assert publisher.verify_url(url) == expected + class TestPendingGitHubPublisher: def test_reify_does_not_exist_yet(self, db_request): diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index bfb073d41b16..08693a72a80c 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -62,6 +62,7 @@ from warehouse.forklift.forms import UploadForm, _filetype_extension_mapping from warehouse.macaroons.models import Macaroon from warehouse.metrics import IMetricsService +from warehouse.oidc.models import OIDCPublisher from warehouse.packaging.interfaces import IFileStorage, IProjectService from warehouse.packaging.models import ( Dependency, @@ -481,44 +482,11 @@ def _verify_url_pypi(url: str, project_name: str, project_normalized_name: str) ) -def _verify_url_with_trusted_publisher(url: str, publisher_url: str) -> bool: - """ - Verify a given URL against a Trusted Publisher URL - - A URL is considered "verified" iff it matches the Trusted Publisher URL - such that, when both URLs are normalized: - - The scheme component is the same (e.g: both use `https`) - - The authority component is the same (e.g.: `github.com`) - - The path component is the same, or a sub-path of the Trusted Publisher URL - (e.g.: `org/project` and `org/project/issues.html` will pass verification - against an `org/project` Trusted Publisher path component) - - The path component of the Trusted Publisher URL is not empty - Note: We compare the authority component instead of the host component because - the authority includes the host, and in practice neither URL should have user - nor port information. - """ - publisher_uri = rfc3986.api.uri_reference(publisher_url).normalize() - user_uri = rfc3986.api.uri_reference(url).normalize() - if publisher_uri.path is None: - # Currently no Trusted Publishers have an empty path component, - # so we defensively fail verification. - return False - elif user_uri.path and publisher_uri.path: - is_subpath = publisher_uri.path == user_uri.path or user_uri.path.startswith( - publisher_uri.path + "/" - ) - else: - is_subpath = publisher_uri.path == user_uri.path - - return ( - publisher_uri.scheme == user_uri.scheme - and publisher_uri.authority == user_uri.authority - and is_subpath - ) - - def _verify_url( - url: str, publisher_url: str | None, project_name: str, project_normalized_name: str + url: str, + publisher: OIDCPublisher | None, + project_name: str, + project_normalized_name: str, ) -> bool: if _verify_url_pypi( url=url, @@ -527,10 +495,10 @@ def _verify_url( ): return True - if not publisher_url: + if not publisher: return False - return _verify_url_with_trusted_publisher(url=url, publisher_url=publisher_url) + return publisher.verify_url(url) def _sort_releases(request: Request, project: Project): @@ -894,9 +862,6 @@ def file_upload(request): ) from None # Verify any verifiable URLs - publisher_base_url = ( - request.oidc_publisher.publisher_base_url if request.oidc_publisher else None - ) project_urls = ( {} if not meta.project_urls @@ -905,7 +870,7 @@ def file_upload(request): "url": url, "verified": _verify_url( url=url, - publisher_url=publisher_base_url, + publisher=request.oidc_publisher, project_name=project.name, project_normalized_name=project.normalized_name, ), diff --git a/warehouse/oidc/models/_core.py b/warehouse/oidc/models/_core.py index 588cb4992473..60bf8a3803d3 100644 --- a/warehouse/oidc/models/_core.py +++ b/warehouse/oidc/models/_core.py @@ -15,6 +15,7 @@ from collections.abc import Callable from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Unpack +import rfc3986 import sentry_sdk from sigstore.verify.policy import VerificationPolicy @@ -325,6 +326,42 @@ def stored_claims( # Only concrete subclasses are constructed. raise NotImplementedError + def verify_url(self, url: str) -> bool: + """ + Verify a given URL against this Trusted Publisher's base URL + + A URL is considered "verified" iff it matches the Trusted Publisher URL + such that, when both URLs are normalized: + - The scheme component is the same (e.g: both use `https`) + - The authority component is the same (e.g.: `github.com`) + - The path component is the same, or a sub-path of the Trusted Publisher URL + (e.g.: `org/project` and `org/project/issues.html` will pass verification + against an `org/project` Trusted Publisher path component) + - The path component of the Trusted Publisher URL is not empty + Note: We compare the authority component instead of the host component because + the authority includes the host, and in practice neither URL should have user + nor port information. + """ + publisher_uri = rfc3986.api.uri_reference(self.publisher_base_url).normalize() + user_uri = rfc3986.api.uri_reference(url).normalize() + if publisher_uri.path is None: + # Currently no Trusted Publishers have an empty path component, + # so we defensively fail verification. + return False + elif user_uri.path and publisher_uri.path: + is_subpath = ( + publisher_uri.path == user_uri.path + or user_uri.path.startswith(publisher_uri.path + "/") + ) + else: + is_subpath = publisher_uri.path == user_uri.path + + return ( + publisher_uri.scheme == user_uri.scheme + and publisher_uri.authority == user_uri.authority + and is_subpath + ) + class OIDCPublisher(OIDCPublisherMixin, db.Model): __tablename__ = "oidc_publishers" diff --git a/warehouse/oidc/models/github.py b/warehouse/oidc/models/github.py index 5114076bc7e5..a5aa9764ef4e 100644 --- a/warehouse/oidc/models/github.py +++ b/warehouse/oidc/models/github.py @@ -14,6 +14,8 @@ from typing import Any +import rfc3986 + from sigstore.verify.policy import ( AllOf, AnyOf, @@ -333,6 +335,39 @@ class GitHubPublisher(GitHubPublisherMixin, OIDCPublisher): UUID(as_uuid=True), ForeignKey(OIDCPublisher.id), primary_key=True ) + def verify_url(self, url: str): + """ + Verify a given URL against this GitHub's publisher information + + In addition to the generic Trusted Publisher verification logic in + the parent class, the GitHub Trusted Publisher allows URLs hosted + on `github.io` for the configured repository, i.e: + `https://${OWNER}.github.io/${REPO_NAME}/`. + + As with the generic verification, we allow subpaths of the `.io` URL, + but we normalize using `rfc3986` to reject things like + `https://${OWNER}.github.io/${REPO_NAME}/../malicious`, which would + resolve to a URL outside the `/$REPO_NAME` path. + """ + if super().verify_url(url): + return True + + docs_url = f"https://{self.repository_owner}.github.io/{self.repository_name}" + docs_uri = rfc3986.api.uri_reference(docs_url).normalize() + user_uri = rfc3986.api.uri_reference(url).normalize() + + if not user_uri.path: + return False + + is_subpath = docs_uri.path == user_uri.path or user_uri.path.startswith( + docs_uri.path + "/" + ) + return ( + docs_uri.scheme == user_uri.scheme + and docs_uri.authority == user_uri.authority + and is_subpath + ) + class PendingGitHubPublisher(GitHubPublisherMixin, PendingOIDCPublisher): __tablename__ = "pending_github_oidc_publishers"