From 6832f70a43c791efa28e52ca80c54003da1e4297 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Tue, 20 Sep 2022 23:54:38 -0700 Subject: [PATCH 1/8] Allow external dependencies Resolves https://github.com/python/typeshed/issues/5768 In https://github.com/python/typeshed/issues/5768#issuecomment-1251375258 consensus was reached on Akuli's idea, that external dependencies for stub packages are fine, as long as we validate that the external dependency is a dependency of the upstream package. The most important part of the implementation is the validation we perform. This happens in `verify_typeshed_req` and `verify_external_req`. To help lock things down, Metadata does not expose access to elements of `requires` without validation. We rely on PyPI's API to find the dependencies of the upstream. I believe this might not work if our stub has external dependencies and the upstream does not publish wheels. This is not the case currently (as proven by test) and does not seem too likely, so will leave as a TODO for the future. We use uploaded_packages.txt as the source of truth for what is a typeshed dependency. This is important to avoid potential badness around addition and deletion, for instance, if something is added to typeshed, but the distribution is created by someone else before stub_uploader uploads it. The other set of changes is that I delete most of the graph code that existed previously. The graph code was added in https://github.com/typeshed-internal/stub_uploader/pull/1 and was previously load bearing for security. The idea being to ensure that the graph of transitive dependencies was fully contained within typeshed. This is no longer the case, so we can remove most of it. I still have some graph code in here, but it's no longer load bearing for security. I keep it around to better preserve uploading semantics, since it seems like it could matter in some edge case scenarios (such as multiple packages being uploaded for the first time that depend on each other). Since we don't have custom needs, we can get away with using the new-ish graphlib from stdlib. While the graph code has been removed, note that we do still run validation on transitive dependencies for each package. This is accomplished by `recursive_verify`. I think the non-transitive validation is sufficient, but running this before uploading each package doesn't hurt. I added some special-casing for types-gdb. As Akuli pointed out, this avoids us accidentally trusting a gdb package on PyPI if ever someone attempts to add external dependencies to types-gdb. New code paths have tests. I audited test coverage to make sure of this. --- .github/workflows/force_update.yml | 2 +- .github/workflows/update_stubs.yml | 2 +- stub_uploader/build_wheel.py | 106 +--------------- stub_uploader/const.py | 1 + stub_uploader/get_version.py | 6 - stub_uploader/metadata.py | 190 ++++++++++++++++++++++++++++- stub_uploader/upload_changed.py | 36 +++--- stub_uploader/upload_some.py | 28 ++--- tests/test_integration.py | 114 +++++++++++++---- tests/test_unit.py | 112 ++++------------- 10 files changed, 339 insertions(+), 258 deletions(-) diff --git a/.github/workflows/force_update.yml b/.github/workflows/force_update.yml index 46929590..a2fc1744 100644 --- a/.github/workflows/force_update.yml +++ b/.github/workflows/force_update.yml @@ -34,7 +34,7 @@ jobs: TWINE_PASSWORD: ${{ secrets.TYPESHED_BOT_API_TOKEN }} run: | cd main - python -m stub_uploader.upload_some ../typeshed "${{ github.event.inputs.distribution }}" data/uploaded_packages.txt + python -m stub_uploader.upload_some ../typeshed "${{ github.event.inputs.distribution }}" # If we are force uploading packages that were never uploaded, they are added to the list if [ -z "$(git status --porcelain)" ]; then exit 0; diff --git a/.github/workflows/update_stubs.yml b/.github/workflows/update_stubs.yml index cbc24be3..fbf3eae0 100644 --- a/.github/workflows/update_stubs.yml +++ b/.github/workflows/update_stubs.yml @@ -37,7 +37,7 @@ jobs: TWINE_PASSWORD: ${{ secrets.TYPESHED_BOT_API_TOKEN }} run: | cd main - python -m stub_uploader.upload_changed ../typeshed $(cat data/last_typeshed_commit.sha1) data/uploaded_packages.txt + python -m stub_uploader.upload_changed ../typeshed $(cat data/last_typeshed_commit.sha1) (cd ../typeshed; git rev-parse HEAD) > data/last_typeshed_commit.sha1 if [ -z "$(git status --porcelain)" ]; then exit 0; diff --git a/stub_uploader/build_wheel.py b/stub_uploader/build_wheel.py index 873b622e..952a92c5 100644 --- a/stub_uploader/build_wheel.py +++ b/stub_uploader/build_wheel.py @@ -20,13 +20,11 @@ import os import os.path import shutil -import tempfile import subprocess -from collections import defaultdict +import tempfile from textwrap import dedent -from typing import List, Dict, Set, Optional +from typing import Dict, List, Optional -from stub_uploader import get_version from stub_uploader.const import * from stub_uploader.metadata import Metadata, read_metadata @@ -101,13 +99,6 @@ def __init__(self, typeshed_dir: str, distribution: str) -> None: self.stub_dir = os.path.join(typeshed_dir, THIRD_PARTY_NAMESPACE, distribution) -def strip_types_prefix(dependency: str) -> str: - assert dependency.startswith( - TYPES_PREFIX - ), "Currently only dependencies on stub packages are supported" - return dependency[len(TYPES_PREFIX) :] - - def find_stub_files(top: str) -> List[str]: """Find all stub files for a given package, relative to package root. @@ -214,98 +205,13 @@ def collect_setup_entries(base_dir: str) -> Dict[str, List[str]]: return package_data -def verify_dependency(typeshed_dir: str, dependency: str, uploaded: str) -> None: - """Verify this is a valid dependency, i.e. a stub package uploaded by us.""" - known_distributions = set( - os.listdir(os.path.join(typeshed_dir, THIRD_PARTY_NAMESPACE)) - ) - assert ";" not in dependency, "Semicolons in dependencies are not supported" - dependency = get_version.strip_dep_version(dependency) - assert ( - strip_types_prefix(dependency) in known_distributions - ), "Only dependencies on typeshed stubs are allowed" - with open(uploaded) as f: - uploaded_distributions = set(f.read().splitlines()) - - msg = f"{dependency} looks like a foreign distribution." - uploaded_distributions_lower = [d.lower() for d in uploaded_distributions] - if ( - dependency not in uploaded_distributions - and dependency.lower() in uploaded_distributions_lower - ): - msg += " Note: list is case sensitive" - assert dependency in uploaded_distributions, msg - - -def update_uploaded(uploaded: str, distribution: str) -> None: - with open(uploaded) as f: - current = set(f.read().splitlines()) - if f"types-{distribution}" not in current: - with open(uploaded, "w") as f: - f.write("\n".join(sorted(current | {f"types-{distribution}"}))) - - -def make_dependency_map( - typeshed_dir: str, distributions: List[str] -) -> Dict[str, Set[str]]: - """Return relative dependency map among distributions. - - Important: this only includes dependencies *within* the given - list of distributions. - """ - result: Dict[str, Set[str]] = {d: set() for d in distributions} - for distribution in distributions: - data = read_metadata(typeshed_dir, distribution) - for dependency in data.requires: - dependency = strip_types_prefix(get_version.strip_dep_version(dependency)) - if dependency in distributions: - result[distribution].add(dependency) - return result - - -def transitive_deps(dep_map: Dict[str, Set[str]]) -> Dict[str, Set[str]]: - """Propagate dependencies to compute a transitive dependency map. - - Note: this algorithm is O(N**2) in general case, but we don't worry, - because N is small (less than 1000). So it will take few seconds at worst, - while building/uploading 1000 packages will take minutes. - """ - transitive: Dict[str, Set[str]] = defaultdict(set) - for distribution in dep_map: - to_add = {distribution} - while to_add: - new = to_add.pop() - extra = dep_map[new] - transitive[distribution] |= extra - assert ( - distribution not in transitive[distribution] - ), f"Cyclic dependency {distribution} -> {distribution}" - to_add |= extra - return transitive - - -def sort_by_dependency(dep_map: Dict[str, Set[str]]) -> List[str]: - """Sort distributions by dependency order (those depending on nothing appear first).""" - trans_map = transitive_deps(dep_map) - - # We can't use builtin sort w.r.t. trans_map because it makes various assumptions - # about properties of equality and order (like their mutual transitivity). - def sort(ds: List[str]) -> List[str]: - if not ds: - return [] - pivot = ds.pop() - not_dependent = [d for d in ds if pivot not in trans_map[d]] - dependent = [d for d in ds if pivot in trans_map[d]] - return sort(not_dependent) + [pivot] + sort(dependent) - - # Return independent packages sorted by name for stability. - return sort(sorted(dep_map)) - - def generate_setup_file( build_data: BuildData, metadata: Metadata, version: str, commit: str ) -> str: """Auto-generate a setup.py file for given distribution using a template.""" + all_requirements = [ + str(req) for req in metadata.requires_typeshed + metadata.requires_external + ] package_data = collect_setup_entries(build_data.stub_dir) return SETUP_TEMPLATE.format( distribution=build_data.distribution, @@ -313,7 +219,7 @@ def generate_setup_file( build_data.distribution, commit, metadata ), version=version, - requires=metadata.requires, + requires=all_requirements, packages=list(package_data.keys()), package_data=package_data, ) diff --git a/stub_uploader/const.py b/stub_uploader/const.py index b6fc7257..17179a0f 100644 --- a/stub_uploader/const.py +++ b/stub_uploader/const.py @@ -4,3 +4,4 @@ TYPES_PREFIX = "types-" CHANGELOG_PATH = "data/changelogs" +UPLOADED_PATH = "data/uploaded_packages.txt" diff --git a/stub_uploader/get_version.py b/stub_uploader/get_version.py index 952c025f..04a700e6 100644 --- a/stub_uploader/get_version.py +++ b/stub_uploader/get_version.py @@ -15,7 +15,6 @@ from typing import Any, Union import requests -from packaging.requirements import Requirement from packaging.specifiers import SpecifierSet from packaging.version import Version from requests.adapters import HTTPAdapter @@ -133,11 +132,6 @@ def compute_incremented_version( return incremented_version -def strip_dep_version(dependency: str) -> str: - """Strip a possible version suffix, e.g. types-six>=0.1.4 -> types-six.""" - return Requirement(dependency).name - - def determine_incremented_version(metadata: Metadata) -> str: published_stub_versions = fetch_pypi_versions(metadata.stub_distribution) version = compute_incremented_version( diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 8900f94d..04ce3045 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -1,19 +1,39 @@ +from __future__ import annotations + +import functools +import graphlib import os -from typing import Any, Dict, List, Optional +import re +from typing import Any, Dict, Iterator, Optional +import requests import tomli +from packaging.requirements import Requirement + +from .const import META, THIRD_PARTY_NAMESPACE, TYPES_PREFIX, UPLOADED_PATH + -from .const import META, THIRD_PARTY_NAMESPACE, TYPES_PREFIX +class InvalidRequires(Exception): + pass class Metadata: def __init__(self, distribution: str, data: Dict[str, Any]): - self.upstream_distribution = distribution + assert not distribution.startswith(TYPES_PREFIX) + self._alleged_upstream_distribution = distribution self.data = data + @property + def upstream_distribution(self) -> Optional[str]: + # TODO: add a field to METADATA.toml if the stubs are for a package + # that does not exist on PyPI + if self._alleged_upstream_distribution == "gdb": + return None + return self._alleged_upstream_distribution + @property def stub_distribution(self) -> str: - return TYPES_PREFIX + self.upstream_distribution + return TYPES_PREFIX + self._alleged_upstream_distribution @property def version_spec(self) -> str: @@ -24,8 +44,38 @@ def version_spec(self) -> str: return version @property - def requires(self) -> List[str]: - return self.data.get("requires", []) + def _unvalidated_requires(self) -> list[Requirement]: + return [Requirement(req) for req in self.data.get("requires", [])] + + @property + def _unvalidated_requires_typeshed(self) -> list[Requirement]: + return [ + r + for r in self._unvalidated_requires + if canonical_name(r.name) in uploaded_packages.read() + ] + + @functools.cached_property + def requires_typeshed(self) -> list[Requirement]: + reqs = self._unvalidated_requires_typeshed + for req in reqs: + verify_typeshed_req(req) + return reqs + + @property + def _unvalidated_requires_external(self) -> list[Requirement]: + return [ + r + for r in self._unvalidated_requires + if canonical_name(r.name) not in uploaded_packages.read() + ] + + @functools.cached_property + def requires_external(self) -> list[Requirement]: + reqs = self._unvalidated_requires_external + for req in reqs: + verify_external_req(req, self.upstream_distribution) + return reqs @property def extra_description(self) -> str: @@ -42,7 +92,135 @@ def no_longer_updated(self) -> bool: def read_metadata(typeshed_dir: str, distribution: str) -> Metadata: """Parse metadata from file.""" + assert not distribution.startswith(TYPES_PREFIX) path = os.path.join(typeshed_dir, THIRD_PARTY_NAMESPACE, distribution, META) with open(path, "rb") as f: data = tomli.load(f) return Metadata(distribution=distribution, data=data) + + +def canonical_name(name: str) -> str: + # https://peps.python.org/pep-0503/#normalized-names + return re.sub(r"[-_.]+", "-", name).lower() + + +class _UploadedPackages: + def __init__(self, file_path: str) -> None: + self._file_path = file_path + self._cached: Optional[list[str]] = None + + def read(self) -> set[str]: + if self._cached is not None: + return set(map(canonical_name, self._cached)) + with open(self._file_path) as f: + self._cached = f.read().splitlines() + return set(map(canonical_name, self._cached)) + + def add(self, distribution: str) -> None: + assert not distribution.startswith(TYPES_PREFIX) + stub_dist = TYPES_PREFIX + distribution + if canonical_name(stub_dist) not in self.read(): + with open(self._file_path) as f: + current = f.read().splitlines() + current.append(stub_dist) + current.sort() + with open(self._file_path, "w") as f: + f.write("\n".join(current)) + self._cached = None + + +uploaded_packages = _UploadedPackages(UPLOADED_PATH) + + +def strip_types_prefix(dependency: str) -> str: + if not dependency.startswith(TYPES_PREFIX): + raise ValueError("Expected dependency on a typeshed package") + return dependency[len(TYPES_PREFIX) :] + + +def verify_typeshed_req(req: Requirement) -> None: + if not req.name.startswith(TYPES_PREFIX): + raise InvalidRequires(f"Expected dependency {req} to start with {TYPES_PREFIX}") + if not canonical_name(req.name) in uploaded_packages.read(): + raise InvalidRequires( + f"Expected dependency {req} to be uploaded from stub_uploader" + ) + + +def verify_external_req(req: Requirement, upstream_distribution: Optional[str]) -> None: + if canonical_name(req.name) in uploaded_packages.read(): + raise InvalidRequires( + f"Expected dependency {req} to not be uploaded from stub_uploader" + ) + if req.name.startswith(TYPES_PREFIX): + # technically this could be allowed, but it's very suspicious + raise InvalidRequires( + f"Expected dependency {req} to not start with {TYPES_PREFIX}" + ) + + if upstream_distribution is None: + # There is not an upstream distribution on PyPI + return + + resp = requests.get(f"https://pypi.org/pypi/{upstream_distribution}/json") + if resp.status_code != 200: + raise InvalidRequires( + f"Expected dependency {req} to be accessible on PyPI, but got {resp.status_code}" + ) + + data = resp.json() + + # TODO: allow external dependencies for stubs for packages that do not ship wheels + if req.name not in [ + Requirement(r).name for r in (data["info"].get("requires_dist") or []) + ]: + raise InvalidRequires( + f"Expected dependency {req} to be listed in {upstream_distribution}'s requires_dist" + ) + + +def sort_by_dependency(typeshed_dir: str, distributions: list[str]) -> Iterator[str]: + # Just a simple topological sort. Unlike previous versions of the code, we do not rely + # on this to perform validation, like requiring the graph to be complete. + # We only use this to help with edge cases like multiple packages being uploaded + # for the first time that depend on each other. + ts: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter() + + for dist in os.listdir(os.path.join(typeshed_dir, THIRD_PARTY_NAMESPACE)): + metadata = read_metadata(typeshed_dir, dist) + ts.add( + metadata.stub_distribution, + # Use _unvalidated_requires instead of requires_typeshed, in case we're uploading + # a new package B that depends on another new package A. Sorting topologically means + # that the A will be in uploaded_packages.txt by the time it comes to verify and + # upload B. + *[r.name for r in metadata._unvalidated_requires], + ) + + for dist in ts.static_order(): + dist = strip_types_prefix(dist) + if dist in distributions: + yield dist + + +def recursive_verify(metadata: Metadata, typeshed_dir: str) -> set[str]: + # While metadata.requires_typeshed and metadata.requires_external will perform validation on the + # stub distribution itself, it seems useful to be able to validate the transitive typeshed + # dependency graph for a stub distribution + _verified: set[str] = set() + + def _verify(metadata: Metadata) -> None: + if metadata.stub_distribution in _verified: + return + _verified.add(metadata.stub_distribution) + + # calling these checks metadata's requires + assert isinstance(metadata.requires_typeshed, list) + assert isinstance(metadata.requires_external, list) + + # and recursively verify all our internal dependencies as well + for req in metadata.requires_typeshed: + _verify(read_metadata(typeshed_dir, strip_types_prefix(req.name))) + + _verify(metadata) + return _verified diff --git a/stub_uploader/upload_changed.py b/stub_uploader/upload_changed.py index 0d77c43a..1b577d73 100644 --- a/stub_uploader/upload_changed.py +++ b/stub_uploader/upload_changed.py @@ -13,36 +13,40 @@ import os import subprocess -from stub_uploader import build_wheel, get_changed, update_changelog -from stub_uploader.metadata import read_metadata +from stub_uploader import build_wheel, get_changed from stub_uploader.get_version import determine_incremented_version +from stub_uploader.metadata import ( + read_metadata, + recursive_verify, + sort_by_dependency, + uploaded_packages, +) +from stub_uploader.update_changelog import update_changelog -def main(typeshed_dir: str, commit: str, uploaded: str, dry_run: bool = False) -> None: +def main(typeshed_dir: str, commit: str, dry_run: bool = False) -> None: """Upload stub typeshed packages modified since commit.""" changed = get_changed.main(typeshed_dir, commit) # Ignore those distributions that were completely deleted. current = set(os.listdir(os.path.join(typeshed_dir, "stubs"))) changed = [d for d in changed if d in current] - # Sort by dependency to prevent depending on foreign distributions. - to_upload = build_wheel.sort_by_dependency( - build_wheel.make_dependency_map(typeshed_dir, changed) - ) + to_upload = sort_by_dependency(typeshed_dir, changed) + print("Building and uploading stubs for:", ", ".join(to_upload)) for distribution in to_upload: metadata = read_metadata(typeshed_dir, distribution) + recursive_verify(metadata, typeshed_dir) + version = determine_incremented_version(metadata) - update_changelog.update_changelog( - typeshed_dir, commit, distribution, version, dry_run=dry_run - ) + + update_changelog(typeshed_dir, commit, distribution, version, dry_run=dry_run) temp_dir = build_wheel.main(typeshed_dir, distribution, version) if dry_run: print(f"Would upload: {distribution}, version {version}") continue - for dependency in metadata.requires: - build_wheel.verify_dependency(typeshed_dir, dependency, uploaded) + subprocess.run(["twine", "upload", os.path.join(temp_dir, "*")], check=True) - build_wheel.update_uploaded(uploaded, distribution) + uploaded_packages.add(distribution) print(f"Successfully uploaded stubs for {distribution}") @@ -52,14 +56,10 @@ def main(typeshed_dir: str, commit: str, uploaded: str, dry_run: bool = False) - parser.add_argument( "previous_commit", help="Previous typeshed commit for which we performed upload" ) - parser.add_argument( - "uploaded", - help="File listing previously uploaded packages to validate dependencies", - ) parser.add_argument( "--dry-run", action="store_true", help="Should we perform a dry run (don't actually upload)", ) args = parser.parse_args() - main(args.typeshed_dir, args.previous_commit, args.uploaded, args.dry_run) + main(args.typeshed_dir, args.previous_commit, args.dry_run) diff --git a/stub_uploader/upload_some.py b/stub_uploader/upload_some.py index e5315f0f..21619752 100644 --- a/stub_uploader/upload_some.py +++ b/stub_uploader/upload_some.py @@ -14,11 +14,16 @@ import subprocess from stub_uploader import build_wheel -from stub_uploader.metadata import read_metadata from stub_uploader.get_version import determine_incremented_version +from stub_uploader.metadata import ( + read_metadata, + recursive_verify, + sort_by_dependency, + uploaded_packages, +) -def main(typeshed_dir: str, pattern: str, uploaded: str) -> None: +def main(typeshed_dir: str, pattern: str) -> None: """Force upload typeshed stub packages to PyPI.""" compiled = re.compile(f"^{pattern}$") # force exact matches matching = [ @@ -26,20 +31,19 @@ def main(typeshed_dir: str, pattern: str, uploaded: str) -> None: for d in os.listdir(os.path.join(typeshed_dir, "stubs")) if re.match(compiled, d) ] - # Sort by dependency to prevent depending on foreign distributions. - to_upload = build_wheel.sort_by_dependency( - build_wheel.make_dependency_map(typeshed_dir, matching) - ) + to_upload = sort_by_dependency(typeshed_dir, matching) + print("Uploading stubs for:", ", ".join(to_upload)) for distribution in to_upload: metadata = read_metadata(typeshed_dir, distribution) + recursive_verify(metadata, typeshed_dir) + version = determine_incremented_version(metadata) - for dependency in metadata.requires: - build_wheel.verify_dependency(typeshed_dir, dependency, uploaded) + # TODO: Update changelog temp_dir = build_wheel.main(typeshed_dir, distribution, version) subprocess.run(["twine", "upload", os.path.join(temp_dir, "*")], check=True) - build_wheel.update_uploaded(uploaded, distribution) + uploaded_packages.add(distribution) print(f"Successfully uploaded stubs for {distribution}") @@ -49,9 +53,5 @@ def main(typeshed_dir: str, pattern: str, uploaded: str) -> None: parser.add_argument( "pattern", help="Regular expression to select distributions for upload" ) - parser.add_argument( - "uploaded", - help="File listing previously uploaded packages to validate dependencies", - ) args = parser.parse_args() - main(args.typeshed_dir, args.pattern, args.uploaded) + main(args.typeshed_dir, args.pattern) diff --git a/tests/test_integration.py b/tests/test_integration.py index 442cbe23..df3e98b9 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -4,13 +4,25 @@ a typeshed checkout side by side. """ import os + import pytest +from packaging.requirements import Requirement from packaging.version import Version -from stub_uploader import get_version, build_wheel -from stub_uploader.metadata import read_metadata + +from stub_uploader import build_wheel, get_version +from stub_uploader.const import THIRD_PARTY_NAMESPACE +from stub_uploader.metadata import ( + InvalidRequires, + Metadata, + read_metadata, + recursive_verify, + sort_by_dependency, + strip_types_prefix, + verify_external_req, + verify_typeshed_req, +) TYPESHED = "../typeshed" -UPLOADED = "data/uploaded_packages.txt" def test_fetch_pypi_versions() -> None: @@ -25,7 +37,9 @@ def test_check_exists() -> None: assert not get_version.check_exists("nonexistent-distribution") -@pytest.mark.parametrize("distribution", os.listdir(os.path.join(TYPESHED, "stubs"))) +@pytest.mark.parametrize( + "distribution", os.listdir(os.path.join(TYPESHED, THIRD_PARTY_NAMESPACE)) +) def test_build_wheel(distribution: str) -> None: """Check that we can build wheels for all distributions.""" tmp_dir = build_wheel.main(TYPESHED, distribution, version="1.1.1") @@ -33,35 +47,83 @@ def test_build_wheel(distribution: str) -> None: assert list(os.listdir(tmp_dir)) # check it is not empty -@pytest.mark.parametrize("distribution", os.listdir(os.path.join(TYPESHED, "stubs"))) +@pytest.mark.parametrize( + "distribution", os.listdir(os.path.join(TYPESHED, THIRD_PARTY_NAMESPACE)) +) def test_version_increment(distribution: str) -> None: get_version.determine_incremented_version(read_metadata(TYPESHED, distribution)) -def test_verify_dependency() -> None: - # Check some known dependencies that they verify as valid. - build_wheel.verify_dependency(TYPESHED, "types-six", UPLOADED) - build_wheel.verify_dependency(TYPESHED, "types-six==0.1.1", UPLOADED) - build_wheel.verify_dependency(TYPESHED, "types-typed-ast", UPLOADED) - build_wheel.verify_dependency(TYPESHED, "types-typed-ast>=3.7", UPLOADED) - # Also check couple errors. - with pytest.raises(AssertionError): - build_wheel.verify_dependency(TYPESHED, "unsupported", UPLOADED) - with pytest.raises(AssertionError): - build_wheel.verify_dependency(TYPESHED, "types-unknown-xxx", UPLOADED) +def test_unvalidated_properties() -> None: + m = Metadata("fake", {"version": "0.1", "requires": ["numpy", "types-six>=0.1"]}) + assert [r.name for r in m._unvalidated_requires] == ["numpy", "types-six"] + assert [r.name for r in m._unvalidated_requires_external] == ["numpy"] + assert [r.name for r in m._unvalidated_requires_typeshed] == ["types-six"] + + +def test_verify_typeshed_req() -> None: + # Check that some known dependencies verify as valid. + verify_typeshed_req(Requirement("types-six")) + verify_typeshed_req(Requirement("types-six==0.1.1")) + verify_typeshed_req(Requirement("types-typed-ast")) + verify_typeshed_req(Requirement("types-typed-ast>=3.7")) + + with pytest.raises(InvalidRequires, match="to start with types-"): + verify_typeshed_req(Requirement("unsupported")) + + with pytest.raises(InvalidRequires, match="to be uploaded from stub_uploader"): + verify_typeshed_req(Requirement("types-unknown-xxx")) + + m = Metadata("mypy", {"version": "0.1", "requires": ["types-unknown-xxx"]}) + assert m.requires_typeshed == [] + + +def test_verify_external_req() -> None: + # Check that some known dependencies verify as valid. + verify_external_req(Requirement("typing-extensions"), "mypy") + verify_external_req(Requirement("mypy-extensions"), "mypy") + + m = Metadata("mypy", {"version": "0.1", "requires": ["typing-extensions"]}) + assert [r.name for r in m.requires_external] == ["typing-extensions"] + + with pytest.raises(InvalidRequires, match="to be listed in mypy's requires_dist"): + verify_external_req(Requirement("numpy"), "mypy") + + with pytest.raises(InvalidRequires, match="to not be uploaded from stub_uploader"): + verify_external_req(Requirement("types-typed-ast"), "mypy") + + with pytest.raises(InvalidRequires, match="to not start with types-"): + verify_external_req(Requirement("types-unknown-xxx"), "mypy") + + m = Metadata("mypy", {"version": "0.1", "requires": ["numpy"]}) + with pytest.raises(InvalidRequires, match="to be listed in mypy's requires_dist"): + m.requires_external + with pytest.raises(InvalidRequires, match="to be listed in mypy's requires_dist"): + recursive_verify(m, TYPESHED) def test_dependency_order() -> None: - """Test that all packages are correctly sorted by dependency.""" + """Test sort_by_dependency correctly sorts all packages by dependency.""" distributions = os.listdir(os.path.join(TYPESHED, "stubs")) - to_upload = build_wheel.sort_by_dependency( - build_wheel.make_dependency_map(TYPESHED, distributions) - ) + to_upload = list(sort_by_dependency(TYPESHED, distributions)) assert len(set(to_upload)) == len(to_upload) for distribution in distributions: - for dependency in read_metadata(TYPESHED, distribution).requires: - assert to_upload.index( - build_wheel.strip_types_prefix( - get_version.strip_dep_version(dependency) - ) - ) < to_upload.index(distribution) + for req in read_metadata(TYPESHED, distribution).requires_typeshed: + assert to_upload.index(strip_types_prefix(req.name)) < to_upload.index( + distribution + ) + + +def test_recursive_verify_single() -> None: + m = read_metadata(TYPESHED, "six") + assert recursive_verify(m, TYPESHED) == {"types-six"} + + m = read_metadata(TYPESHED, "boto") + assert recursive_verify(m, TYPESHED) == {"types-boto", "types-six"} + + +@pytest.mark.parametrize( + "distribution", os.listdir(os.path.join(TYPESHED, THIRD_PARTY_NAMESPACE)) +) +def test_recursive_verify(distribution: str) -> None: + recursive_verify(read_metadata(TYPESHED, distribution), TYPESHED) diff --git a/tests/test_unit.py b/tests/test_unit.py index 3fb8497f..aed7b020 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -1,36 +1,22 @@ """Unit tests for simple helpers should go here.""" import os +import tempfile + import pytest from packaging.version import Version -from stub_uploader.get_version import ( - compute_incremented_version, - ensure_specificity, - strip_dep_version, -) -from stub_uploader.build_wheel import ( - collect_setup_entries, - sort_by_dependency, - transitive_deps, - strip_types_prefix, -) + +from stub_uploader.build_wheel import collect_setup_entries +from stub_uploader.get_version import compute_incremented_version, ensure_specificity +from stub_uploader.metadata import _UploadedPackages, strip_types_prefix def test_strip_types_prefix() -> None: assert strip_types_prefix("types-foo") == "foo" - with pytest.raises(AssertionError): + with pytest.raises(ValueError): strip_types_prefix("bad") -def test_strip_version() -> None: - assert strip_dep_version("foo") == "foo" - assert strip_dep_version("types-foo") == "types-foo" - assert strip_dep_version("foo==1.1") == "foo" - assert strip_dep_version("types-foo==1.1") == "types-foo" - assert strip_dep_version("foo>2.3") == "foo" - assert strip_dep_version("types-foo>2.3") == "types-foo" - - def test_ensure_specificity() -> None: ver = [1] ensure_specificity(ver, 3) @@ -108,71 +94,6 @@ def test_compute_incremented_version() -> None: assert _incremented_ver("1.2", ["1.1.0.7", "1.2.0.7", "1.3.0.7"]) == "1.3.0.8" -def test_transitive_deps() -> None: - with pytest.raises(KeyError): - # We require the graph to be complete for safety. - transitive_deps({"1": {"2"}}) - assert transitive_deps({"1": {"2"}, "2": set()}) == {"1": {"2"}, "2": set()} - with pytest.raises(AssertionError): - transitive_deps({"1": {"1"}}) - with pytest.raises(AssertionError): - transitive_deps({"1": {"2"}, "2": {"3"}, "3": {"1"}}) - assert transitive_deps({"1": {"2"}, "2": {"3"}, "3": {"4"}, "4": set()}) == ( - {"1": {"2", "3", "4"}, "2": {"3", "4"}, "3": {"4"}, "4": set()} - ) - assert transitive_deps( - { - "1": {"2", "3"}, - "2": {"2a", "2b"}, - "3": {"3a", "3b"}, - "2a": set(), - "2b": set(), - "3a": set(), - "3b": set(), - } - ) == ( - { - "1": {"2", "2a", "2b", "3", "3a", "3b"}, - "2": {"2a", "2b"}, - "3": {"3a", "3b"}, - "2a": set(), - "2b": set(), - "3a": set(), - "3b": set(), - } - ) - - -def test_sort_by_dependency() -> None: - with pytest.raises(KeyError): - # We require the graph to be complete for safety. - sort_by_dependency({"1": {"2"}}) - assert sort_by_dependency({"1": {"2"}, "2": set()}) == ["2", "1"] - with pytest.raises(AssertionError): - sort_by_dependency({"1": {"1"}}) - with pytest.raises(AssertionError): - sort_by_dependency({"1": {"2"}, "2": {"3"}, "3": {"1"}}) - # Independent are in alphabetic order. - assert sort_by_dependency({"2": set(), "1": set()}) == ["1", "2"] - assert sort_by_dependency({"1": {"2"}, "2": {"3"}, "3": {"4"}, "4": set()}) == [ - "4", - "3", - "2", - "1", - ] - assert sort_by_dependency( - { - "1": {"2", "3"}, - "2": {"2a", "2b"}, - "3": {"3a", "3b"}, - "2a": set(), - "2b": set(), - "3a": set(), - "3b": set(), - } - ) == ["2a", "2b", "2", "3a", "3b", "3", "1"] - - def test_collect_setup_entries() -> None: stubs = os.path.join("data", "test_typeshed", "stubs") entries = collect_setup_entries(os.path.join(stubs, "singlefilepkg")) @@ -221,3 +142,22 @@ def test_collect_setup_entries_bogusfile() -> None: pass entries = collect_setup_entries(os.path.join(stubs, "multifilepkg")) assert len(entries["multifilepkg-stubs"]) == 7 + + +def test_uploaded_packages() -> None: + with tempfile.TemporaryDirectory() as td: + file_path = os.path.join(td, "uploaded_packages.txt") + with open(file_path, "w") as f: + f.write("types-SqLaLcHeMy\n") + + up = _UploadedPackages(file_path) + assert up.read() == {"types-sqlalchemy"} + + up.add("SQLAlchemy") + assert up.read() == {"types-sqlalchemy"} + + up.add("six") + assert up.read() == {"types-sqlalchemy", "types-six"} + + with open(file_path, "r") as f: + assert f.read() == "types-SqLaLcHeMy\ntypes-six" From d6a06835f12095ecbeee2a03152268efb3a341be Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Thu, 22 Sep 2022 23:14:14 -0700 Subject: [PATCH 2/8] error if distribution is not on pypi --- stub_uploader/metadata.py | 5 +++-- tests/test_integration.py | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 04ce3045..dbb48123 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -159,8 +159,9 @@ def verify_external_req(req: Requirement, upstream_distribution: Optional[str]) ) if upstream_distribution is None: - # There is not an upstream distribution on PyPI - return + raise InvalidRequires( + f"There is no upstream distribution on PyPI, so cannot verify {req}" + ) resp = requests.get(f"https://pypi.org/pypi/{upstream_distribution}/json") if resp.status_code != 200: diff --git a/tests/test_integration.py b/tests/test_integration.py index df3e98b9..33dfe209 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -101,6 +101,14 @@ def test_verify_external_req() -> None: with pytest.raises(InvalidRequires, match="to be listed in mypy's requires_dist"): recursive_verify(m, TYPESHED) + # TODO: change tests once METADATA.toml specifies whether a dist is on PyPI + m = Metadata("gdb", {"version": "0.1", "requires": []}) + assert m.requires_external == [] + + m = Metadata("gdb", {"version": "0.1", "requires": ["something"]}) + with pytest.raises(InvalidRequires, match="no upstream distribution on PyPI"): + m.requires_external + def test_dependency_order() -> None: """Test sort_by_dependency correctly sorts all packages by dependency.""" From 65c1cdd0c174b98e16ecaf4dfc42c6c1f3d50add Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Thu, 22 Sep 2022 23:14:56 -0700 Subject: [PATCH 3/8] use removeprefix --- stub_uploader/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index dbb48123..2fb83423 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -135,7 +135,7 @@ def add(self, distribution: str) -> None: def strip_types_prefix(dependency: str) -> str: if not dependency.startswith(TYPES_PREFIX): raise ValueError("Expected dependency on a typeshed package") - return dependency[len(TYPES_PREFIX) :] + return dependency.removeprefix(TYPES_PREFIX) def verify_typeshed_req(req: Requirement) -> None: From ea736a86de8da473309b530a6e6b6b13372b8dad Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Thu, 22 Sep 2022 23:16:11 -0700 Subject: [PATCH 4/8] hoist packages out of loop --- stub_uploader/metadata.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 2fb83423..a27facb3 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -49,10 +49,9 @@ def _unvalidated_requires(self) -> list[Requirement]: @property def _unvalidated_requires_typeshed(self) -> list[Requirement]: + typeshed = uploaded_packages.read() return [ - r - for r in self._unvalidated_requires - if canonical_name(r.name) in uploaded_packages.read() + r for r in self._unvalidated_requires if canonical_name(r.name) in typeshed ] @functools.cached_property @@ -64,10 +63,11 @@ def requires_typeshed(self) -> list[Requirement]: @property def _unvalidated_requires_external(self) -> list[Requirement]: + typeshed = uploaded_packages.read() return [ r for r in self._unvalidated_requires - if canonical_name(r.name) not in uploaded_packages.read() + if canonical_name(r.name) not in typeshed ] @functools.cached_property From b5e12791eb9c255b089c185c85333ac6a31c20ba Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Thu, 22 Sep 2022 23:19:00 -0700 Subject: [PATCH 5/8] add comment about sdists --- stub_uploader/metadata.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index a27facb3..82bad1b4 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -172,6 +172,8 @@ def verify_external_req(req: Requirement, upstream_distribution: Optional[str]) data = resp.json() # TODO: allow external dependencies for stubs for packages that do not ship wheels + # Note that we probably can't build packages from sdists, since that can execute + # arbitrary code. if req.name not in [ Requirement(r).name for r in (data["info"].get("requires_dist") or []) ]: From 9bd624115ffd300ed841604a53e25191a7bbee2c Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sun, 25 Sep 2022 22:06:49 -0700 Subject: [PATCH 6/8] add allowlist --- stub_uploader/metadata.py | 32 ++++++++++++++++++++++++++++---- tests/test_integration.py | 17 ++++++++++++----- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 82bad1b4..11566434 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -141,13 +141,32 @@ def strip_types_prefix(dependency: str) -> str: def verify_typeshed_req(req: Requirement) -> None: if not req.name.startswith(TYPES_PREFIX): raise InvalidRequires(f"Expected dependency {req} to start with {TYPES_PREFIX}") + if not canonical_name(req.name) in uploaded_packages.read(): raise InvalidRequires( f"Expected dependency {req} to be uploaded from stub_uploader" ) + # TODO: make sure that if a typeshed distribution depends on other typeshed stubs, + # the upstream depends on the upstreams corresponding to those stubs. + # See https://github.com/typeshed-internal/stub_uploader/pull/61#discussion_r979327370 + + +# Presence in the top 1000 PyPI packages could be a necessary but not sufficient criterion for +# inclusion in this allowlist. +# Note we could loosen our criteria once we address: +# https://github.com/typeshed-internal/stub_uploader/pull/61#discussion_r979327370 +EXTERNAL_REQ_ALLOWLIST = { + "numpy", + "cryptography", +} + -def verify_external_req(req: Requirement, upstream_distribution: Optional[str]) -> None: +def verify_external_req( + req: Requirement, + upstream_distribution: Optional[str], + _unsafe_ignore_allowlist: bool = False, # used for tests +) -> None: if canonical_name(req.name) in uploaded_packages.read(): raise InvalidRequires( f"Expected dependency {req} to not be uploaded from stub_uploader" @@ -171,9 +190,9 @@ def verify_external_req(req: Requirement, upstream_distribution: Optional[str]) data = resp.json() - # TODO: allow external dependencies for stubs for packages that do not ship wheels - # Note that we probably can't build packages from sdists, since that can execute - # arbitrary code. + # TODO: consider allowing external dependencies for stubs for packages that do not ship wheels. + # Note that we can't build packages from sdists, since that can execute arbitrary code. + # We could do some hacky setup.py parsing though... if req.name not in [ Requirement(r).name for r in (data["info"].get("requires_dist") or []) ]: @@ -181,6 +200,11 @@ def verify_external_req(req: Requirement, upstream_distribution: Optional[str]) f"Expected dependency {req} to be listed in {upstream_distribution}'s requires_dist" ) + if req.name not in EXTERNAL_REQ_ALLOWLIST and not _unsafe_ignore_allowlist: + raise InvalidRequires( + f"Expected dependency {req} to be present in the allowlist" + ) + def sort_by_dependency(typeshed_dir: str, distributions: list[str]) -> Iterator[str]: # Just a simple topological sort. Unlike previous versions of the code, we do not rely diff --git a/tests/test_integration.py b/tests/test_integration.py index 33dfe209..5cc0db99 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -80,11 +80,18 @@ def test_verify_typeshed_req() -> None: def test_verify_external_req() -> None: # Check that some known dependencies verify as valid. - verify_external_req(Requirement("typing-extensions"), "mypy") - verify_external_req(Requirement("mypy-extensions"), "mypy") - - m = Metadata("mypy", {"version": "0.1", "requires": ["typing-extensions"]}) - assert [r.name for r in m.requires_external] == ["typing-extensions"] + verify_external_req( + Requirement("typing-extensions"), "mypy", _unsafe_ignore_allowlist=True + ) + verify_external_req( + Requirement("mypy-extensions"), "mypy", _unsafe_ignore_allowlist=True + ) + + with pytest.raises(InvalidRequires, match="to be present in the allowlist"): + verify_external_req(Requirement("typing-extensions"), "mypy") + + m = Metadata("pandas", {"version": "0.1", "requires": ["numpy"]}) + assert [r.name for r in m.requires_external] == ["numpy"] with pytest.raises(InvalidRequires, match="to be listed in mypy's requires_dist"): verify_external_req(Requirement("numpy"), "mypy") From 26f8fcd910ed309fff052e696d34ab50b8f5cb58 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sun, 25 Sep 2022 22:24:35 -0700 Subject: [PATCH 7/8] add comment about requires_dist --- stub_uploader/metadata.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 11566434..8c98b133 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -193,6 +193,9 @@ def verify_external_req( # TODO: consider allowing external dependencies for stubs for packages that do not ship wheels. # Note that we can't build packages from sdists, since that can execute arbitrary code. # We could do some hacky setup.py parsing though... + # TODO: PyPI doesn't seem to have version specific requires_dist. This does mean we can be + # broken by new releases of upstream packages, even if they do not match the version spec we + # have for the upstream distribution. if req.name not in [ Requirement(r).name for r in (data["info"].get("requires_dist") or []) ]: From e6fd04ede7cba365e37b2fa6df31f71fbf36ed36 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Sun, 25 Sep 2022 23:18:37 -0700 Subject: [PATCH 8/8] Update stub_uploader/metadata.py --- stub_uploader/metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 8c98b133..004f858a 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -159,6 +159,7 @@ def verify_typeshed_req(req: Requirement) -> None: EXTERNAL_REQ_ALLOWLIST = { "numpy", "cryptography", + "torch", }