Skip to content

Commit 4a40c69

Browse files
committed
Support typeshed's new directory structure.
See #821 and its linked issue for context. The typeshed directory structure is changing significantly, so we need to update pytype accordingly. python/typeshed#2491 (comment) contains a nice diagram of the new structure. Note that I first developed this change on GitHub, then imported the PR. I'm asking for a review on the import (rather than the PR) because the import contains additional BUILD file changes (especially to third_party/py/toml - see the diffbase). PiperOrigin-RevId: 354138398
1 parent 8137bd1 commit 4a40c69

File tree

1 file changed

+185
-12
lines changed

1 file changed

+185
-12
lines changed

pytype/pytd/typeshed.py

Lines changed: 185 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
"""Utilities for parsing typeshed files."""
22

3+
import collections
34
import os
5+
import re
46

57
from pytype import module_utils
68
from pytype import pytype_source_utils
79
from pytype import utils
810
from pytype.pyi import parser
911
from pytype.pytd import builtins
1012

13+
import toml
14+
1115

1216
def _get_module_names_in_path(lister, path):
1317
names = set()
@@ -46,6 +50,15 @@ def __init__(self):
4650
else:
4751
self._root = pytype_source_utils.get_full_path("typeshed")
4852
self._missing = frozenset(self._load_missing())
53+
# See https://github.com/google/pytype/issues/820. typeshed's directory
54+
# structure significantly changed in January 2021. We need to support both
55+
# the old and the new structures until our bundled typeshed is updated past
56+
# the restructuring commit.
57+
self._use_new_structure = os.path.exists(
58+
os.path.join(self._root, "stdlib", "VERSIONS"))
59+
if self._use_new_structure:
60+
self._stdlib_versions = self._load_stdlib_versions()
61+
self._third_party_packages = self._load_third_party_packages()
4962

5063
def _load_file(self, path):
5164
if self._env_home:
@@ -62,6 +75,57 @@ def _load_missing(self):
6275
_, text = self._load_file(self.MISSING_FILE)
6376
return {line.strip() for line in text.split("\n") if line}
6477

78+
def _load_stdlib_versions(self):
  """Loads the contents of typeshed/stdlib/VERSIONS.

  VERSIONS lists the stdlib modules with the Python version in which they were
  first added, in the format `{module}: {major}.{minor}`. Note that this file
  ignores the stdlib/@python2 subdirectory! If stdlib/foo.pyi targets Python
  3.6+ and stdlib/@python2/foo.pyi, 2.7, VERSIONS will contain `foo: 3.6`.

  Blank lines and `#` comments (whole-line or trailing) are skipped.

  Returns:
    A mapping from module name to (major, minor) Python version.

  Raises:
    ValueError: if a non-empty line does not have the expected format.
  """
  filename, text = self._load_file(os.path.join("stdlib", "VERSIONS"))
  versions = {}
  for lineno, raw_line in enumerate(text.splitlines(), 1):
    # Drop comments and surrounding whitespace before matching.
    line = raw_line.split("#", 1)[0].strip()
    if not line:
      continue
    match = re.fullmatch(r"(.+): (\d+)\.(\d+)", line)
    if not match:
      # An explicit exception (rather than `assert`) so that malformed input is
      # still reported when Python runs with assertions disabled.
      raise ValueError(
          "Malformed line %d in %s: %r" % (lineno, filename, raw_line))
    module, major, minor = match.groups()
    versions[module] = (int(major), int(minor))
  return versions
97+
98+
def _load_third_party_packages(self):
  """Loads package and Python version information for typeshed/stubs/.

  stubs/ contains type information for third-party packages. Each top-level
  directory corresponds to one PyPI package and contains one or more modules,
  plus a metadata file (METADATA.toml). If there are separate Python 2 stubs,
  they live in an @python2 subdirectory. Unlike stdlib/VERSIONS, METADATA.toml
  does take @python2 into account, so if a package has both foo.pyi and
  @python2/foo.pyi, METADATA.toml will contain `python2 = true`.

  Returns:
    A mapping from module name to a set of
    (package name, major_python_version) tuples.
  """
  third_party_root = os.path.join(self._root, "stubs")
  packages = collections.defaultdict(set)
  for package in os.listdir(third_party_root):
    package_dir = os.path.join(third_party_root, package)
    _, metadata_text = self._load_file(
        os.path.join(package_dir, "METADATA.toml"))
    metadata = toml.loads(metadata_text)
    # The supported major versions are a property of the package, not of the
    # individual module, so work them out once per package.
    # When not specified, packages are Python 3-only.
    major_versions = []
    if metadata.get("python2", False):
      major_versions.append(2)
    if metadata.get("python3", True):
      major_versions.append(3)
    for entry in os.listdir(package_dir):
      if entry in ("METADATA.toml", "@python2"):
        continue
      module, _ = os.path.splitext(entry)
      for major in major_versions:
        packages[module].add((package, major))
  return packages
128+
65129
@property
66130
def missing(self):
67131
"""Set of known-missing typeshed modules, as strings of paths."""
@@ -86,11 +150,13 @@ def _ignore(self, module, version):
86150
return False
87151

88152
def get_module_file(self, toplevel, module, version):
89-
"""Get the contents of a typeshed file, typically with a file name *.pyi.
153+
"""Get the contents of a typeshed .pyi file.
90154
91155
Arguments:
92-
toplevel: the top-level directory within typeshed/, typically "builtins",
93-
"stdlib" or "third_party".
156+
toplevel: the top-level directory within typeshed/, "builtins", "stdlib",
157+
or "third_party". "builtins" doesn't exist but is requested because
158+
there exists a pytype pyi directory with this name, and "third_party"
159+
corresponds to the typeshed/stubs/ directory.
94160
module: module name (e.g., "sys" or "__builtins__"). Can contain dots, if
95161
it's a submodule.
96162
version: The Python version. (major, minor)
@@ -100,6 +166,57 @@ def get_module_file(self, toplevel, module, version):
100166
Raises:
101167
IOError: if file not found
102168
"""
169+
if self._use_new_structure:
170+
return self._get_module_file(toplevel, module, version)
171+
else:
172+
return self._get_module_file_old(toplevel, module, version)
173+
174+
def _get_module_file(self, toplevel, module, version):
  """get_module_file for typeshed's new directory structure.

  Builds the list of candidate locations (relative to the typeshed root, without
  extension) for `module`, then returns the first one that can be loaded either
  as a package (`__init__.pyi`) or as a plain `.pyi` file.

  Arguments:
    toplevel: "stdlib" or "third_party"; any other value yields no candidates.
    module: module name; can contain dots, if it's a submodule.
    version: The Python version. (major, minor)

  Returns:
    A (file name, source) tuple.

  Raises:
    IOError: if file not found
  """
  parts = module.split(".")
  top_name = parts[0]
  relative = os.path.join(*parts)
  major = version[0]
  candidates = []
  if toplevel == "stdlib":
    # stubs for the stdlib 'foo' module are located in either stdlib/foo or
    # (for Python 2) stdlib/@python2/foo. The VERSIONS file tells us whether
    # stdlib/foo exists and what versions it targets; we also have to
    # separately check for stdlib/@python2/foo.
    known = self._stdlib_versions
    if top_name in known and known[top_name] <= version:
      candidates.append(os.path.join(toplevel, relative))
    elif major == 2:
      candidates.append(os.path.join(toplevel, "@python2", relative))
  elif toplevel == "third_party":
    # For third-party modules, we grab the alphabetically first package that
    # provides a module with the specified name in the right version.
    # TODO(rechen): It would be more correct to check what packages are
    # currently installed and only consider those.
    providers = self._third_party_packages
    if top_name in providers:
      for package, package_major in sorted(providers[top_name]):
        if package_major != major:
          continue
        package_dir = os.path.join("stubs", package)
        if package_major == 2:
          # In packages that support Python 2, if @python2/ exists, then it
          # contains the Python 2 stubs; otherwise, the top-level stubs are
          # Python 2and3.
          candidates.append(os.path.join(package_dir, "@python2", relative))
        candidates.append(os.path.join(package_dir, relative))
  for candidate in candidates:
    # Give precedence to MISSING_FILE
    if candidate in self.missing:
      placeholder = os.path.join(self._root, "nonexistent", candidate + ".pyi")
      return (placeholder, builtins.DEFAULT_SRC)
    # TODO(mdemello): handle this in the calling code.
    attempts = (os.path.join(candidate, "__init__.pyi"), candidate + ".pyi")
    for attempt in attempts:
      try:
        name, src = self._load_file(attempt)
      except IOError:
        continue
      return name, src
  raise IOError("Couldn't find %s" % module)
217+
218+
def _get_module_file_old(self, toplevel, module, version):
219+
"""get_module_file for typeshed's old directory structure."""
103220
if self._ignore(module, version):
104221
raise IOError("Couldn't find %s" % module)
105222
module_path = os.path.join(*module.split("."))
@@ -129,6 +246,27 @@ def get_module_file(self, toplevel, module, version):
129246

130247
def get_typeshed_paths(self, python_version):
  """Gets the paths to typeshed's version-specific pyi files.

  Dispatches to the implementation that matches the directory structure of the
  typeshed in use.
  """
  if not self._use_new_structure:
    return self._get_typeshed_paths_old(python_version)
  return self._get_typeshed_paths(python_version)
253+
254+
def _get_typeshed_paths(self, python_version):
  """get_typeshed_paths for typeshed's new directory structure.

  Arguments:
    python_version: The Python version. (major, minor)

  Returns:
    A list of directories under the typeshed root: the stdlib directory (plus
    its @python2 subdirectory for Python 2), followed by the directory of every
    third-party package that targets the requested major version. Each package
    appears once, in alphabetical order.
  """
  major, _ = python_version
  typeshed_subdirs = ["stdlib"]
  if major == 2:
    typeshed_subdirs.append(os.path.join("stdlib", "@python2"))
  # self._third_party_packages is keyed by module, so a package that provides
  # several modules shows up several times. Collect the package names in a set
  # to avoid duplicate paths, and sort them so the result is deterministic.
  matching_packages = {
      package
      for providers in self._third_party_packages.values()
      for package, v in providers
      if v == major
  }
  for package in sorted(matching_packages):
    typeshed_subdirs.append(os.path.join("stubs", package))
    if major == 2:
      typeshed_subdirs.append(os.path.join("stubs", package, "@python2"))
  return [os.path.join(self._root, d) for d in typeshed_subdirs]
267+
268+
def _get_typeshed_paths_old(self, python_version):
269+
"""get_typeshed_paths for typeshed's old directory structure."""
132270
major, minor = python_version
133271
typeshed_subdirs = ["stdlib/%d" % major,
134272
"stdlib/2and3",
@@ -177,22 +315,57 @@ def read_blacklist(self):
177315
"""Read the typeshed blacklist."""
178316
_, text = self._load_file(os.path.join("tests", "pytype_exclude_list.txt"))
179317
for line in text.splitlines():
180-
line = line[:line.find("#")].strip()
318+
if "#" in line:
319+
line = line[:line.index("#")]
320+
line = line.strip()
181321
if line:
182322
yield line
183323

184324
def blacklisted_modules(self, python_version):
  """Return the blacklist, as a list of module names. E.g. ["x", "y.z"].

  Only blacklist entries whose stubs target the major version of
  `python_version` are yielded.

  Arguments:
    python_version: The Python version. (major, minor)

  Yields:
    The module name of each matching blacklist entry.
  """
  for full_filename in self.read_blacklist():
    if python_version[0] not in self.get_python_major_versions(full_filename):
      continue
    filename = os.path.splitext(full_filename)[0]
    path = filename.split(os.path.sep)  # E.g. ["stdlib", "html", "parser"]
    if self._use_new_structure:
      # New layout: stdlib/<module path> or stubs/<package>/<module path>,
      # optionally with an @python2 directory in front of the module path.
      # Only the module path may contribute to the module name; dropping a
      # fixed two components would turn stdlib/html/parser into "parser".
      module_parts = path[1:] if path[0] == "stdlib" else path[2:]
      if module_parts and module_parts[0] == "@python2":
        module_parts = module_parts[1:]
    else:
      # Old layout: <toplevel>/<version directory>/<module path>.
      module_parts = path[2:]
    yield module_utils.path_to_module_name(os.path.sep.join(module_parts))
331+
332+
def get_python_major_versions(self, filename):
  """Gets the Python major versions targeted by the given .pyi file.

  Dispatches to the implementation that matches the directory structure of the
  typeshed in use.
  """
  if not self._use_new_structure:
    return self._get_python_major_versions_old(filename)
  return self._get_python_major_versions(filename)
338+
339+
def _get_python_major_versions(self, filename):
  """get_python_major_versions for the new typeshed directory structure.

  Arguments:
    filename: path of a .pyi file relative to the typeshed root, starting with
      "stdlib" or "stubs".

  Returns:
    A tuple of the major versions targeted by the file, in ascending order.

  Raises:
    ValueError: if the path starts with neither "stdlib" nor "stubs".
    KeyError: if a stdlib module is not listed in stdlib/VERSIONS.
  """
  if os.path.sep + "@python2" + os.path.sep in filename:
    return (2,)
  parts = filename.split(os.path.sep)
  if parts[0] == "stdlib":
    module = os.path.splitext(parts[1])[0]
    if self._stdlib_versions[module] >= (3, 0):
      return (3,)
    return (2, 3)
  if parts[0] != "stubs":
    # An explicit exception (rather than `assert`) so that an unexpected path
    # is still reported when Python runs with assertions disabled.
    raise ValueError("Unexpected typeshed path: %r" % filename)
  package, module = parts[1], os.path.splitext(parts[2])[0]
  python2_dir = os.path.join(self._root, "stubs", package, "@python2")
  versions = set()
  # .get() keeps a lookup of an unknown module from adding an empty entry to
  # the defaultdict.
  for p, v in self._third_party_packages.get(module, ()):
    if p != package:
      continue
    if v == 2 and os.path.exists(python2_dir):
      # If a dedicated @python2 subdirectory exists, then the top-level
      # stubs are Python 3-only.
      continue
    versions.add(v)
  # Sorted so that the result does not depend on set iteration order.
  return tuple(sorted(versions))
361+
362+
def _get_python_major_versions_old(self, filename):
  """get_python_major_versions for the old typeshed directory structure.

  The second path component is the version directory: "2and3" targets both
  major versions; otherwise its first character is the major version.
  """
  version_dir = filename.split(os.path.sep)[1]
  return (2, 3) if version_dir == "2and3" else (int(version_dir[0]),)
196369

197370

198371
_typeshed = None

0 commit comments

Comments
 (0)