From c065b07c4b8b5db664f18e63c7496794d0decc55 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 8 Sep 2021 23:18:51 +0100 Subject: [PATCH 1/5] bpo-45017: move opcode-related logic from modulefinder to dis --- Doc/library/dis.rst | 18 ++++++++++++++++++ Lib/dis.py | 35 +++++++++++++++++++++++++++++++++++ Lib/modulefinder.py | 33 +++++++-------------------------- Lib/test/test_dis.py | 31 +++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 26 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 21747069b3a195..f035949960f54d 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -248,6 +248,24 @@ operation is being performed, so the intermediate analysis object isn't useful: return a list of these offsets. +.. function:: find_imports(code) + + Find import statements in *code* + + Generate triplets (`name`, `level`, `fromlist`) where + `name` is the imported module and `level`, `fromlist` are + the corresponding args to `__import__`. + + .. versionadded:: 3.11 + + +.. function:: find_store_names(code) + + Generate a sequence of names of variables which are written in *code*. + + .. versionadded:: 3.11 + + .. function:: stack_effect(opcode, oparg=None, *, jump=None) Compute the stack effect of *opcode* with argument *oparg*. diff --git a/Lib/dis.py b/Lib/dis.py index 66487dce0eefc0..abed33c5b46a9a 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -535,6 +535,41 @@ def findlinestarts(code): yield start, line return +def find_imports(co): + """Find import statements in the code + + Generate triplets (name, level, fromlist) where + name is the imported module and level, fromlist are + the corresponding args to __import__. + """ + IMPORT_NAME = opmap['IMPORT_NAME'] + LOAD_CONST = opmap['LOAD_CONST'] + + consts = co.co_consts + names = co.co_names + opargs = [(op, arg) for _, op, arg in _unpack_opargs(co.co_code) + if op != EXTENDED_ARG] + for i, (op, oparg) in enumerate(opargs): + if (op == IMPORT_NAME and i >= 2 + and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST): + level = consts[opargs[i-2][1]] + fromlist = consts[opargs[i-1][1]] + yield (names[oparg], level, fromlist) + +def find_store_names(co): + """Find names of variables which are written in the code + + Generate sequence of strings + """ + STORE_OPS = ( + opmap['STORE_NAME'], + opmap['STORE_GLOBAL']) + + names = co.co_names + for _, op, arg in _unpack_opargs(co.co_code): + if op in STORE_OPS: + yield names[arg] + class Bytecode: """The bytecode operations of a piece of code diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index cb455f40c4d789..02e4717e9ce377 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -8,14 +8,6 @@ import io import sys - -LOAD_CONST = dis.opmap['LOAD_CONST'] -IMPORT_NAME = dis.opmap['IMPORT_NAME'] -STORE_NAME = dis.opmap['STORE_NAME'] -STORE_GLOBAL = dis.opmap['STORE_GLOBAL'] -STORE_OPS = STORE_NAME, STORE_GLOBAL -EXTENDED_ARG = dis.EXTENDED_ARG - # Old imp constants: _SEARCH_ERROR = 0 @@ -394,24 +386,13 @@ def _safe_import_hook(self, name, caller, fromlist, level=-1): def scan_opcodes(self, co): # Scan the code, and yield 'interesting' opcode combinations - code = co.co_code - names = co.co_names - consts = co.co_consts - opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code) - if op != EXTENDED_ARG] - for i, (op, oparg) in enumerate(opargs): - if op in STORE_OPS: - yield "store", (names[oparg],) - continue - if (op == IMPORT_NAME and i >= 2 - and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST): - level = consts[opargs[i-2][1]] - fromlist = consts[opargs[i-1][1]] - if level == 0: # absolute import - yield "absolute_import", (fromlist, names[oparg]) - else: # relative import - yield "relative_import", (level, fromlist, names[oparg]) - continue + for name in dis.find_store_names(co): + yield "store", (name,) + for name, level, fromlist in dis.find_imports(co): + if level == 0: # absolute import + yield "absolute_import", (fromlist, name) + else: # relative import + yield "relative_import", (level, fromlist, name) def scan_code(self, co, m): code = co.co_code diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index b97e41cdfab5ec..cf24fb289faf3f 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1326,5 +1326,36 @@ def test_assert_not_in_with_arg_in_bytecode(self): with self.assertRaises(AssertionError): self.assertNotInBytecode(code, "LOAD_CONST", 1) +class TestFinderMethods(unittest.TestCase): + def test_find_imports(self): + cases = [ + ("import a.b.c", ('a.b.c', 0, None)), + ("from a.b import c", ('a.b', 0, ('c',))), + ("from a.b import c as d", ('a.b', 0, ('c',))), + ("from ...a.b import c as d", ('a.b', 3, ('c',))), + ("from ..a.b import c as d, e as f", ('a.b', 2, ('c', 'e'))) + ] + for src, expected in cases: + with self.subTest(src=src): + code = compile(src, "", "exec") + res = tuple(dis.find_imports(code)) + self.assertEqual(len(res), 1) + self.assertEqual(res[0], expected) + + def test_find_store_names(self): + cases = [ + ("x+y", ()), + ("x=y=1", ('x', 'y')), + ("x+=y", ('x',)), + ("global x\nx=y=1", ('x', 'y')), + ("global x\nz=x", ('z',)), + ] + for src, expected in cases: + with self.subTest(src=src): + code = compile(src, "", "exec") + res = tuple(dis.find_store_names(code)) + self.assertEqual(res, expected) + + if __name__ == "__main__": unittest.main() From 946adb9d065f92d2d8d4801f7c518d030af7b36f Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 8 Sep 2021 22:24:16 +0000 Subject: [PATCH 2/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2021-09-08-22-24-15.bpo-45017.q0t8nk.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-09-08-22-24-15.bpo-45017.q0t8nk.rst diff --git a/Misc/NEWS.d/next/Library/2021-09-08-22-24-15.bpo-45017.q0t8nk.rst b/Misc/NEWS.d/next/Library/2021-09-08-22-24-15.bpo-45017.q0t8nk.rst new file mode 100644 index 00000000000000..64cfa7e5f6dded --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-09-08-22-24-15.bpo-45017.q0t8nk.rst @@ -0,0 +1,3 @@ +Add two functions to the :mod:`dis` module. :func:`find_imports` generates information about import statements in a code object and :func:`find_store_name` generates a sequence of names that the code object writes. + +Refactored :mod:`modulefinder` to use these functions instead of scanning opcodes itself. \ No newline at end of file From 62afef8052cb00730a7e946e8e0109282dda144e Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 9 Sep 2021 00:03:13 +0100 Subject: [PATCH 3/5] tuple to set --- Lib/dis.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/dis.py b/Lib/dis.py index abed33c5b46a9a..5b96e55763e16a 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -561,9 +561,10 @@ def find_store_names(co): Generate sequence of strings """ - STORE_OPS = ( + STORE_OPS = { opmap['STORE_NAME'], - opmap['STORE_GLOBAL']) + opmap['STORE_GLOBAL'] + } names = co.co_names for _, op, arg in _unpack_opargs(co.co_code): From 151c27a33ee92785faa8582c51afc78053aa9a48 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 9 Sep 2021 00:03:43 +0100 Subject: [PATCH 4/5] added test for star imports --- Lib/test/test_dis.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index cf24fb289faf3f..7e293e63bd1bb6 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1332,8 +1332,10 @@ def test_find_imports(self): ("import a.b.c", ('a.b.c', 0, None)), ("from a.b import c", ('a.b', 0, ('c',))), ("from a.b import c as d", ('a.b', 0, ('c',))), + ("from a.b import *", ('a.b', 0, ('*',))), ("from ...a.b import c as d", ('a.b', 3, ('c',))), - ("from ..a.b import c as d, e as f", ('a.b', 2, ('c', 'e'))) + ("from ..a.b import c as d, e as f", ('a.b', 2, ('c', 'e'))), + ("from ..a.b import *", ('a.b', 2, ('*',))), ] for src, expected in cases: with self.subTest(src=src): From 117f7d50e72b4d35aea89a3d35db48ca8b794b5e Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 9 Sep 2021 12:43:12 +0100 Subject: [PATCH 5/5] make the two new dis functions private --- Doc/library/dis.rst | 18 ------------------ Lib/dis.py | 4 ++-- Lib/modulefinder.py | 4 ++-- Lib/test/test_dis.py | 8 ++++---- .../2021-09-08-22-24-15.bpo-45017.q0t8nk.rst | 3 --- 5 files changed, 8 insertions(+), 29 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2021-09-08-22-24-15.bpo-45017.q0t8nk.rst diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index f035949960f54d..21747069b3a195 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -248,24 +248,6 @@ operation is being performed, so the intermediate analysis object isn't useful: return a list of these offsets. -.. function:: find_imports(code) - - Find import statements in *code* - - Generate triplets (`name`, `level`, `fromlist`) where - `name` is the imported module and `level`, `fromlist` are - the corresponding args to `__import__`. - - .. versionadded:: 3.11 - - -.. function:: find_store_names(code) - - Generate a sequence of names of variables which are written in *code*. - - .. versionadded:: 3.11 - - .. function:: stack_effect(opcode, oparg=None, *, jump=None) Compute the stack effect of *opcode* with argument *oparg*. diff --git a/Lib/dis.py b/Lib/dis.py index 5b96e55763e16a..a073572e59e66d 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -535,7 +535,7 @@ def findlinestarts(code): yield start, line return -def find_imports(co): +def _find_imports(co): """Find import statements in the code Generate triplets (name, level, fromlist) where @@ -556,7 +556,7 @@ def find_imports(co): fromlist = consts[opargs[i-1][1]] yield (names[oparg], level, fromlist) -def find_store_names(co): +def _find_store_names(co): """Find names of variables which are written in the code Generate sequence of strings diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index 02e4717e9ce377..a0a020f9eeb9b4 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -386,9 +386,9 @@ def _safe_import_hook(self, name, caller, fromlist, level=-1): def scan_opcodes(self, co): # Scan the code, and yield 'interesting' opcode combinations - for name in dis.find_store_names(co): + for name in dis._find_store_names(co): yield "store", (name,) - for name, level, fromlist in dis.find_imports(co): + for name, level, fromlist in dis._find_imports(co): if level == 0: # absolute import yield "absolute_import", (fromlist, name) else: # relative import diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 7e293e63bd1bb6..a140a89f0e7e8d 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1327,7 +1327,7 @@ def test_assert_not_in_with_arg_in_bytecode(self): self.assertNotInBytecode(code, "LOAD_CONST", 1) class TestFinderMethods(unittest.TestCase): - def test_find_imports(self): + def test__find_imports(self): cases = [ ("import a.b.c", ('a.b.c', 0, None)), ("from a.b import c", ('a.b', 0, ('c',))), @@ -1340,11 +1340,11 @@ def test_find_imports(self): for src, expected in cases: with self.subTest(src=src): code = compile(src, "", "exec") - res = tuple(dis.find_imports(code)) + res = tuple(dis._find_imports(code)) self.assertEqual(len(res), 1) self.assertEqual(res[0], expected) - def test_find_store_names(self): + def test__find_store_names(self): cases = [ ("x+y", ()), ("x=y=1", ('x', 'y')), @@ -1355,7 +1355,7 @@ def test_find_store_names(self): for src, expected in cases: with self.subTest(src=src): code = compile(src, "", "exec") - res = tuple(dis.find_store_names(code)) + res = tuple(dis._find_store_names(code)) self.assertEqual(res, expected) diff --git a/Misc/NEWS.d/next/Library/2021-09-08-22-24-15.bpo-45017.q0t8nk.rst b/Misc/NEWS.d/next/Library/2021-09-08-22-24-15.bpo-45017.q0t8nk.rst deleted file mode 100644 index 64cfa7e5f6dded..00000000000000 --- a/Misc/NEWS.d/next/Library/2021-09-08-22-24-15.bpo-45017.q0t8nk.rst +++ /dev/null @@ -1,3 +0,0 @@ -Add two functions to the :mod:`dis` module. :func:`find_imports` generates information about import statements in a code object and :func:`find_store_name` generates a sequence of names that the code object writes. - -Refactored :mod:`modulefinder` to use these functions instead of scanning opcodes itself. \ No newline at end of file