wip

czgdp1807 · czgdp1807 · commit dc196bcc3b9b · 2022-09-29T15:29:54.000+05:30
diff --git a/integration_tests/CMakeLists.txt b/integration_tests/CMakeLists.txt
@@ -275,7 +275,7 @@ RUN(NAME generics_list_01    LABELS cpython llvm)
 RUN(NAME test_statistics     LABELS cpython llvm)
 RUN(NAME test_str_attributes LABELS cpython llvm)
 RUN(NAME kwargs_01           LABELS cpython llvm)
-RUN(NAME test_01_goto        LABELS llvm c)
+RUN(NAME test_01_goto        LABELS cpython llvm c)
 
 RUN(NAME func_inline_01 LABELS llvm wasm)
 RUN(NAME func_static_01 LABELS cpython llvm c wasm)
diff --git a/integration_tests/test_01_goto.py b/integration_tests/test_01_goto.py
@@ -1,4 +1,4 @@
-from ltypes import with_goto, goto, label, i32
+from ltypes import with_goto, i32
 
 @with_goto
 def f() -> i32:
diff --git a/src/runtime/ltypes/goto.py b/src/runtime/ltypes/goto.py
@@ -0,0 +1,235 @@
+import dis
+import struct
+import array
+import types
+import functools
+
+
+try:
+    _array_to_bytes = array.array.tobytes  # used to serialize the patched bytecode buffer
+except AttributeError:
+    _array_to_bytes = array.array.tostring  # fallback when array.array has no tobytes attribute
+
+
+class _Bytecode:  # describes the interpreter's bytecode layout, probed from a sample lambda
+    def __init__(self):
+        code = (lambda: x if x else y).__code__.co_code  # sample bytecode; the lambda is never called
+        opcode, oparg = struct.unpack_from('BB', code, 2)  # the two bytes at byte offset 2
+
+        # Starting with Python 3.6, the bytecode format changed to use
+        # 16-bit words (8-bit opcode + 8-bit argument) for each instruction,
+        # as opposed to the previous 24 bits (8-bit opcode + 16-bit argument)
+        # for instructions that expect an argument and 8 bits otherwise.
+        # https://bugs.python.org/issue26647
+        if dis.opname[opcode] == 'POP_JUMP_IF_FALSE':
+            self.argument = struct.Struct('B')  # one-byte argument
+            self.have_argument = 0  # every opcode compares >= 0, so all are read with an argument
+            # As of Python 3.6, jump targets are still addressed in bytes.
+            # This is subject to change: in the future, jump targets
+            # might refer to code units (address in bytes / 2) instead.
+            # https://bugs.python.org/issue26647
+            self.jump_unit = 8 // oparg  # assumes the sampled jump lands at byte offset 8 - TODO confirm per interpreter version
+        else:
+            self.argument = struct.Struct('<H')  # two-byte little-endian argument
+            self.have_argument = dis.HAVE_ARGUMENT  # only opcodes at or above this value carry an argument
+            self.jump_unit = 1  # jump arguments are used as byte offsets unchanged
+
+    @property
+    def argument_bits(self):  # width of one encoded instruction argument, in bits
+        return self.argument.size * 8
+
+
+_BYTECODE = _Bytecode()  # shared layout description, built once when the module is imported
+
+
+def _make_code(code, codestring):  # build a code object from `code` with co_code set to `codestring`
+    return code.replace(co_code=codestring)
+
+
+def _parse_instructions(code):  # generator over raw bytecode bytes: yields (opname, oparg, offset)
+    extended_arg = 0  # high bits contributed by a preceding EXTENDED_ARG
+    extended_arg_offset = None  # offset of the pending EXTENDED_ARG prefix, if any
+    pos = 0
+
+    while pos < len(code):
+        offset = pos
+        if extended_arg_offset is not None:
+            offset = extended_arg_offset  # report the instruction at its prefix's offset
+
+        opcode = struct.unpack_from('B', code, pos)[0]
+        pos += 1
+
+        oparg = None  # stays None for opcodes that carry no argument
+        if opcode >= _BYTECODE.have_argument:
+            oparg = extended_arg | _BYTECODE.argument.unpack_from(code, pos)[0]
+            pos += _BYTECODE.argument.size
+
+            if opcode == dis.EXTENDED_ARG:
+                extended_arg = oparg << _BYTECODE.argument_bits  # carried into the next instruction's argument
+                extended_arg_offset = offset
+                continue  # the prefix itself is not yielded
+
+        extended_arg = 0
+        extended_arg_offset = None
+        yield (dis.opname[opcode], oparg, offset)
+
+
+def _get_instruction_size(opname, oparg=0):  # encoded size in bytes, including any EXTENDED_ARG prefixes
+    size = 1  # the opcode byte
+
+    extended_arg = oparg >> _BYTECODE.argument_bits  # bits that do not fit in a single argument
+    if extended_arg != 0:
+        size += _get_instruction_size('EXTENDED_ARG', extended_arg)  # recurse to size the prefix chain
+        oparg &= (1 << _BYTECODE.argument_bits) - 1
+
+    opcode = dis.opmap[opname]
+    if opcode >= _BYTECODE.have_argument:
+        size += _BYTECODE.argument.size
+
+    return size
+
+
+def _get_instructions_size(ops):  # total encoded size in bytes of every entry in `ops`
+    size = 0
+    for op in ops:
+        if isinstance(op, str):  # bare opname, default argument
+            size += _get_instruction_size(op)
+        else:  # anything else is unpacked as (opname, oparg)
+            size += _get_instruction_size(*op)
+    return size
+
+
+def _write_instruction(buf, pos, opname, oparg=0):  # encode one instruction into buf at pos; returns the position just past it
+    extended_arg = oparg >> _BYTECODE.argument_bits
+    if extended_arg != 0:
+        pos = _write_instruction(buf, pos, 'EXTENDED_ARG', extended_arg)  # emit the high bits first
+        oparg &= (1 << _BYTECODE.argument_bits) - 1  # keep only the low bits for this instruction
+
+    opcode = dis.opmap[opname]
+    buf[pos] = opcode
+    pos += 1
+
+    if opcode >= _BYTECODE.have_argument:
+        _BYTECODE.argument.pack_into(buf, pos, oparg)
+        pos += _BYTECODE.argument.size
+
+    return pos
+
+
+def _write_instructions(buf, pos, ops):  # encode each entry of `ops` in order; returns the position just past the last one
+    for op in ops:
+        if isinstance(op, str):  # bare opname, default argument
+            pos = _write_instruction(buf, pos, op)
+        else:  # anything else is unpacked as (opname, oparg)
+            pos = _write_instruction(buf, pos, *op)
+    return pos
+
+
+def _find_labels_and_gotos(code):  # scan co_code for `label.<name>` / `goto.<name>` expression statements
+    labels = {}  # name index -> (start offset, offset of following instruction, block stack)
+    gotos = []  # (start offset, offset of following instruction, name index, block stack)
+
+    block_stack = []  # ids of the SETUP_* blocks open at the instruction being examined
+    block_counter = 0  # source of unique block ids
+
+    opname1 = oparg1 = offset1 = None  # sliding window: 1-3 are the three previous instructions
+    opname2 = oparg2 = offset2 = None
+    opname3 = oparg3 = offset3 = None
+
+    for opname4, oparg4, offset4 in _parse_instructions(code.co_code):  # 4 is the newest instruction
+        if opname1 in ('LOAD_GLOBAL', 'LOAD_NAME'):
+            if opname2 == 'LOAD_ATTR' and opname3 == 'POP_TOP':  # load name, get attribute, discard result
+                name = code.co_names[oparg1]
+                if name == 'label':
+                    if oparg2 in labels:
+                        raise SyntaxError('Ambiguous label {0!r}'.format(
+                            code.co_names[oparg2]
+                        ))
+                    labels[oparg2] = (offset1,
+                                      offset4,
+                                      tuple(block_stack))
+                elif name == 'goto':
+                    gotos.append((offset1,
+                                  offset4,
+                                  oparg2,
+                                  tuple(block_stack)))
+        elif opname1 in ('SETUP_LOOP',
+                         'SETUP_EXCEPT', 'SETUP_FINALLY',
+                         'SETUP_WITH', 'SETUP_ASYNC_WITH'):
+            block_counter += 1
+            block_stack.append(block_counter)  # entering a new block
+        elif opname1 == 'POP_BLOCK' and block_stack:
+            block_stack.pop()  # leaving the innermost block
+
+        opname1, oparg1, offset1 = opname2, oparg2, offset2  # slide the window forward by one instruction
+        opname2, oparg2, offset2 = opname3, oparg3, offset3
+        opname3, oparg3, offset3 = opname4, oparg4, offset4
+
+    return labels, gotos
+
+
+def _inject_nop_sled(buf, pos, end):  # write NOP instructions into buf starting at pos until pos reaches end
+    while pos < end:
+        pos = _write_instruction(buf, pos, 'NOP')
+
+
+def _patch_code(code):  # return a code object whose label statements are NOPs and goto statements are jumps
+    labels, gotos = _find_labels_and_gotos(code)
+    buf = array.array('B', code.co_code)  # mutable byte copy of the original bytecode
+
+    for pos, end, _ in labels.values():
+        _inject_nop_sled(buf, pos, end)  # blank out each label statement
+
+    for pos, end, label, origin_stack in gotos:
+        try:
+            _, target, target_stack = labels[label]  # target is the offset just after the label statement
+        except KeyError:
+            raise SyntaxError('Unknown label {0!r}'.format(
+                code.co_names[label]
+            ))
+
+        target_depth = len(target_stack)
+        if origin_stack[:target_depth] != target_stack:  # the label's blocks must all enclose the goto
+            raise SyntaxError('Jump into different block')
+
+        ops = []
+        for i in range(len(origin_stack) - target_depth):
+            ops.append('POP_BLOCK')  # one per block the jump leaves
+        ops.append(('JUMP_ABSOLUTE', target // _BYTECODE.jump_unit))
+
+        if pos + _get_instructions_size(ops) > end:
+            # not enough space, add code at buffer end and jump there
+            buf_end = len(buf)
+
+            go_to_end_ops = [('JUMP_ABSOLUTE', buf_end // _BYTECODE.jump_unit)]
+
+            if pos + _get_instructions_size(go_to_end_ops) > end:
+                # not sure if reachable
+                raise SyntaxError('Goto in an incredibly huge function')
+
+            pos = _write_instructions(buf, pos, go_to_end_ops)
+            _inject_nop_sled(buf, pos, end)  # pad the rest of the goto statement
+
+            buf.extend([0] * _get_instructions_size(ops))  # reserve room for the appended sequence
+            _write_instructions(buf, buf_end, ops)
+        else:
+            pos = _write_instructions(buf, pos, ops)  # the sequence fits in place of the goto statement
+            _inject_nop_sled(buf, pos, end)
+
+    return _make_code(code, _array_to_bytes(buf))
+
+
+def with_goto(func_or_code):  # accepts a code object or a function; returns the patched equivalent
+    if isinstance(func_or_code, types.CodeType):
+        return _patch_code(func_or_code)  # raw code object in, patched code object out
+
+    return functools.update_wrapper(  # copy wrapper metadata from the original onto the rebuilt function
+        types.FunctionType(
+            _patch_code(func_or_code.__code__),
+            func_or_code.__globals__,
+            func_or_code.__name__,
+            func_or_code.__defaults__,
+            func_or_code.__closure__,  # NOTE(review): __kwdefaults__ is not passed here - confirm keyword-only defaults survive
+        ),
+        func_or_code
+    )
diff --git a/src/runtime/ltypes/ltypes.py b/src/runtime/ltypes/ltypes.py
@@ -3,11 +3,12 @@
 import ctypes
 import platform
 from dataclasses import dataclass
+from goto import with_goto
 
 # TODO: this does not seem to restrict other imports
 __slots__ = ["i8", "i16", "i32", "i64", "f32", "f64", "c32", "c64", "CPtr",
         "overload", "ccall", "TypeVar", "pointer", "c_p_pointer", "Pointer",
-        "p_c_pointer", "vectorize", "inline", "Union", "static"]
+        "p_c_pointer", "vectorize", "inline", "Union", "static", "with_goto"]
 
 # data-types
 

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-from ltypes import with_goto, goto, label, i32`
	`1`	`+from ltypes import with_goto, i32`
`2`	`2`
`3`	`3`	`@with_goto`
`4`	`4`	`def f() -> i32:`