Skip to content

Commit dc196bc

Browse files
committed
wip
1 parent 6e9b277 commit dc196bc

File tree

4 files changed

+239
-3
lines changed

4 files changed

+239
-3
lines changed

integration_tests/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ RUN(NAME generics_list_01 LABELS cpython llvm)
275275
RUN(NAME test_statistics LABELS cpython llvm)
276276
RUN(NAME test_str_attributes LABELS cpython llvm)
277277
RUN(NAME kwargs_01 LABELS cpython llvm)
278-
RUN(NAME test_01_goto LABELS llvm c)
278+
RUN(NAME test_01_goto LABELS cpython llvm c)
279279

280280
RUN(NAME func_inline_01 LABELS llvm wasm)
281281
RUN(NAME func_static_01 LABELS cpython llvm c wasm)

integration_tests/test_01_goto.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from ltypes import with_goto, goto, label, i32
1+
from ltypes import with_goto, i32
22

33
@with_goto
44
def f() -> i32:

src/runtime/ltypes/goto.py

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
import dis
2+
import struct
3+
import array
4+
import types
5+
import functools
6+
7+
8+
# Pick the unbound array-to-bytes serializer: use ``tobytes`` when the
# running interpreter provides it, otherwise fall back to ``tostring``.
if hasattr(array.array, 'tobytes'):
    _array_to_bytes = array.array.tobytes
else:
    _array_to_bytes = array.array.tostring
12+
13+
14+
class _Bytecode:
    """Instruction-encoding parameters of the running interpreter.

    The values are inferred by compiling a tiny sample lambda and looking
    at the instruction found at byte offset 2 of its bytecode.

    Attributes set by ``__init__``:
        argument: ``struct.Struct`` used to pack/unpack one instruction
            argument.
        have_argument: opcodes greater than or equal to this value are
            treated as carrying an argument (0 means every opcode does).
        jump_unit: divisor applied to byte offsets when a jump target is
            encoded.
    """

    def __init__(self):
        probe = (lambda: x if x else y).__code__.co_code
        second_opcode, second_oparg = struct.unpack_from('BB', probe, 2)

        # Since Python 3.6 every instruction is a 16-bit word (8-bit opcode
        # + 8-bit argument); before that, instructions with an argument took
        # 24 bits (8-bit opcode + 16-bit argument) and the others 8 bits.
        # See https://bugs.python.org/issue26647
        # Finding POP_JUMP_IF_FALSE at offset 2 is taken as the sign of the
        # 16-bit word format.
        uses_wordcode = dis.opname[second_opcode] == 'POP_JUMP_IF_FALSE'

        if not uses_wordcode:
            self.argument = struct.Struct('<H')
            self.have_argument = dis.HAVE_ARGUMENT
            self.jump_unit = 1
            return

        self.argument = struct.Struct('B')
        self.have_argument = 0
        # Jump targets may be expressed in bytes or in code units
        # (address in bytes / 2) depending on the interpreter.
        # Presumably the probe's false branch sits at byte offset 8, so
        # dividing 8 by the encoded target yields bytes per jump unit --
        # TODO confirm.
        self.jump_unit = 8 // second_oparg

    @property
    def argument_bits(self):
        """Width, in bits, of a single packed instruction argument."""
        return 8 * self.argument.size


# Module-wide description of the running interpreter's bytecode layout.
_BYTECODE = _Bytecode()
43+
44+
45+
def _make_code(code, codestring):
    """Return a copy of ``code`` whose ``co_code`` is ``codestring``.

    All other attributes of the code object are carried over unchanged by
    ``code.replace``.
    """
    patched = code.replace(co_code=codestring)
    return patched
47+
48+
49+
def _parse_instructions(code):
    """Yield ``(opname, oparg, offset)`` for each instruction in ``code``.

    ``code`` is the raw bytecode string. ``oparg`` is ``None`` for opcodes
    below ``_BYTECODE.have_argument``. ``EXTENDED_ARG`` prefixes are not
    yielded on their own: their value is folded into the argument of the
    instruction that follows, and that instruction is reported at the
    offset where its first prefix started.
    """
    pending_high_bits = 0
    prefix_start = None
    cursor = 0

    while cursor < len(code):
        # Report a prefixed instruction at the position of its prefix.
        start = cursor if prefix_start is None else prefix_start

        (opcode,) = struct.unpack_from('B', code, cursor)
        cursor += 1

        if opcode < _BYTECODE.have_argument:
            oparg = None
        else:
            (raw_arg,) = _BYTECODE.argument.unpack_from(code, cursor)
            oparg = pending_high_bits | raw_arg
            cursor += _BYTECODE.argument.size

            if opcode == dis.EXTENDED_ARG:
                # Remember the high bits and keep reading; the real
                # instruction comes next.
                pending_high_bits = oparg << _BYTECODE.argument_bits
                prefix_start = start
                continue

        pending_high_bits = 0
        prefix_start = None
        yield (dis.opname[opcode], oparg, start)
75+
76+
77+
def _get_instruction_size(opname, oparg=0):
    """Return the encoded size in bytes of ``opname`` with ``oparg``.

    The result counts one byte for the opcode, the argument bytes when the
    opcode takes an argument, and any ``EXTENDED_ARG`` prefix instructions
    needed because ``oparg`` does not fit into a single argument field.
    """
    high_bits = oparg >> _BYTECODE.argument_bits
    if high_bits != 0:
        # The overflowing bits are emitted as a preceding EXTENDED_ARG.
        prefix_size = _get_instruction_size('EXTENDED_ARG', high_bits)
    else:
        prefix_size = 0

    takes_argument = dis.opmap[opname] >= _BYTECODE.have_argument
    argument_size = _BYTECODE.argument.size if takes_argument else 0

    return 1 + prefix_size + argument_size
90+
91+
92+
def _get_instructions_size(ops):
    """Return the total encoded size in bytes of the instructions in ``ops``.

    Each item is either an opname string (no argument given) or a sequence
    that is unpacked into ``_get_instruction_size`` as ``(opname, oparg)``.
    """
    return sum(
        _get_instruction_size(op) if isinstance(op, str)
        else _get_instruction_size(*op)
        for op in ops
    )
100+
101+
102+
def _write_instruction(buf, pos, opname, oparg=0):
    """Encode one instruction into ``buf`` at ``pos``; return the next position.

    When ``oparg`` is too wide for a single argument field, the excess high
    bits are written first as an ``EXTENDED_ARG`` instruction and only the
    low bits are stored with ``opname`` itself.
    """
    bits = _BYTECODE.argument_bits
    low_bits = oparg
    high_bits = oparg >> bits
    if high_bits != 0:
        pos = _write_instruction(buf, pos, 'EXTENDED_ARG', high_bits)
        low_bits = oparg & ((1 << bits) - 1)

    opcode = dis.opmap[opname]
    buf[pos] = opcode
    after_opcode = pos + 1

    if opcode < _BYTECODE.have_argument:
        # Argument-less opcode: nothing more to emit.
        return after_opcode

    _BYTECODE.argument.pack_into(buf, after_opcode, low_bits)
    return after_opcode + _BYTECODE.argument.size
117+
118+
119+
def _write_instructions(buf, pos, ops):
    """Encode every instruction in ``ops`` into ``buf`` starting at ``pos``.

    Items are either an opname string or a sequence unpacked as
    ``(opname, oparg)``. Returns the position just past the last
    instruction written.
    """
    for op in ops:
        call_args = (op,) if isinstance(op, str) else op
        pos = _write_instruction(buf, pos, *call_args)
    return pos
126+
127+
128+
def _find_labels_and_gotos(code):
    """Locate ``label .name`` and ``goto .name`` markers in ``code``.

    A marker is the instruction triple LOAD_GLOBAL/LOAD_NAME, LOAD_ATTR,
    POP_TOP whose loaded name is ``label`` or ``goto``.

    Returns:
        ``(labels, gotos)`` where ``labels`` maps the attribute's name
        index to ``(marker_offset, following_offset, block_stack)`` and
        ``gotos`` is a list of
        ``(marker_offset, following_offset, name_index, block_stack)``.
        ``block_stack`` is a tuple of ids of the blocks open at that point.

    Raises:
        SyntaxError: if the same label name is defined twice.
    """
    block_openers = ('SETUP_LOOP',
                     'SETUP_EXCEPT', 'SETUP_FINALLY',
                     'SETUP_WITH', 'SETUP_ASYNC_WITH')

    labels = {}
    gotos = []

    open_blocks = []
    blocks_seen = 0

    # The three instructions preceding the one currently being read. The
    # oldest of them is the one examined on each step, so the instruction
    # right after a complete marker triple is always available.
    blank = (None, None, None)
    trailing = [blank, blank, blank]

    for current in _parse_instructions(code.co_code):
        ((head_name, head_arg, head_offset),
         (mid_name, mid_arg, _),
         (tail_name, _, _)) = trailing
        following_offset = current[2]

        if head_name in ('LOAD_GLOBAL', 'LOAD_NAME'):
            is_marker = mid_name == 'LOAD_ATTR' and tail_name == 'POP_TOP'
            keyword = code.co_names[head_arg] if is_marker else None

            if keyword == 'label':
                if mid_arg in labels:
                    raise SyntaxError('Ambiguous label {0!r}'.format(
                        code.co_names[mid_arg]
                    ))
                labels[mid_arg] = (head_offset,
                                   following_offset,
                                   tuple(open_blocks))
            elif keyword == 'goto':
                gotos.append((head_offset,
                              following_offset,
                              mid_arg,
                              tuple(open_blocks)))
        elif head_name in block_openers:
            blocks_seen += 1
            open_blocks.append(blocks_seen)
        elif head_name == 'POP_BLOCK' and open_blocks:
            open_blocks.pop()

        trailing = [trailing[1], trailing[2], current]

    return labels, gotos
169+
170+
171+
def _inject_nop_sled(buf, pos, end):
    """Overwrite ``buf`` with ``NOP`` instructions from ``pos`` up to ``end``.

    Writing continues for as long as the write position is below ``end``;
    nothing is written when ``pos`` is already at or past ``end``.
    """
    cursor = pos
    while cursor < end:
        cursor = _write_instruction(buf, cursor, 'NOP')
174+
175+
176+
def _patch_code(code):
    """Return a copy of ``code`` with label/goto markers turned into jumps.

    Every label marker is blanked out with NOPs. Every goto marker is
    replaced by one ``POP_BLOCK`` per block being left, followed by a
    ``JUMP_ABSOLUTE`` to the instruction after the label. If that sequence
    does not fit into the space occupied by the marker, it is appended to
    the end of the bytecode and the marker is replaced by a jump there.

    Raises:
        SyntaxError: for a goto to an unknown label, a jump into a block
            the goto is not nested in, or a marker too small to hold even
            the jump to the appended sequence.
    """
    labels, gotos = _find_labels_and_gotos(code)
    buf = array.array('B', code.co_code)

    for label_start, label_end, _ in labels.values():
        _inject_nop_sled(buf, label_start, label_end)

    for pos, end, label, origin_stack in gotos:
        try:
            _, target, target_stack = labels[label]
        except KeyError:
            raise SyntaxError('Unknown label {0!r}'.format(
                code.co_names[label]
            ))

        # The label's blocks must be a prefix of the goto's blocks:
        # jumping outwards is allowed, jumping inwards is not.
        target_depth = len(target_stack)
        if origin_stack[:target_depth] != target_stack:
            raise SyntaxError('Jump into different block')

        jump = ('JUMP_ABSOLUTE', target // _BYTECODE.jump_unit)
        ops = ['POP_BLOCK'] * (len(origin_stack) - target_depth) + [jump]
        ops_size = _get_instructions_size(ops)

        if pos + ops_size <= end:
            # The whole sequence fits in place of the marker.
            pos = _write_instructions(buf, pos, ops)
            _inject_nop_sled(buf, pos, end)
            continue

        # Not enough space: put the sequence at the end of the buffer and
        # jump there from the marker.
        buf_end = len(buf)
        go_to_end_ops = [('JUMP_ABSOLUTE', buf_end // _BYTECODE.jump_unit)]

        if pos + _get_instructions_size(go_to_end_ops) > end:
            # not sure if reachable
            raise SyntaxError('Goto in an incredibly huge function')

        pos = _write_instructions(buf, pos, go_to_end_ops)
        _inject_nop_sled(buf, pos, end)

        buf.extend([0] * ops_size)
        _write_instructions(buf, buf_end, ops)

    return _make_code(code, _array_to_bytes(buf))
220+
221+
222+
def with_goto(func_or_code):
    """Rewrite ``label``/``goto`` markers in a function or code object.

    Args:
        func_or_code: either a ``types.CodeType`` or a function object.

    Returns:
        For a code object, the patched code object. For a function, a new
        function built around the patched code that shares the original's
        globals, name, positional defaults, keyword-only defaults and
        closure, with the usual wrapper metadata copied over by
        ``functools.update_wrapper``.

    Raises:
        SyntaxError: propagated from ``_patch_code`` for invalid markers.
    """
    if isinstance(func_or_code, types.CodeType):
        return _patch_code(func_or_code)

    patched = types.FunctionType(
        _patch_code(func_or_code.__code__),
        func_or_code.__globals__,
        func_or_code.__name__,
        func_or_code.__defaults__,
        func_or_code.__closure__,
    )

    # The FunctionType call above passes no keyword-only defaults, and
    # update_wrapper does not copy __kwdefaults__ either. Without this the
    # decorated function would lose the defaults of its keyword-only
    # parameters and fail when called without them.
    kwdefaults = getattr(func_or_code, '__kwdefaults__', None)
    if kwdefaults:
        patched.__kwdefaults__ = dict(kwdefaults)

    return functools.update_wrapper(patched, func_or_code)

src/runtime/ltypes/ltypes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
import ctypes
44
import platform
55
from dataclasses import dataclass
6+
from goto import with_goto
67

78
# TODO: this does not seem to restrict other imports
89
__slots__ = ["i8", "i16", "i32", "i64", "f32", "f64", "c32", "c64", "CPtr",
910
"overload", "ccall", "TypeVar", "pointer", "c_p_pointer", "Pointer",
10-
"p_c_pointer", "vectorize", "inline", "Union", "static"]
11+
"p_c_pointer", "vectorize", "inline", "Union", "static", "with_goto"]
1112

1213
# data-types
1314

0 commit comments

Comments
 (0)