Skip to content

Commit 9393fa1

Browse files
authored
[mypyc] Track definedness of native int attributes using a bitmap (#13532)
Since native ints can't support a reserved value to mark an undefined attribute, use a separate bitmap attribute (or attributes) to store information about defined/undefined attributes with native int types. The bitmap is only defined if we can't infer that an attribute is always defined, and it's only needed for native int attributes. We only access the bitmap if the runtime value of an attribute is equal to the (overlapping) error value. This way the performance cost of the bitmap is pretty low on average. I'll add support for traits in a follow-up PR to keep this PR simple. Work on mypyc/mypyc#837.
1 parent 2857736 commit 9393fa1

File tree

10 files changed

+503
-17
lines changed

10 files changed

+503
-17
lines changed

mypyc/analysis/attrdefined.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def foo(self) -> int:
9191
SetMem,
9292
Unreachable,
9393
)
94-
from mypyc.ir.rtypes import RInstance
94+
from mypyc.ir.rtypes import RInstance, is_fixed_width_rtype
9595

9696
# If True, print out all always-defined attributes of native classes (to aid
9797
# debugging and testing)
@@ -120,6 +120,11 @@ def analyze_always_defined_attrs(class_irs: list[ClassIR]) -> None:
120120
for cl in class_irs:
121121
update_always_defined_attrs_using_subclasses(cl, seen)
122122

123+
# Final pass: detect attributes that need to use a bitmap to track definedness
124+
seen = set()
125+
for cl in class_irs:
126+
detect_undefined_bitmap(cl, seen)
127+
123128

124129
def analyze_always_defined_attrs_in_class(cl: ClassIR, seen: set[ClassIR]) -> None:
125130
if cl in seen:
@@ -407,3 +412,17 @@ def update_always_defined_attrs_using_subclasses(cl: ClassIR, seen: set[ClassIR]
407412
removed.add(attr)
408413
cl._always_initialized_attrs -= removed
409414
seen.add(cl)
415+
416+
417+
def detect_undefined_bitmap(cl: ClassIR, seen: set[ClassIR]) -> None:
    """Populate cl.bitmap_attrs with attributes that need a definedness bitmap.

    An attribute is added if its type is a fixed-width type (according to
    is_fixed_width_rtype) and the class does not report it as always defined.
    The position of an attribute name in the list is its bit number.

    Args:
        cl: Class to process. Its 'bitmap_attrs' list is extended in place.
        seen: Classes that have already been processed. Used to make sure each
            class is processed only once; updated in place.
    """
    if cl in seen:
        return
    seen.add(cl)

    # Process base classes first, so that their bitmap_attrs are complete
    # before they are copied below. This must recurse on 'base' (not 'cl'):
    # 'cl' is already in 'seen', so recursing on it would be a no-op and the
    # result would depend on the order in which classes happen to be visited.
    for base in cl.base_mro[1:]:
        detect_undefined_bitmap(base, seen)

    # Inherited entries come first, so that an inherited attribute has the
    # same bit number in the subclass as in the immediate base class.
    if len(cl.base_mro) > 1:
        cl.bitmap_attrs.extend(cl.base_mro[1].bitmap_attrs)
    for n, t in cl.attributes.items():
        if is_fixed_width_rtype(t) and not cl.is_always_defined(n):
            cl.bitmap_attrs.append(n)

mypyc/codegen/emit.py

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from mypyc.codegen.literals import Literals
1010
from mypyc.common import (
11+
ATTR_BITMAP_BITS,
1112
ATTR_PREFIX,
1213
FAST_ISINSTANCE_MAX_SUBCLASSES,
1314
NATIVE_PREFIX,
@@ -329,21 +330,81 @@ def tuple_c_declaration(self, rtuple: RTuple) -> list[str]:
329330

330331
return result
331332

333+
def bitmap_field(self, index: int) -> str:
    """Return the name of the C field that holds bit number 'index'.

    Each bitmap field covers ATTR_BITMAP_BITS bits. The first field is
    named "bitmap"; later ones are "bitmap2", "bitmap3", and so on.
    """
    word = index // ATTR_BITMAP_BITS
    suffix = "" if word == 0 else str(word + 1)
    return "bitmap" + suffix
339+
340+
def attr_bitmap_expr(self, obj: str, cl: ClassIR, index: int) -> str:
    """Return a C expression that refers to the bitmap field for bit 'index'.

    The expression casts 'obj' to a pointer to the struct of 'cl' and
    accesses the bitmap field selected by bitmap_field(index).
    """
    struct_name = cl.struct_name(self.names)
    field = self.bitmap_field(index)
    return f"(({struct_name} *){obj})->{field}"
345+
346+
def emit_attr_bitmap_set(
    self, value: str, obj: str, rtype: RType, cl: ClassIR, attr: str
) -> None:
    """Emit code that flags an attribute as defined in the attribute bitmap.

    The attribute must be one of those tracked in the bitmap (not all
    attributes are). The emitted update is guarded so that it only happens
    when 'value' equals the error value of 'rtype'; for any other value
    nothing is changed.
    """
    self._emit_attr_bitmap_update(
        value=value, obj=obj, rtype=rtype, cl=cl, attr=attr, clear=False
    )
355+
356+
def emit_attr_bitmap_clear(self, obj: str, rtype: RType, cl: ClassIR, attr: str) -> None:
    """Emit code that flags an attribute as undefined in the attribute bitmap.

    In contrast to emit_attr_bitmap_set, the emitted update is not guarded
    by a comparison against the error value: the bit is always cleared.
    """
    # An empty 'value' tells the helper to skip the error value guard.
    self._emit_attr_bitmap_update(
        value="", obj=obj, rtype=rtype, cl=cl, attr=attr, clear=True
    )
362+
363+
def _emit_attr_bitmap_update(
    self, value: str, obj: str, rtype: RType, cl: ClassIR, attr: str, clear: bool
) -> None:
    """Emit a C statement that sets or clears the bitmap bit of an attribute.

    If 'value' is a non-empty C expression, the statement is wrapped in an
    'if' that compares 'value' against the undefined value of 'rtype'.
    If 'value' is empty, the statement is emitted unconditionally.
    The bit is cleared if 'clear' is true and set otherwise.
    """
    guarded = bool(value)
    if guarded:
        undefined = self.c_undefined_value(rtype)
        self.emit_line(f"if (unlikely({value} == {undefined})) {{")

    # The position in bitmap_attrs is the bit number; the low bits select
    # the bit within a single bitmap field.
    bit_index = cl.bitmap_attrs.index(attr)
    bit = 1 << (bit_index & (ATTR_BITMAP_BITS - 1))
    target = self.attr_bitmap_expr(obj, cl, bit_index)
    statement = f"{target} &= ~{bit};" if clear else f"{target} |= {bit};"
    self.emit_line(statement)

    if guarded:
        self.emit_line("}")
377+
332378
def use_vectorcall(self) -> bool:
333379
return use_vectorcall(self.capi_version)
334380

335381
def emit_undefined_attr_check(
    self,
    rtype: RType,
    attr_expr: str,
    compare: str,
    obj: str,
    attr: str,
    cl: ClassIR,
    *,
    unlikely: bool = False,
) -> None:
    """Emit the opening line of an 'if' that tests attribute definedness.

    Args:
        rtype: Type of the attribute.
        attr_expr: C expression for the current value of the attribute.
        compare: C comparison operator placed between the value and the
            undefined value (callers visible here pass "==" or "!=").
        obj: C expression for the object that has the attribute.
        attr: Name of the attribute.
        cl: Class used to look up the bitmap bit of the attribute.
        unlikely: If true, wrap the value comparison in unlikely(...).

    For fixed-width types the comparison is combined with a test that the
    bit of the attribute in the attribute bitmap is not set. Only the
    opening 'if (...) {' line is emitted.
    """
    if isinstance(rtype, RTuple):
        condition = self.tuple_undefined_check_cond(
            rtype, attr_expr, self.c_undefined_value, compare
        )
    else:
        condition = f"{attr_expr} {compare} {self.c_undefined_value(rtype)}"

    if unlikely:
        condition = f"unlikely({condition})"

    if is_fixed_width_rtype(rtype):
        # The value overlaps with the error value, so the bitmap has the
        # final say on whether the attribute is defined.
        bit_index = cl.bitmap_attrs.index(attr)
        bit = 1 << (bit_index & (ATTR_BITMAP_BITS - 1))
        bitmap = self.attr_bitmap_expr(obj, cl, bit_index)
        condition = f"{condition} && !({bitmap} & {bit})"

    self.emit_line(f"if ({condition}) {{")
347408

348409
def tuple_undefined_check_cond(
349410
self,

mypyc/codegen/emitclass.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,17 @@
1717
generate_richcompare_wrapper,
1818
generate_set_del_item_wrapper,
1919
)
20-
from mypyc.common import NATIVE_PREFIX, PREFIX, REG_PREFIX, use_fastcall
20+
from mypyc.common import (
21+
ATTR_BITMAP_BITS,
22+
ATTR_BITMAP_TYPE,
23+
NATIVE_PREFIX,
24+
PREFIX,
25+
REG_PREFIX,
26+
use_fastcall,
27+
)
2128
from mypyc.ir.class_ir import ClassIR, VTableEntries
2229
from mypyc.ir.func_ir import FUNC_CLASSMETHOD, FUNC_STATICMETHOD, FuncDecl, FuncIR
23-
from mypyc.ir.rtypes import RTuple, RType, object_rprimitive
30+
from mypyc.ir.rtypes import RTuple, RType, is_fixed_width_rtype, object_rprimitive
2431
from mypyc.namegen import NameGenerator
2532
from mypyc.sametype import is_same_type
2633

@@ -367,8 +374,17 @@ def generate_object_struct(cl: ClassIR, emitter: Emitter) -> None:
367374
lines += ["typedef struct {", "PyObject_HEAD", "CPyVTableItem *vtable;"]
368375
if cl.has_method("__call__") and emitter.use_vectorcall():
369376
lines.append("vectorcallfunc vectorcall;")
377+
bitmap_attrs = []
370378
for base in reversed(cl.base_mro):
371379
if not base.is_trait:
380+
if base.bitmap_attrs:
381+
# Do we need another attribute bitmap field?
382+
if emitter.bitmap_field(len(base.bitmap_attrs) - 1) not in bitmap_attrs:
383+
for i in range(0, len(base.bitmap_attrs), ATTR_BITMAP_BITS):
384+
attr = emitter.bitmap_field(i)
385+
if attr not in bitmap_attrs:
386+
lines.append(f"{ATTR_BITMAP_TYPE} {attr};")
387+
bitmap_attrs.append(attr)
372388
for attr, rtype in base.attributes.items():
373389
if (attr, rtype) not in seen_attrs:
374390
lines.append(f"{emitter.ctype_spaced(rtype)}{emitter.attr(attr)};")
@@ -546,6 +562,9 @@ def generate_setup_for_class(
546562
emitter.emit_line("}")
547563
else:
548564
emitter.emit_line(f"self->vtable = {vtable_name};")
565+
for i in range(0, len(cl.bitmap_attrs), ATTR_BITMAP_BITS):
566+
field = emitter.bitmap_field(i)
567+
emitter.emit_line(f"self->{field} = 0;")
549568

550569
if cl.has_method("__call__") and emitter.use_vectorcall():
551570
name = cl.method_decl("__call__").cname(emitter.names)
@@ -887,7 +906,7 @@ def generate_getter(cl: ClassIR, attr: str, rtype: RType, emitter: Emitter) -> N
887906
always_defined = cl.is_always_defined(attr) and not rtype.is_refcounted
888907

889908
if not always_defined:
890-
emitter.emit_undefined_attr_check(rtype, attr_expr, "==", unlikely=True)
909+
emitter.emit_undefined_attr_check(rtype, attr_expr, "==", "self", attr, cl, unlikely=True)
891910
emitter.emit_line("PyErr_SetString(PyExc_AttributeError,")
892911
emitter.emit_line(f' "attribute {repr(attr)} of {repr(cl.name)} undefined");')
893912
emitter.emit_line("return NULL;")
@@ -926,7 +945,7 @@ def generate_setter(cl: ClassIR, attr: str, rtype: RType, emitter: Emitter) -> N
926945
if rtype.is_refcounted:
927946
attr_expr = f"self->{attr_field}"
928947
if not always_defined:
929-
emitter.emit_undefined_attr_check(rtype, attr_expr, "!=")
948+
emitter.emit_undefined_attr_check(rtype, attr_expr, "!=", "self", attr, cl)
930949
emitter.emit_dec_ref(f"self->{attr_field}", rtype)
931950
if not always_defined:
932951
emitter.emit_line("}")
@@ -943,9 +962,14 @@ def generate_setter(cl: ClassIR, attr: str, rtype: RType, emitter: Emitter) -> N
943962
emitter.emit_lines("if (!tmp)", " return -1;")
944963
emitter.emit_inc_ref("tmp", rtype)
945964
emitter.emit_line(f"self->{attr_field} = tmp;")
965+
if is_fixed_width_rtype(rtype) and not always_defined:
966+
emitter.emit_attr_bitmap_set("tmp", "self", rtype, cl, attr)
967+
946968
if deletable:
947969
emitter.emit_line("} else")
948970
emitter.emit_line(f" self->{attr_field} = {emitter.c_undefined_value(rtype)};")
971+
if is_fixed_width_rtype(rtype):
972+
emitter.emit_attr_bitmap_clear("self", rtype, cl, attr)
949973
emitter.emit_line("return 0;")
950974
emitter.emit_line("}")
951975

mypyc/codegen/emitfunc.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
RStruct,
6161
RTuple,
6262
RType,
63+
is_fixed_width_rtype,
6364
is_int32_rprimitive,
6465
is_int64_rprimitive,
6566
is_int_rprimitive,
@@ -353,7 +354,9 @@ def visit_get_attr(self, op: GetAttr) -> None:
353354
always_defined = cl.is_always_defined(op.attr)
354355
merged_branch = None
355356
if not always_defined:
356-
self.emitter.emit_undefined_attr_check(attr_rtype, dest, "==", unlikely=True)
357+
self.emitter.emit_undefined_attr_check(
358+
attr_rtype, dest, "==", obj, op.attr, cl, unlikely=True
359+
)
357360
branch = self.next_branch()
358361
if branch is not None:
359362
if (
@@ -433,10 +436,17 @@ def visit_set_attr(self, op: SetAttr) -> None:
433436
# previously undefined), so decref the old value.
434437
always_defined = cl.is_always_defined(op.attr)
435438
if not always_defined:
436-
self.emitter.emit_undefined_attr_check(attr_rtype, attr_expr, "!=")
439+
self.emitter.emit_undefined_attr_check(
440+
attr_rtype, attr_expr, "!=", obj, op.attr, cl
441+
)
437442
self.emitter.emit_dec_ref(attr_expr, attr_rtype)
438443
if not always_defined:
439444
self.emitter.emit_line("}")
445+
elif is_fixed_width_rtype(attr_rtype) and not cl.is_always_defined(op.attr):
446+
# If there is overlap with the error value, update bitmap to mark
447+
# attribute as defined.
448+
self.emitter.emit_attr_bitmap_set(src, obj, attr_rtype, cl, op.attr)
449+
440450
# This steals the reference to src, so we don't need to increment the arg
441451
self.emitter.emit_line(f"{attr_expr} = {src};")
442452
if op.error_kind == ERR_FALSE:

mypyc/common.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@
5353
MAX_LITERAL_SHORT_INT: Final = sys.maxsize >> 1 if not IS_MIXED_32_64_BIT_BUILD else 2**30 - 1
5454
MIN_LITERAL_SHORT_INT: Final = -MAX_LITERAL_SHORT_INT - 1
5555

56+
# Description of the C type used to track definedness of attributes
57+
# that have types with overlapping error values
58+
ATTR_BITMAP_TYPE: Final = "uint32_t"
59+
ATTR_BITMAP_BITS: Final = 32
60+
5661
# Runtime C library files
5762
RUNTIME_C_FILES: Final = [
5863
"init.c",

mypyc/ir/class_ir.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def __init__(
130130
self.builtin_base: str | None = None
131131
# Default empty constructor
132132
self.ctor = FuncDecl(name, None, module_name, FuncSignature([], RInstance(self)))
133-
133+
# Attributes defined in the class (not inherited)
134134
self.attributes: dict[str, RType] = {}
135135
# Deletable attributes
136136
self.deletable: list[str] = []
@@ -184,6 +184,13 @@ def __init__(
184184
# If True, __init__ can make 'self' visible to unanalyzed/arbitrary code
185185
self.init_self_leak = False
186186

187+
# Definedness of these attributes is backed by a bitmap. Index in the list
188+
# indicates the bit number. Includes inherited attributes. We need the
189+
# bitmap for types such as native ints that can't have a dedicated error
190+
# value that doesn't overlap a valid value. The bitmap is used if the
191+
# value of an attribute is the same as the error value.
192+
self.bitmap_attrs: List[str] = []
193+
187194
def __repr__(self) -> str:
188195
return (
189196
"ClassIR("

mypyc/ir/ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ def __init__(self, obj: Value, attr: str, line: int, *, borrow: bool = False) ->
633633
attr_type = obj.type.attr_type(attr)
634634
self.type = attr_type
635635
if is_fixed_width_rtype(attr_type):
636-
self.error_kind = ERR_NEVER
636+
self.error_kind = ERR_MAGIC_OVERLAPPING
637637
self.is_borrowed = borrow and attr_type.is_refcounted
638638

639639
def sources(self) -> list[Value]:

mypyc/irbuild/main.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ def build_ir(
5757
options: CompilerOptions,
5858
errors: Errors,
5959
) -> ModuleIRs:
60-
"""Build IR for a set of modules that have been type-checked by mypy."""
60+
"""Build basic IR for a set of modules that have been type-checked by mypy.
61+
62+
The returned IR is not complete and requires additional
63+
transformations, such as the insertion of refcount handling.
64+
"""
6165

6266
build_type_map(mapper, modules, graph, types, options, errors)
6367
singledispatch_info = find_singledispatch_register_impls(modules, errors)

0 commit comments

Comments
 (0)