From ab8769217b309707372ff9c9a90821d9ed5ec64e Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sat, 7 Mar 2020 18:09:36 +0000 Subject: [PATCH 1/5] Improve comments and docstrings in mypyc.ir.ops --- mypyc/ir/ops.py | 99 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 22 deletions(-) diff --git a/mypyc/ir/ops.py b/mypyc/ir/ops.py index 58253ad6ef26..081e70d387df 100644 --- a/mypyc/ir/ops.py +++ b/mypyc/ir/ops.py @@ -63,6 +63,8 @@ class AssignmentTarget(object): + """Abstract base class for assignment targets in IR""" + type = None # type: RType @abstractmethod @@ -102,9 +104,11 @@ def __init__(self, obj: 'Value', attr: str) -> None: self.obj = obj self.attr = attr if isinstance(obj.type, RInstance) and obj.type.class_ir.has_attr(attr): + # Native attribute reference self.obj_type = obj.type # type: RType self.type = obj.type.attr_type(attr) else: + # Python attribute reference self.obj_type = object_rprimitive self.type = object_rprimitive @@ -134,6 +138,7 @@ def __init__(self, name: Optional[str] = None) -> None: self.indexes = OrderedDict() # type: Dict[Value, int] self.symtable = OrderedDict() # type: OrderedDict[SymbolNode, AssignmentTarget] self.temp_index = 0 + # All names genereted; value is the number of duplicates seen. self.names = {} # type: Dict[str, int] self.vars_needing_init = set() # type: Set[Value] @@ -155,6 +160,11 @@ def add(self, reg: 'Value', name: str) -> None: self.indexes[reg] = len(self.indexes) def add_local(self, symbol: SymbolNode, typ: RType, is_arg: bool = False) -> 'Register': + """Add register that represents a symbol to the symbol table. 
+ + Args: + is_arg: is this a function argument + """ assert isinstance(symbol, SymbolNode) reg = Register(typ, symbol.line, is_arg=is_arg) self.symtable[symbol] = AssignmentTargetRegister(reg) @@ -163,6 +173,7 @@ def add_local(self, symbol: SymbolNode, typ: RType, is_arg: bool = False) -> 'Re def add_local_reg(self, symbol: SymbolNode, typ: RType, is_arg: bool = False) -> AssignmentTargetRegister: + """Like add_local, but return an assignment target instead of value.""" self.add_local(symbol, typ, is_arg) target = self.symtable[symbol] assert isinstance(target, AssignmentTargetRegister) @@ -175,14 +186,16 @@ def add_target(self, symbol: SymbolNode, target: AssignmentTarget) -> Assignment def lookup(self, symbol: SymbolNode) -> AssignmentTarget: return self.symtable[symbol] - def add_temp(self, typ: RType, is_arg: bool = False) -> 'Register': + def add_temp(self, typ: RType) -> 'Register': + """Add register that contains a temporary value with the given type.""" assert isinstance(typ, RType) - reg = Register(typ, is_arg=is_arg) + reg = Register(typ) self.add(reg, 'r%d' % self.temp_index) self.temp_index += 1 return reg def add_op(self, reg: 'RegisterOp') -> None: + """Record the value of an operation.""" if reg.is_void: return self.add(reg, 'r%d' % self.temp_index) @@ -266,6 +279,11 @@ def __init__(self, label: int = -1) -> None: @property def terminated(self) -> bool: + """Does the block end with a jump, branch or return? + + This should always be true after the basic block has been fully built, but + this is false during construction. + """ return bool(self.ops) and isinstance(self.ops[-1], ControlOp) @@ -276,11 +294,16 @@ def terminated(self) -> bool: # Generates false (bool) on exception ERR_FALSE = 2 # type: Final -# Hack: using this line number for an op will supress it in tracebacks +# Hack: using this line number for an op will suppress it in tracebacks NO_TRACEBACK_LINE_NO = -10000 class Value: + """Abstract base class for all values. 
+ + These include references to registers, literals, and various operations. + """ + # Source line number line = -1 name = '?' @@ -300,6 +323,12 @@ def to_str(self, env: Environment) -> str: class Register(Value): + """A register holds a value of a specific type, and it can be read and mutated. + + Each local variable maps to a registers, and they are also used for some + (but not all) temporary values. + """ + def __init__(self, type: RType, line: int = -1, is_arg: bool = False, name: str = '') -> None: super().__init__(line) self.name = name @@ -316,6 +345,8 @@ def is_void(self) -> bool: class Op(Value): + """Abstract base class for all operations (as opposed to values).""" + def __init__(self, line: int) -> None: super().__init__(line) @@ -326,6 +357,7 @@ def can_raise(self) -> bool: @abstractmethod def sources(self) -> List[Value]: + """All the values the op may read.""" pass def stolen(self) -> List[Value]: @@ -387,16 +419,25 @@ class Branch(ControlOp): IS_ERROR: ('is_error(%r)', ''), } # type: Final - def __init__(self, left: Value, true_label: BasicBlock, - false_label: BasicBlock, op: int, line: int = -1, *, rare: bool = False) -> None: + def __init__(self, + left: Value, + true_label: BasicBlock, + false_label: BasicBlock, + op: int, + line: int = -1, + *, + rare: bool = False) -> None: super().__init__(line) + # Target value being checked self.left = left self.true = true_label self.false = false_label + # BOOL_EXPR (boolean check) or IS_ERROR (error value check self.op = op self.negated = False # If not None, the true label should generate a traceback entry (func name, line number) self.traceback_entry = None # type: Optional[Tuple[str, int]] + # If True, the condition is expected to be usually False (for optimization purposes) self.rare = rare def sources(self) -> List[Value]: @@ -424,6 +465,8 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class Return(ControlOp): + """Return a value from a function.""" + error_kind = ERR_NEVER def __init__(self, 
reg: Value, line: int = -1) -> None: @@ -469,7 +512,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class RegisterOp(Op): - """An operation that can be written as r1 = f(r2, ..., rn). + """Abstract base class for operations that can be written as r1 = f(r2, ..., rn). Takes some registers, performs an operation and generates an output. Doesn't do any control flow, but can raise an error. @@ -488,7 +531,7 @@ def can_raise(self) -> bool: class IncRef(RegisterOp): - """inc_ref r""" + """Increase reference count (inc_ref r).""" error_kind = ERR_NEVER @@ -511,7 +554,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class DecRef(RegisterOp): - """dec_ref r + """Decrease reference count and free object if zero (dec_ref r). The is_xdec flag says to use an XDECREF, which checks if the pointer is NULL first. @@ -542,7 +585,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class Call(RegisterOp): - """Native call f(arg, ...) + """Native call f(arg, ...). The call target can be a module-level function or a class.
""" @@ -607,7 +650,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: @trait -class EmitterInterface(): +class EmitterInterface: @abstractmethod def reg(self, name: Value) -> str: raise NotImplementedError @@ -638,6 +681,7 @@ def emit_declaration(self, line: str) -> None: # True steals all arguments, False steals none, a list steals those in matching positions StealsDescription = Union[bool, List[bool]] +# Description of a primitive operation OpDescription = NamedTuple( 'OpDescription', [('name', str), ('arg_types', List[RType]), @@ -712,7 +756,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class Assign(Op): - """dest = int""" + """Assign a value to a register (dest = int).""" error_kind = ERR_NEVER @@ -735,7 +779,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class LoadInt(RegisterOp): - """dest = int""" + """Load an integer literal.""" error_kind = ERR_NEVER @@ -755,7 +799,11 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class LoadErrorValue(RegisterOp): - """dest = """ + """Load an error value. + + Each type has one reserved value that signals an error (exception). This + loads the error value for a specific type. 
+ """ error_kind = ERR_NEVER @@ -781,7 +829,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class GetAttr(RegisterOp): - """dest = obj.attr (for a native object)""" + """obj.attr (for a native object)""" error_kind = ERR_MAGIC @@ -833,13 +881,18 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: return visitor.visit_set_attr(self) -NAMESPACE_STATIC = 'static' # type: Final # Default name space for statics, variables -NAMESPACE_TYPE = 'type' # type: Final # Static namespace for pointers to native type objects -NAMESPACE_MODULE = 'module' # type: Final # Namespace for modules +# Default name space for statics, variables +NAMESPACE_STATIC = 'static' # type: Final + +# Static namespace for pointers to native type objects +NAMESPACE_TYPE = 'type' # type: Final + +# Namespace for modules +NAMESPACE_MODULE = 'module' # type: Final class LoadStatic(RegisterOp): - """dest = name :: static + """Load a static name (name :: static). Load a C static variable/pointer. The namespace for statics is shared for the entire compilation group. You can optionally provide a module @@ -941,7 +994,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class TupleGet(RegisterOp): - """dest = src[n] (for fixed-length tuple)""" + """Get item of a fixed-length tuple (src[n]).""" error_kind = ERR_NEVER @@ -963,7 +1016,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class Cast(RegisterOp): - """dest = cast(type, src) + """cast(type, src) Perform a runtime type check (no representation or value conversion). @@ -991,7 +1044,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class Box(RegisterOp): - """dest = box(type, src) + """box(type, src) This converts from a potentially unboxed representation to a straight Python object. Only supported for types with an unboxed representation. 
@@ -1021,7 +1074,7 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: class Unbox(RegisterOp): - """dest = unbox(type, src) + """unbox(type, src) This is similar to a cast, but it also changes to a (potentially) unboxed runtime representation. Only supported for types with an unboxed representation. @@ -1087,6 +1140,8 @@ def accept(self, visitor: 'OpVisitor[T]') -> T: @trait class OpVisitor(Generic[T]): + """Generic visitor over ops (uses the visitor design pattern).""" + @abstractmethod def visit_goto(self, op: Goto) -> T: raise NotImplementedError From 5d4de598c15df088e898c04a9b19d8b070aceeca Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sat, 7 Mar 2020 18:10:01 +0000 Subject: [PATCH 2/5] Improve comments and docstrings in mypyc.ir.rtypes --- mypyc/ir/rtypes.py | 61 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/mypyc/ir/rtypes.py b/mypyc/ir/rtypes.py index 388900885079..315714310638 100644 --- a/mypyc/ir/rtypes.py +++ b/mypyc/ir/rtypes.py @@ -32,10 +32,16 @@ class RType: """Abstract base class for runtime types (erased, only concrete; no generics).""" name = None # type: str + # If True, the type has a special unboxed representation. If False, the type is + # represented as PyObject *. Even if True, the representation may contain pointers. is_unboxed = False + # This is the C undefined value for this type. It's used for initialization if there's + # no value yet. c_undefined = None # type: str - is_refcounted = True # If unboxed: does the unboxed version use reference counting? - _ctype = None # type: str # C type; use Emitter.ctype() to access + # If unboxed: does the unboxed version use reference counting? 
+ is_refcounted = True + # C type; use Emitter.ctype() to access + _ctype = None # type: str @abstractmethod def accept(self, visitor: 'RTypeVisitor[T]') -> T: @@ -87,6 +93,8 @@ def deserialize_type(data: Union[JsonDict, str], ctx: 'DeserMaps') -> 'RType': class RTypeVisitor(Generic[T]): + """Generic visitor over RTypes (uses the visitor design pattern).""" + @abstractmethod def visit_rprimitive(self, typ: 'RPrimitive') -> T: raise NotImplementedError @@ -109,7 +117,11 @@ def visit_rvoid(self, typ: 'RVoid') -> T: class RVoid(RType): - """void""" + """The void type (no value). + + This is a singleton -- use void_rtype (below) to refer to this instead of + constructing a new instance. + """ is_unboxed = False name = 'void' @@ -122,13 +134,21 @@ def serialize(self) -> str: return 'void' +# Singleton instance of RVoid void_rtype = RVoid() # type: Final class RPrimitive(RType): """Primitive type such as 'object' or 'int'. - These often have custom ops associated with them. + These often have custom ops associated with them. The 'object' primitive + type can be used to hold arbitrary Python objects. + + Different primitive types have different C representations, and primitives may + be unboxed or boxed. Primitive types don't need to directly correspond to + Python types, but many do. + + NOTE: All supported primitive types are defined below (e.g. object_rprimitive). """ # Map from primitive names to primitive types and is used by deserialization @@ -164,35 +184,53 @@ def __repr__(self) -> str: return '' % self.name +# NOTE: All the possible instances of RPrimitive are defined below. Use these instead of +# creating new instances. + # Used to represent arbitrary objects and dynamically typed values object_rprimitive = RPrimitive('builtins.object', is_unboxed=False, is_refcounted=True) # type: Final +# Arbitrary-precision integer (corresponds to Python 'int').
Small enough values +# are stored unboxed, while large integers are represented as a tagged pointer to +# a Python 'int' PyObject. The lowest bit is used as the tag to decide whether it +# is a signed unboxed value (shifted left by one) or a pointer. +# +# This cannot represent a subclass of int. int_rprimitive = RPrimitive('builtins.int', is_unboxed=True, is_refcounted=True, ctype='CPyTagged') # type: Final +# An unboxed integer. The representation is the same as for unboxed int_rprimitive +# (shifted left by one). short_int_rprimitive = RPrimitive('short_int', is_unboxed=True, is_refcounted=False, ctype='CPyTagged') # type: Final +# Floats are represent as 'float' PyObject * values. (In the future we'll likely +# switch to an unboxed representation.) float_rprimitive = RPrimitive('builtins.float', is_unboxed=False, is_refcounted=True) # type: Final +# An unboxed boolean value. bool_rprimitive = RPrimitive('builtins.bool', is_unboxed=True, is_refcounted=False, ctype='char') # type: Final +# The 'None' value. It needs to have a representation for the undefined/error value. none_rprimitive = RPrimitive('builtins.None', is_unboxed=True, is_refcounted=False, ctype='char') # type: Final +# Python list object (or an instance of a subclass of list). list_rprimitive = RPrimitive('builtins.list', is_unboxed=False, is_refcounted=True) # type: Final +# Python dict object (or an instance of a subclass of dict). dict_rprimitive = RPrimitive('builtins.dict', is_unboxed=False, is_refcounted=True) # type: Final +# Python set object (or an instance of a subclass of set). set_rprimitive = RPrimitive('builtins.set', is_unboxed=False, is_refcounted=True) # type: Final -# At the C layer, str is refered to as unicode (PyUnicode) +# Python str object. At the C layer, str is referred to as unicode (PyUnicode). 
str_rprimitive = RPrimitive('builtins.str', is_unboxed=False, is_refcounted=True) # type: Final -# Tuple of an arbitrary length (corresponds to Tuple[t, ...], with explicit '...') +# Tuple of an arbitrary length (corresponds to Tuple[t, ...], with explicit '...'). tuple_rprimitive = RPrimitive('builtins.tuple', is_unboxed=False, is_refcounted=True) # type: Final @@ -318,7 +356,11 @@ def deserialize(cls, data: JsonDict, ctx: 'DeserMaps') -> 'RTuple': class RInstance(RType): - """Instance of user-defined class (compiled to C extension class).""" + """Instance of user-defined class (compiled to C extension class). + + The runtime representation is 'PyObject *', and these are always reference + counted. + """ is_unboxed = False @@ -392,6 +434,10 @@ def deserialize(cls, data: JsonDict, ctx: 'DeserMaps') -> 'RUnion': def optional_value_type(rtype: RType) -> Optional[RType]: + """If rtype is the union of none_rprimitive and another type X, return X. + + Otherwise return None. + """ if isinstance(rtype, RUnion) and len(rtype.items) == 2: if rtype.items[0] == none_rprimitive: return rtype.items[1] @@ -401,4 +447,5 @@ def optional_value_type(rtype: RType) -> Optional[RType]: def is_optional_type(rtype: RType) -> bool: + """Is rtype an optional type with exactly two union items?""" return optional_value_type(rtype) is not None From 465e80b6442fceea46de74a6cc26f69361f4663f Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sat, 7 Mar 2020 18:29:11 +0000 Subject: [PATCH 3/5] Updated docs and comments, and minor refactoring --- mypyc/ir/class_ir.py | 80 +++++++++++++++++++++++--------------------- mypyc/ir/func_ir.py | 25 ++++++++++++-- 2 files changed, 64 insertions(+), 41 deletions(-) diff --git a/mypyc/ir/class_ir.py b/mypyc/ir/class_ir.py index c0d63abc05d4..f7a8b86e10d5 100644 --- a/mypyc/ir/class_ir.py +++ b/mypyc/ir/class_ir.py @@ -86,47 +86,12 @@ VTableEntries = List[VTableEntry] -def serialize_vtable_entry(entry: VTableEntry) -> JsonDict: - if isinstance(entry, 
VTableMethod): - return { - '.class': 'VTableMethod', - 'cls': entry.cls.fullname, - 'name': entry.name, - 'method': entry.method.decl.fullname, - 'shadow_method': entry.shadow_method.decl.fullname if entry.shadow_method else None, - } - else: - return { - '.class': 'VTableAttr', - 'cls': entry.cls.fullname, - 'name': entry.name, - 'is_setter': entry.is_setter, - } - - -def serialize_vtable(vtable: VTableEntries) -> List[JsonDict]: - return [serialize_vtable_entry(v) for v in vtable] - - -def deserialize_vtable_entry(data: JsonDict, ctx: 'DeserMaps') -> VTableEntry: - if data['.class'] == 'VTableMethod': - return VTableMethod( - ctx.classes[data['cls']], data['name'], ctx.functions[data['method']], - ctx.functions[data['shadow_method']] if data['shadow_method'] else None) - elif data['.class'] == 'VTableAttr': - return VTableAttr(ctx.classes[data['cls']], data['name'], data['is_setter']) - assert False, "Bogus vtable .class: %s" % data['.class'] - - -def deserialize_vtable(data: List[JsonDict], ctx: 'DeserMaps') -> VTableEntries: - return [deserialize_vtable_entry(x, ctx) for x in data] - - class ClassIR: """Intermediate representation of a class. This also describes the runtime structure of native instances. """ + def __init__(self, name: str, module_name: str, is_trait: bool = False, is_generated: bool = False, is_abstract: bool = False, is_ext_class: bool = True) -> None: @@ -139,7 +104,9 @@ def __init__(self, name: str, module_name: str, is_trait: bool = False, # An augmented class has additional methods separate from what mypyc generates. # Right now the only one is dataclasses. self.is_augmented = False + # Does this inherit from a Python class? self.inherits_python = False + # Do instances of this class have __dict__? self.has_dict = False # Do we allow interpreted subclasses? Derived from a mypyc_attr. 
self.allow_interpreted_subclasses = False @@ -147,7 +114,7 @@ def __init__(self, name: str, module_name: str, is_trait: bool = False, # of the object for that class. We currently only support this # in a few ad-hoc cases. self.builtin_base = None # type: Optional[str] - # Default empty ctor + # Default empty constructor self.ctor = FuncDecl(name, None, module_name, FuncSignature([], RInstance(self))) self.attributes = OrderedDict() # type: OrderedDict[str, RType] @@ -398,8 +365,7 @@ def deserialize(cls, data: JsonDict, ctx: 'DeserMaps') -> 'ClassIR': class NonExtClassInfo: - """Information needed to construct a non-extension class. - + """Information needed to construct a non-extension class (Python class). Includes the class dictionary, a tuple of base classes, the class annotations dictionary, and the metaclass. @@ -412,6 +378,42 @@ def __init__(self, dict: Value, bases: Value, anns: Value, metaclass: Value) -> self.metaclass = metaclass +def serialize_vtable_entry(entry: VTableEntry) -> JsonDict: + if isinstance(entry, VTableMethod): + return { + '.class': 'VTableMethod', + 'cls': entry.cls.fullname, + 'name': entry.name, + 'method': entry.method.decl.fullname, + 'shadow_method': entry.shadow_method.decl.fullname if entry.shadow_method else None, + } + else: + return { + '.class': 'VTableAttr', + 'cls': entry.cls.fullname, + 'name': entry.name, + 'is_setter': entry.is_setter, + } + + +def serialize_vtable(vtable: VTableEntries) -> List[JsonDict]: + return [serialize_vtable_entry(v) for v in vtable] + + +def deserialize_vtable_entry(data: JsonDict, ctx: 'DeserMaps') -> VTableEntry: + if data['.class'] == 'VTableMethod': + return VTableMethod( + ctx.classes[data['cls']], data['name'], ctx.functions[data['method']], + ctx.functions[data['shadow_method']] if data['shadow_method'] else None) + elif data['.class'] == 'VTableAttr': + return VTableAttr(ctx.classes[data['cls']], data['name'], data['is_setter']) + assert False, "Bogus vtable .class: %s" % data['.class'] + 
+ +def deserialize_vtable(data: List[JsonDict], ctx: 'DeserMaps') -> VTableEntries: + return [deserialize_vtable_entry(x, ctx) for x in data] + + def all_concrete_classes(class_ir: ClassIR) -> Optional[List[ClassIR]]: """Return all concrete classes among the class itself and its subclasses.""" concrete = class_ir.concrete_subclasses() diff --git a/mypyc/ir/func_ir.py b/mypyc/ir/func_ir.py index f25b0356dc95..557ff8f7fa91 100644 --- a/mypyc/ir/func_ir.py +++ b/mypyc/ir/func_ir.py @@ -1,3 +1,5 @@ +"""Intermediate representation of functions.""" + from typing import List, Optional, Sequence, Dict from typing_extensions import Final @@ -12,6 +14,11 @@ class RuntimeArg: + """Representation of a function argument in IR. + + Argument kind is one of ARG_* constants defined in mypy.nodes. + """ + def __init__(self, name: str, typ: RType, kind: int = ARG_POS) -> None: self.name = name self.type = typ @@ -37,7 +44,10 @@ def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'RuntimeArg': class FuncSignature: - # TODO: track if method? + """Signature of a function in IR.""" + + # TODO: Track if method? + def __init__(self, args: Sequence[RuntimeArg], ret_type: RType) -> None: self.args = tuple(args) self.ret_type = ret_type @@ -62,6 +72,12 @@ def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'FuncSignature': class FuncDecl: + """Declaration of a function in IR (without body or implementation). + + A function can a regular module-level function, a method, a static method, + a class method, or a property getter/setter. + """ + def __init__(self, name: str, class_name: Optional[str], @@ -129,7 +145,11 @@ def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'FuncDecl': class FuncIR: - """Intermediate representation of a function with contextual information.""" + """Intermediate representation of a function with contextual information. + + Unlike FuncDecl, this includes the IR of the body (basic blocks) and an + environment. 
+ """ def __init__(self, decl: FuncDecl, @@ -199,6 +219,7 @@ def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'FuncIR': def format_blocks(blocks: List[BasicBlock], env: Environment) -> List[str]: + """Format a list of IR basic blocks into a human-readable form.""" # First label all of the blocks for i, block in enumerate(blocks): block.label = i From e0d3738da9283a2c867ea19fb5369fa13c495f74 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 8 Mar 2020 12:00:32 +0000 Subject: [PATCH 4/5] More comment and docstring updates --- mypyc/ir/rtypes.py | 98 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 29 deletions(-) diff --git a/mypyc/ir/rtypes.py b/mypyc/ir/rtypes.py index 315714310638..36ead5735773 100644 --- a/mypyc/ir/rtypes.py +++ b/mypyc/ir/rtypes.py @@ -1,9 +1,16 @@ """Types used in the intermediate representation. -These are runtime types (RTypes) as opposed to mypy Type objects. The -latter are only used during type checking and ignored at runtime. The -generated IR ensures some runtime type safety properties based on -RTypes. +These are runtime types (RTypes), as opposed to mypy Type objects. +The latter are only used during type checking and not directly used at +runtime. Runtime types are derived from mypy types, but there's no +simple one-to-one correspondence. (Here 'runtime' means 'runtime +checked'.) + +The generated IR ensures some runtime type safety properties based on +RTypes. Compiled code can assume that the runtime value matches the +static RType of a value. If the RType of a register is 'builtins.str' +(str_rprimitive), for example, the generated IR will ensure that the +register will have a 'str' object. RTypes are simpler and less expressive than mypy (or PEP 484) types. For example, all mypy types of form 'list[T]' (for arbitrary T) @@ -141,14 +148,15 @@ def serialize(self) -> str: class RPrimitive(RType): """Primitive type such as 'object' or 'int'. 
- These often have custom ops associated with them. The 'object' primitive - type can be used to hold arbitrary Python objects. + These often have custom ops associated with them. The 'object' + primitive type can be used to hold arbitrary Python objects. - Different primitive types have different C representations, and primitives may - be unboxed or boxed. Primitive types don't need to directly correspond to - Python types, but many do. + Different primitive types have different representations, and + primitives may be unboxed or boxed. Primitive types don't need to + directly correspond to Python types, but most do. - NOTE: All supported primitive types are defined below (e.g. object_rprimitive). + NOTE: All supported primitive types are defined below + (e.g. object_rprimitive). """ # Map from primitive names to primitive types and is used by deserialization @@ -168,6 +176,7 @@ def __init__(self, if ctype == 'CPyTagged': self.c_undefined = 'CPY_INT_TAG' elif ctype == 'PyObject *': + # Boxed types use the null pointer as the error value. self.c_undefined = 'NULL' elif ctype == 'char': self.c_undefined = '2' @@ -184,37 +193,54 @@ def __repr__(self) -> str: return '' % self.name -# NOTE: All the possible instances of RPrimitive are defined below. Use these instead of -# creating new instances. +# NOTE: All the supported instances of RPrimitive are defined +# below. Use these instead of creating new instances. -# Used to represent arbitrary objects and dynamically typed values +# Used to represent arbitrary objects and dynamically typed (Any) +# values. There are various ops that let you perform generic, runtime +# checked operations on these (that match Python semantics). See the +# ops in mypyc.primitives.misc_ops, including py_getattr_op, +# py_call_op, and many others. +# +# NOTE: Even though this is very flexible, this type should be used as +# little as possible, as generic ops are typically slow. 
Other types, +# including other primitive types and RInstance, are usually much +# faster. object_rprimitive = RPrimitive('builtins.object', is_unboxed=False, is_refcounted=True) # type: Final -# Arbitrary-precision integer (corresponds to Python 'int'). Small enough values -# are stored unboxed, while large integers are represented as a tagged pointer to -# a Python 'int' PyObject. The lowest bit is used as the tag to decide whether it -# is a signed unboxed value (shifted left by one) or a pointer. +# Arbitrary-precision integer (corresponds to Python 'int'). Small +# enough values are stored unboxed, while large integers are +# represented as a tagged pointer to a Python 'int' PyObject. The +# lowest bit is used as the tag to decide whether it is a signed +# unboxed value (shifted left by one) or a PyObject * pointing to an +# 'int' object. Pointers have the least significant bit set. # -# This cannot represent a subclass of int. +# The undefined/error value is the null pointer (1 -- only the least +# significant bit is set). +# +# This cannot represent a subclass of int. An instance of a subclass +# of int is coerced to the corresponding 'int' value. int_rprimitive = RPrimitive('builtins.int', is_unboxed=True, is_refcounted=True, ctype='CPyTagged') # type: Final -# An unboxed integer. The representation is the same as for unboxed int_rprimitive -# (shifted left by one). +# An unboxed integer. The representation is the same as for unboxed +# int_rprimitive (shifted left by one). These can be used when an +# integer is known to be small enough to fit size_t (CPyTagged). short_int_rprimitive = RPrimitive('short_int', is_unboxed=True, is_refcounted=False, ctype='CPyTagged') # type: Final -# Floats are represent as 'float' PyObject * values. (In the future we'll likely -# switch to an unboxed representation.) +# Floats are represented as 'float' PyObject * values. (In the future +# we'll likely switch to a more efficient, unboxed representation.)
float_rprimitive = RPrimitive('builtins.float', is_unboxed=False, is_refcounted=True) # type: Final -# An unboxed boolean value. +# An unboxed boolean value. This actually has three possible values +# (0 -> False, 1 -> True, 2 -> error). bool_rprimitive = RPrimitive('builtins.bool', is_unboxed=True, is_refcounted=False, ctype='char') # type: Final -# The 'None' value. It needs to have a representation for the undefined/error value. +# The 'None' value. The possible values are 0 -> None and 2 -> error. none_rprimitive = RPrimitive('builtins.None', is_unboxed=True, is_refcounted=False, ctype='char') # type: Final @@ -227,10 +253,12 @@ def __repr__(self) -> str: # Python set object (or an instance of a subclass of set). set_rprimitive = RPrimitive('builtins.set', is_unboxed=False, is_refcounted=True) # type: Final -# Python str object. At the C layer, str is referred to as unicode (PyUnicode). +# Python str object. At the C layer, str is referred to as unicode +# (PyUnicode). str_rprimitive = RPrimitive('builtins.str', is_unboxed=False, is_refcounted=True) # type: Final -# Tuple of an arbitrary length (corresponds to Tuple[t, ...], with explicit '...'). +# Tuple of an arbitrary length (corresponds to Tuple[t, ...], with +# explicit '...'). tuple_rprimitive = RPrimitive('builtins.tuple', is_unboxed=False, is_refcounted=True) # type: Final @@ -311,7 +339,19 @@ def visit_rvoid(self, t: 'RVoid') -> str: class RTuple(RType): - """Fixed-length unboxed tuple (represented as a C struct).""" + """Fixed-length unboxed tuple (represented as a C struct). + + These are used to represent mypy TupleType values (fixed-length + Python tuples). Since this is unboxed, the identity of a tuple + object is not preserved within compiled code. If the identity of a + tuple is important, or there is a need to have multiple references + to a single tuple object, a variable-length tuple should be used + (tuple_rprimitive or Tuple[T, ...] with explicit '...'), as they + are boxed. 
+ + These aren't immutable. However, user code won't be able to mutate + individual tuple items. + """ is_unboxed = True @@ -358,8 +398,8 @@ def deserialize(cls, data: JsonDict, ctx: 'DeserMaps') -> 'RTuple': class RInstance(RType): """Instance of user-defined class (compiled to C extension class). - The runtime representation is 'PyObject *', and these are always reference - counted. + The runtime representation is 'PyObject *', and these are always + boxed and thus reference-counted. """ is_unboxed = False From fd142c2bb59e590a48d1507dd02b4de275be3bb1 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 8 Mar 2020 12:15:48 +0000 Subject: [PATCH 5/5] Minor updates --- mypyc/ir/func_ir.py | 4 ++-- mypyc/ir/ops.py | 4 ++-- mypyc/ir/rtypes.py | 21 +++++++++++++++++---- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/mypyc/ir/func_ir.py b/mypyc/ir/func_ir.py index 557ff8f7fa91..4c6d51ea564b 100644 --- a/mypyc/ir/func_ir.py +++ b/mypyc/ir/func_ir.py @@ -74,8 +74,8 @@ def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'FuncSignature': class FuncDecl: """Declaration of a function in IR (without body or implementation). - A function can a regular module-level function, a method, a static method, - a class method, or a property getter/setter. + A function can be a regular module-level function, a method, a + static method, a class method, or a property getter/setter. """ def __init__(self, diff --git a/mypyc/ir/ops.py b/mypyc/ir/ops.py index 081e70d387df..10b2feae409d 100644 --- a/mypyc/ir/ops.py +++ b/mypyc/ir/ops.py @@ -325,7 +325,7 @@ def to_str(self, env: Environment) -> str: class Register(Value): """A register holds a value of a specific type, and it can be read and mutated. - Each local variable maps to a registers, and they are also used for some + Each local variable maps to a register, and they are also used for some (but not all) temporary values. 
""" @@ -432,7 +432,7 @@ def __init__(self, self.left = left self.true = true_label self.false = false_label - # BOOL_EXPR (boolean check) or IS_ERROR (error value check + # BOOL_EXPR (boolean check) or IS_ERROR (error value check) self.op = op self.negated = False # If not None, the true label should generate a traceback entry (func name, line number) diff --git a/mypyc/ir/rtypes.py b/mypyc/ir/rtypes.py index 36ead5735773..ecfe2a1c1ff3 100644 --- a/mypyc/ir/rtypes.py +++ b/mypyc/ir/rtypes.py @@ -39,11 +39,12 @@ class RType: """Abstract base class for runtime types (erased, only concrete; no generics).""" name = None # type: str - # If True, the type has a special unboxed representation. If False, the type is - # represented as PyObject *. Even if True, the representation may contain pointers. + # If True, the type has a special unboxed representation. If False, the + # type is represented as PyObject *. Even if True, the representation + # may contain pointers. is_unboxed = False - # This is the C undefined value for this type. It's used for initialization if there's - # no value yet. + # This is the C undefined value for this type. It's used for initialization + # if there's no value yet, and for function return value on error/exception. c_undefined = None # type: str # If unboxed: does the unboxed version use reference counting? is_refcounted = True @@ -202,6 +203,9 @@ def __repr__(self) -> str: # ops in mypyc.primitives.misc_ops, including py_getattr_op, # py_call_op, and many others. # +# If there is no more specific RType available for some value, we fall +# back to using this type. +# # NOTE: Even though this is very flexible, this type should be used as # little as possible, as generic ops are typically slow. Other types, # including other primitive types and RInstance, are usually much @@ -400,6 +404,15 @@ class RInstance(RType): The runtime representation is 'PyObject *', and these are always boxed and thus reference-counted. 
+ + These support fast method calls and fast attribute access using + vtables, and they usually use a dict-free, struct-based + representation of attributes. Method calls and attribute access + can skip the vtable if we know that there is no overriding. + + These are also sometimes called 'native' types, since these have + the most efficient representation and ops (along with certain + RPrimitive types and RTuple). """ is_unboxed = False