Commit 08de077

Prevent naming conflicts in expr_from_string placeholder replacement (#519)
1 parent d3d9a48 commit 08de077

16 files changed (+146, -71 lines)

helion/_compiler/ast_extension.py

Lines changed: 57 additions & 3 deletions
@@ -2,6 +2,7 @@
 import ast
 import enum
+import re
 import threading
 import typing
 from typing import TYPE_CHECKING
@@ -158,16 +159,68 @@ def create_arguments(args: list[ast.arg]) -> ast.arguments:
 
 
 def statement_from_string(template: str, **placeholders: ast.AST) -> ast.stmt:
-    (statement,) = ast.parse(template).body
+    """
+    Create an AST statement from a template string with placeholders.
+
+    Uses {placeholder} syntax to mark placeholders that should be replaced with AST nodes.
+    This supports two common patterns:
+
+    1. Regular strings - placeholders use single braces:
+       expr_from_string("tl.load({ptr} + {offset}, {mask})",
+                        ptr=ptr_ast, offset=offset_ast, mask=mask_ast)
+
+    2. f-strings - placeholders use double braces (which become single braces):
+       name = "my_tensor"
+       expr_from_string(f"tl.load({name} + {{offset}}, {{mask}})",
+                        offset=offset_ast, mask=mask_ast)
+       # In the f-string, {name} is interpolated to "my_tensor",
+       # while {{offset}} becomes {offset} for placeholder replacement
+    """
     location: SourceLocation = current_location()
 
+    # Find all placeholders and validate
+    pattern = r"\{(\w+)\}(?!:)"  # {word} not followed by colon (avoid dict keys)
+    used = set(re.findall(pattern, template))
+    if missing := used - placeholders.keys():
+        raise KeyError(f"Missing placeholders: {sorted(missing)}")
+
+    # Replace placeholders with unique identifiers to avoid naming conflicts
+    # For example, "{x}" in "x = {x}" must not conflict with the variable "x"
+    mapping = {}
+
+    def make_unique(m: re.Match[str]) -> str:
+        # Extract placeholder name from the regex match (e.g., "offset" from "{offset}")
+        name = m.group(1)
+        # Create a unique identifier that can't exist in user code
+        # Using double underscores and "placeholder" to ensure uniqueness
+        uid = f"__placeholder_{len(mapping)}__"
+        # Store the mapping from unique ID to the actual AST node
+        mapping[uid] = placeholders[name]
+        return uid
+
+    # First pass: Replace all {placeholder} with __placeholder_N__ in the template
+    # This prevents conflicts and allows ast.parse to create a valid AST
+    modified_template = re.sub(pattern, make_unique, template)
+
+    # Parse the modified template into an AST
+    (statement,) = ast.parse(modified_template).body
+
+    # Second pass: Recursively walk the AST and replace __placeholder_N__ identifiers
+    # with the actual AST nodes provided by the user
     def _replace(node: _R) -> _R:
+        # Handle lists by recursively transforming each element
         if isinstance(node, list):
             return [_replace(item) for item in node]  # pyright: ignore[reportReturnType]
+
+        # Pass through non-AST nodes unchanged (e.g., strings, numbers)
         if not isinstance(node, ast.AST):
             return node
-        if isinstance(node, ast.Name) and node.id in placeholders:
-            return placeholders[node.id]  # pyright: ignore[reportReturnType]
+
+        # Replace placeholder names with their corresponding AST nodes
+        if isinstance(node, ast.Name) and node.id in mapping:
+            return mapping[node.id]  # pyright: ignore[reportReturnType]
+
+        # Recursively transform all child nodes and wrap in ExtendedAST subclass
         cls = get_wrapper_cls(type(node))
         return location.to_ast(  # pyright: ignore[reportReturnType]
             cls(
@@ -176,6 +229,7 @@ def _replace(node: _R) -> _R:
             )
         )
 
+    # Apply the second pass transformation to replace all placeholders
     return _replace(statement)
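
To make the conflict concrete, here is a minimal standalone sketch of the two-pass scheme above (illustration only, not part of the commit: it omits helion's ExtendedAST wrapping and source-location tracking, and the helper names are hypothetical):

import ast
import re

def sketch_statement_from_string(template: str, **placeholders: ast.AST) -> ast.stmt:
    pattern = r"\{(\w+)\}(?!:)"
    mapping: dict[str, ast.AST] = {}

    def make_unique(m: re.Match[str]) -> str:
        # Same trick as the commit: swap {name} for a collision-proof identifier
        uid = f"__placeholder_{len(mapping)}__"
        mapping[uid] = placeholders[m.group(1)]
        return uid

    (statement,) = ast.parse(re.sub(pattern, make_unique, template)).body

    class Replacer(ast.NodeTransformer):
        def visit_Name(self, node: ast.Name) -> ast.AST:
            # Only the unique IDs are substituted; user variables are untouched
            return mapping.get(node.id, node)

    return ast.fix_missing_locations(Replacer().visit(statement))

# The case the unique IDs guard against: the placeholder {x} shares its name
# with the assignment target "x". Naive name-based substitution would also
# rewrite the target; the two-pass version leaves it alone.
stmt = sketch_statement_from_string("x = {x}", x=ast.parse("y + 1", mode="eval").body)
print(ast.unparse(stmt))  # x = y + 1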

helion/_compiler/device_function.py

Lines changed: 2 additions & 2 deletions
@@ -523,8 +523,8 @@ def codegen_function_call(self) -> ast.AST:
         assert pid is not None
         # TODO(jansel): we should run CSE this statement
         call_statement = statement_from_string(
-            f"_launcher({self.name}, __call_grid_expr, {', '.join(args)})",
-            __call_grid_expr=pid.codegen_grid(),
+            f"_launcher({self.name}, {{call_grid_expr}}, {', '.join(args)})",
+            call_grid_expr=pid.codegen_grid(),
         )
         assert isinstance(call_statement, ExtendedAST)
         # Mark the kernel call we can find it in codegen_precompile_def
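
As an aside (hypothetical values, not from the commit), the doubled braces work because Python's f-string interpolation turns {{call_grid_expr}} into the literal {call_grid_expr}, which the placeholder pass then substitutes, while {self.name} and {', '.join(args)} are expanded immediately:

name = "_kernel_fn"   # stands in for self.name
args = ["x", "y", "n"]
template = f"_launcher({name}, {{call_grid_expr}}, {', '.join(args)})"
print(template)  # _launcher(_kernel_fn, {call_grid_expr}, x, y, n)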

helion/_compiler/generate_ast.py

Lines changed: 3 additions & 1 deletion
@@ -80,7 +80,9 @@ def lift(self, expr: ast.AST, *, dce: bool = False, prefix: str = "v") -> ast.Na
         assert isinstance(expr, ExtendedAST), expr
         with expr:
             varname = self.tmpvar(dce=dce, prefix=prefix)
-            self.add_statement(statement_from_string(f"{varname} = expr", expr=expr))
+            self.add_statement(
+                statement_from_string(f"{varname} = {{expr}}", expr=expr)
+            )
         return create(ast.Name, id=varname, ctx=ast.Load())
 
     @contextlib.contextmanager

helion/_compiler/helper_function.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ def lift(self, expr: ast.AST, *, dce: bool = False, prefix: str = "v") -> ast.Na
         if isinstance(expr, ast.Name):
             return expr
         varname = self.tmpvar(dce=dce, prefix=prefix)
-        self.add_statement(statement_from_string(f"{varname} = expr", expr=expr))
+        self.add_statement(statement_from_string(f"{varname} = {{expr}}", expr=expr))
         return create(ast.Name, id=varname, ctx=ast.Load())

helion/_compiler/indexing_strategy.py

Lines changed: 13 additions & 13 deletions
@@ -87,7 +87,7 @@ def codegen_load(
         extra = ", other=0"
         name = state.device_function.tensor_arg(fake_tensor).name
         return expr_from_string(
-            f"tl.load({name} + offset, mask{extra})",
+            f"tl.load({name} + {{offset}}, {{mask}}{extra})",
             offset=indexing.index_expr,
             mask=indexing.mask_expr,
         )
@@ -103,7 +103,7 @@ def codegen_store(
         indexing = SubscriptIndexing.create(state, fake_tensor, subscript, extra_mask)
         name = state.device_function.tensor_arg(fake_tensor).name
         return expr_from_string(
-            f"tl.store({name} + offset, value, mask)",
+            f"tl.store({name} + {{offset}}, {{value}}, {{mask}})",
             value=value,
             offset=indexing.index_expr,
             mask=indexing.mask_expr,
@@ -131,7 +131,7 @@ def codegen_load(
         return indexing.reshape_load(
             state,
             expr_from_string(
-                f"tl.load(block_ptr, boundary_check={indexing.boundary_check(state)}, padding_option='zero')",
+                f"tl.load({{block_ptr}}, boundary_check={indexing.boundary_check(state)}, padding_option='zero')",
                 block_ptr=indexing.make_block_ptr(state),
             ),
         )
@@ -153,7 +153,7 @@ def codegen_store(
         assert extra_mask is None
         indexing = BlockedSubscriptIndexing.create(state, fake_tensor, subscript)
         return expr_from_string(
-            f"tl.store(block_ptr, value, boundary_check={indexing.boundary_check(state)})",
+            f"tl.store({{block_ptr}}, {{value}}, boundary_check={indexing.boundary_check(state)})",
             block_ptr=indexing.make_block_ptr(state),
             value=indexing.reshape_store(state, value),
         )
@@ -268,7 +268,7 @@ def codegen_load(
         desc_arg = indexing.tensor_descriptor_arg(state)
         if desc_arg.permutation is not None:
             load_expr = expr_from_string(
-                f"tl.permute(load_result, {desc_arg.inverse_permutation!r})",
+                f"tl.permute({{load_result}}, {desc_arg.inverse_permutation!r})",
                 load_result=load_expr,
             )
 
@@ -296,12 +296,12 @@ def codegen_store(
         if desc_arg.permutation is not None:
             # Apply permutation to the value
             store_value = expr_from_string(
-                f"tl.permute(store_val, {desc_arg.permutation!r})",
+                f"tl.permute({{store_val}}, {desc_arg.permutation!r})",
                 store_val=store_value,
             )
 
         return expr_from_string(
-            f"{indexing.tensor_descriptor(state)}.store({indexing.offsets_str_permuted(state)}, value)",
+            f"{indexing.tensor_descriptor(state)}.store({indexing.offsets_str_permuted(state)}, {{value}})",
             value=store_value,
         )
 
@@ -372,7 +372,7 @@ def get_mask_expr(
         mask_exprs.append(dev_ptr_mask_expr)
 
         if indexing.has_mask():
-            mask_exprs.append(f"(tensor_mask){tensor_broadcast}")
+            mask_exprs.append(f"({{tensor_mask}}){tensor_broadcast}")
         return expr_from_string(
             "&".join(mask_exprs), tensor_mask=indexing.mask_expr
         )
@@ -407,7 +407,7 @@ def codegen_load(
 
         dtype = triton_type(tensor_like.dtype)
         return expr_from_string(
-            f"tl.load((base.to(tl.pointer_type({dtype}))){stack_broadcast} + (offset){tensor_broadcast}, mask{extra})",
+            f"tl.load(({{base}}.to(tl.pointer_type({dtype}))){stack_broadcast} + ({{offset}}){tensor_broadcast}, {{mask}}{extra})",
             base=dev_ptrs_ast,
             offset=indexing.index_expr,
             mask=mask_expr,
@@ -439,7 +439,7 @@ def codegen_store(
 
         dtype = triton_type(tensor_like.dtype)
         return expr_from_string(
-            f"tl.store(base.to(tl.pointer_type({dtype})){stack_broadcast} + (offset){tensor_broadcast}, value, mask)",
+            f"tl.store({{base}}.to(tl.pointer_type({dtype})){stack_broadcast} + ({{offset}}){tensor_broadcast}, {{value}}, {{mask}})",
             base=dev_ptrs_ast,
             value=value,
             offset=indexing.index_expr,
@@ -616,7 +616,7 @@ def create(
 
         kwargs = {}
         if extra_mask is not None:
-            mask_values.setdefault("_extra_mask")
+            mask_values.setdefault("{_extra_mask}")
             kwargs["_extra_mask"] = extra_mask
         return SubscriptIndexing(
             expr_from_string("+".join(index_expr)),
@@ -710,13 +710,13 @@ def reshape_load(self, state: CodegenState, node: ast.AST) -> ast.AST:
         if not self.need_reshape(node):
             return node
         shape = state.tile_strategy.shape_str(self.reshaped_size)
-        return expr_from_string(f"tl.reshape(node, {shape})", node=node)
+        return expr_from_string(f"tl.reshape({{node}}, {shape})", node=node)
 
     def reshape_store(self, state: CodegenState, node: ast.AST) -> ast.AST:
         if not self.need_reshape(node):
             return node
         shape = state.tile_strategy.shape_str(self.block_shape)
-        return expr_from_string(f"tl.reshape(node, {shape})", node=node)
+        return expr_from_string(f"tl.reshape({{node}}, {shape})", node=node)
 
     @staticmethod
     def is_supported(

helion/_compiler/inductor_lowering.py

Lines changed: 28 additions & 20 deletions
@@ -373,7 +373,7 @@ def visit(n: torch.fx.Node) -> None:
             # Broadcast to force ranks to match
             expand = ["None"] * (ndim - fake_val.ndim) + [":"] * fake_val.ndim
             ast_val = expr_from_string(
-                "tensor[" + ", ".join(expand) + "]", tensor=ast_val
+                "{tensor}[" + ", ".join(expand) + "]", tensor=ast_val
             )
         if (
             isinstance(ast_val, ast.Name)
@@ -796,7 +796,7 @@ def codegen_unsqueeze(ctx: GraphInterpreter, node: torch.fx.Node) -> object:
     args = [":"] * ndim
     args.insert(dim, "None")
     return expr_from_string(
-        f"tensor[{', '.join(args)}]",
+        f"{{tensor}}[{', '.join(args)}]",
         tensor=tensor,
     )
 
@@ -817,7 +817,7 @@ def codegen_view(ctx: GraphInterpreter, node: torch.fx.Node) -> object:
     shape_str = ctx.cg.device_function.tile_strategy.shape_str(
         [*node.meta["val"].size()]
     )
-    return expr_from_string(f"tl.reshape(tensor, {shape_str})", tensor=tensor)
+    return expr_from_string(f"tl.reshape({{tensor}}, {shape_str})", tensor=tensor)
 
 
 @register_lowering(
@@ -831,7 +831,7 @@ def codegen_permute(ctx: GraphInterpreter, node: torch.fx.Node) -> object:
     dims = [*dims]  # pyright: ignore[reportGeneralTypeIssues,reportOptionalIterable]
     assert {*dims} == {*range(len(dims))}, dims
     return expr_from_string(
-        f"tl.permute(tensor, {dims!r})",
+        f"tl.permute({{tensor}}, {dims!r})",
         tensor=tensor,
     )
 
@@ -851,10 +851,12 @@ def codegen_expand(ctx: GraphInterpreter, node: torch.fx.Node) -> object:
     broadcasting = [":"] * len(shape)
     for i in range(len(shape) - node.args[0].meta["val"].ndim):  # pyright: ignore[reportAttributeAccessIssue,reportOptionalMemberAccess]
         broadcasting[i] = "None"
-    tensor = expr_from_string(f"tensor[{', '.join(broadcasting)}]", tensor=tensor)
+    tensor = expr_from_string(
+        f"{{tensor}}[{', '.join(broadcasting)}]", tensor=tensor
+    )
     shape_str = ctx.cg.device_function.tile_strategy.shape_str(shape)
     return expr_from_string(
-        f"tl.broadcast_to(tensor, {shape_str})",
+        f"tl.broadcast_to({{tensor}}, {shape_str})",
         tensor=tensor,
     )
 
@@ -945,7 +947,7 @@ def reduce_3d_dot(
             f", input_precision={datatype!r}" if datatype is not None else ""
         )
         return expr_from_string(
-            f"tl.dot(lhs, rhs, acc=acc{precision_arg})",
+            f"tl.dot({{lhs}}, {{rhs}}, acc={{acc}}{precision_arg})",
            lhs=lhs,
            rhs=rhs,
            acc=acc,  # pyright: ignore[reportArgumentType]
@@ -954,7 +956,9 @@
         precision_arg = (
             f", input_precision={datatype!r}" if datatype is not None else ""
         )
-        return expr_from_string(f"tl.dot(lhs, rhs{precision_arg})", lhs=lhs, rhs=rhs)
+        return expr_from_string(
+            f"tl.dot({{lhs}}, {{rhs}}{precision_arg})", lhs=lhs, rhs=rhs
+        )
 
     # create reshape, dot, then reshape
     lhs_shape_str = ctx.cg.device_function.tile_strategy.shape_str(
@@ -966,18 +970,18 @@
     out_shape_str = ctx.cg.device_function.tile_strategy.shape_str(
         [*node.meta["val"].size()]
     )
-    lhs_reshape = expr_from_string(f"tl.reshape(lhs, {lhs_shape_str})", lhs=lhs)
-    rhs_reshape = expr_from_string(f"tl.reshape(rhs, {rhs_shape_str})", rhs=rhs)
+    lhs_reshape = expr_from_string(f"tl.reshape({{lhs}}, {lhs_shape_str})", lhs=lhs)
+    rhs_reshape = expr_from_string(f"tl.reshape({{rhs}}, {rhs_shape_str})", rhs=rhs)
     if with_acc:
         acc_shape_str = ctx.cg.device_function.tile_strategy.shape_str(
             [*node.args[0].meta["val"].size()[1:]]  # pyright: ignore[reportAttributeAccessIssue,reportOptionalMemberAccess]
         )
-        acc_reshape = expr_from_string(f"tl.reshape(rhs, {acc_shape_str})", rhs=acc)  # pyright: ignore[reportArgumentType]
+        acc_reshape = expr_from_string(f"tl.reshape({{rhs}}, {acc_shape_str})", rhs=acc)  # pyright: ignore[reportArgumentType]
         precision_arg = (
             f", input_precision={datatype!r}" if datatype is not None else ""
         )
         comp = expr_from_string(
-            f"tl.dot(lhs, rhs, acc=acc{precision_arg})",
+            f"tl.dot({{lhs}}, {{rhs}}, acc={{acc}}{precision_arg})",
             lhs=lhs_reshape,
             rhs=rhs_reshape,
             acc=acc_reshape,
@@ -987,11 +991,11 @@
             f", input_precision={datatype!r}" if datatype is not None else ""
         )
         comp = expr_from_string(
-            f"tl.dot(lhs, rhs{precision_arg})",
+            f"tl.dot({{lhs}}, {{rhs}}{precision_arg})",
             lhs=lhs_reshape,
             rhs=rhs_reshape,
         )
-    return expr_from_string(f"tl.reshape(lhs, {out_shape_str})", lhs=comp)
+    return expr_from_string(f"tl.reshape({{lhs}}, {out_shape_str})", lhs=comp)
 
 
 @register_lowering(torch.ops.aten.bmm.default, apply_dot_requirements)  # pyright: ignore[reportAttributeAccessIssue]
@@ -1122,7 +1126,9 @@ def _create_named_result(self, node: Node, result: ast.expr) -> str:
 
         # Regular variable assignment
         name = self.cg.device_function.new_var(node.name)
-        self.cg.add_statement(statement_from_string(f"{name} = result", result=result))
+        self.cg.add_statement(
+            statement_from_string(f"{name} = {{result}}", result=result)
+        )
         return name
 
     def _collect_multi_outputs(
@@ -1160,7 +1166,7 @@ def _collect_multi_outputs(
             if not isinstance(result, ast.Name):
                 var_name = self.cg.device_function.new_var(f"{node.name}_output{i}")
                 self.cg.add_statement(
-                    statement_from_string(f"{var_name} = result", result=result)
+                    statement_from_string(f"{var_name} = {{result}}", result=result)
                 )
                 result = create(ast.Name, id=var_name, ctx=ast.Load())
             final_outputs.append(result)
@@ -1239,7 +1245,9 @@ def codegen_call_with_graph(
                 # Phi nodes will merge variable names from outside the loop, but the old value
                 # of those variables could have usages.
                 copy_name = cg.device_function.new_var(arg.id + "_copy")
-                cg.add_statement(statement_from_string(f"{copy_name} = arg", arg=arg))
+                cg.add_statement(
+                    statement_from_string(f"{copy_name} = {{arg}}", arg=arg)
+                )
                 new_args.append(expr_from_string(copy_name))
             else:
                 new_args.append(cg.lift(arg))
@@ -1296,11 +1304,11 @@ def codegen_iota(ctx: GraphInterpreter, node: torch.fx.Node) -> object:
     )
     assert isinstance(dtype, torch.dtype)
     (length_arg,) = node.args  # expecting a single argument for length
-    expr = "tl.arange(0, length)"
+    expr = "tl.arange(0, {length})"
     if step != 1:
-        expr = f"step * {expr}"
+        expr = f"{{step}} * {expr}"
    if start != 0:
-        expr = f"start + {expr}"
+        expr = f"{{start}} + {expr}"
     if dtype != torch.int32:
         expr = f"({expr}).to({triton_type(dtype)})"
     return expr_from_string(
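
To see how the codegen_iota template composes before substitution, with hypothetical start=2 and step=3 (the dtype branch is omitted here since it only wraps the string in a cast):

start, step = 2, 3  # hypothetical values
expr = "tl.arange(0, {length})"
if step != 1:
    expr = f"{{step}} * {expr}"
if start != 0:
    expr = f"{{start}} + {expr}"
print(expr)  # {start} + {step} * tl.arange(0, {length})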
