Fix hl.rand to use tile-specific offsets instead of fixed offsets, ensuring unique random numbers per tile

karthickai · karthickai · commit 521dcc7f7dc4 · 2025-09-25T12:49:31.000-07:00
stack-info: PR: #685, branch: karthickai/stack/3
diff --git a/helion/language/random_ops.py b/helion/language/random_ops.py
@@ -81,12 +81,24 @@ def _rand_codegen(state: CodegenState) -> ast.AST:
     fake_value = state.fake_value
     assert isinstance(fake_value, torch.Tensor)
     shape_str = state.device_function.tile_strategy.shape_str(fake_value.size())
-
-    numel = " * ".join(shape_str.strip("[]").split(","))
     seed_ast = state.ast_arg(1)
-    offs_expr = f"tl.arange(0, {numel}).reshape({shape_str})"
+    offs_expr = None
+    env = CompileEnvironment.current()
+    for size in fake_value.size():
+        block_id = env.get_block_id(size)
+        if block_id is not None:
+            if len(fake_value.size()) == 1:
+                # 1D: use indices_0 directly, it already has the right values
+                index_var = state.codegen.index_var(block_id)
+                offs_expr = f"{index_var}.reshape({shape_str})"
+            else:
+                # N_D: use offset_0 + full range
+                offset_var = state.codegen.offset_var(block_id)
+                numel = " * ".join(shape_str.strip("[]").split(","))
+                offs_expr = (
+                    f"({offset_var} + tl.arange(0, {numel})).reshape({shape_str})"
+                )
     expr = f"tl.rand({{seed}}, {offs_expr})"
-
     return expr_from_string(expr, seed=seed_ast)
 
 
diff --git a/test/test_rng.py b/test/test_rng.py
@@ -366,7 +366,7 @@ def rand_kernel_tiled_1d(x: torch.Tensor, seed: int) -> torch.Tensor:
             "Different seeds should produce different outputs",
         )
 
-        _, output3 = code_and_output(rand_kernel_tiled_1d, (x_small, 42))
+        code3, output3 = code_and_output(rand_kernel_tiled_1d, (x_small, 42))
         self.assertTrue(
             torch.allclose(output, output3),
             "Same seed should produce identical outputs",
@@ -376,6 +376,8 @@ def rand_kernel_tiled_1d(x: torch.Tensor, seed: int) -> torch.Tensor:
         self.assertTrue(torch.all(output >= 0.0), "All values should be >= 0")
         self.assertTrue(torch.all(output < 1.0), "All values should be < 1")
 
+        self.assertIn("tl.rand(seed, indices_0", code3)
+
     def test_hl_rand_2d(self):
         @helion.kernel
         def rand_kernel_tiled_2d(x: torch.Tensor, seed: int) -> torch.Tensor:
@@ -394,14 +396,15 @@ def rand_kernel_tiled_2d(x: torch.Tensor, seed: int) -> torch.Tensor:
             "Different seeds should produce different outputs",
         )
 
-        _, output3 = code_and_output(rand_kernel_tiled_2d, (x_small, 42))
+        code3, output3 = code_and_output(rand_kernel_tiled_2d, (x_small, 42))
         self.assertTrue(
             torch.allclose(output, output3),
             "Same seed should produce identical outputs",
         )
 
         self.assertTrue(torch.all(output >= 0.0), "All values should be >= 0")
         self.assertTrue(torch.all(output < 1.0), "All values should be < 1")
+        self.assertIn("tl.rand(seed, (offset_0 + tl.arange(0,", code3)
 
     def test_hl_rand_3d(self):
         @helion.kernel
@@ -423,7 +426,7 @@ def rand_kernel_tiled_3d(x: torch.Tensor, seed: int) -> torch.Tensor:
             "Different seeds should produce different outputs",
         )
 
-        _, output3 = code_and_output(rand_kernel_tiled_3d, (x_small, 42))
+        code3, output3 = code_and_output(rand_kernel_tiled_3d, (x_small, 42))
         self.assertTrue(
             torch.allclose(output, output3),
             "Same seed should produce identical outputs",
@@ -438,6 +441,7 @@ def rand_kernel_tiled_3d(x: torch.Tensor, seed: int) -> torch.Tensor:
             0.4 < mean_val < 0.6,
             f"Mean {mean_val:.3f} should be around 0.5 for uniform distribution",
         )
+        self.assertIn("tl.rand(seed, (offset_0 + tl.arange(0,", code3)
 
 
 if __name__ == "__main__":