diff --git a/helion/_testing.py b/helion/_testing.py index 6b56a1adc..07cf55894 100644 --- a/helion/_testing.py +++ b/helion/_testing.py @@ -209,6 +209,8 @@ def save(self) -> None: f"--- assertExpectedJournal({name})\n{expected}\n\n" for expected in expected_values ) + # Remove the last newline to play nicer with some people's editors + f.truncate(f.tell() - 1) os.rename(tmp, self.filename) @staticmethod diff --git a/test/test_associative_scan.expected b/test/test_associative_scan.expected index 25e577690..44919a446 100644 --- a/test/test_associative_scan.expected +++ b/test/test_associative_scan.expected @@ -1648,4 +1648,3 @@ def _test_cumsum_reverse_kernel_make_precompiler(x: torch.Tensor): _RDIM_SIZE_1 = triton.next_power_of_2(x.size(1)) from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_test_cumsum_reverse_kernel_kernel)(x, result, x.size(1), result.stride(1), x.stride(1), _RDIM_SIZE_1, num_warps=4, num_stages=3) - diff --git a/test/test_atomic_add.expected b/test/test_atomic_add.expected index b46fc923a..2d84db281 100644 --- a/test/test_atomic_add.expected +++ b/test/test_atomic_add.expected @@ -147,4 +147,3 @@ def _atomic_add_overlap_kernel_make_precompiler(x: torch.Tensor, y: torch.Tensor _BLOCK_SIZE_0 = 32 from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_atomic_add_overlap_kernel_kernel)(indices, y, x, _BLOCK_SIZE_0, num_warps=4, num_stages=3) - diff --git a/test/test_broadcasting.expected b/test/test_broadcasting.expected index 8304b0a6b..a05121513 100644 --- a/test/test_broadcasting.expected +++ b/test/test_broadcasting.expected @@ -295,4 +295,3 @@ def _fn_make_precompiler(a, b): _BLOCK_SIZE_1 = 16 from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_fn_kernel)(a, b, out, a.size(0), a.size(1), a.stride(0), a.stride(1), b.stride(0), out.stride(0), out.stride(1), _BLOCK_SIZE_0, _BLOCK_SIZE_1, num_warps=4, num_stages=3) - diff --git a/test/test_closures.expected b/test/test_closures.expected index 3e03b9fc4..4b250f190 100644 --- a/test/test_closures.expected +++ b/test/test_closures.expected @@ -205,4 +205,3 @@ def _call_func_arg_on_host_make_precompiler(a, alloc): _BLOCK_SIZE_0 = 512 from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_call_func_arg_on_host_kernel)(a, out, a.size(0), a.stride(0), out.stride(0), _BLOCK_SIZE_0, num_warps=4, num_stages=3) - diff --git a/test/test_constexpr.expected b/test/test_constexpr.expected index 487379539..f100f3bab 100644 --- a/test/test_constexpr.expected +++ b/test/test_constexpr.expected @@ -116,4 +116,3 @@ def _fn_make_precompiler(x: torch.Tensor, s: hl.constexpr): _BLOCK_SIZE_1 = 16 from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_fn_kernel)(x, out, out.stride(0), out.stride(1), x.stride(0), b, _BLOCK_SIZE_0, _BLOCK_SIZE_1, num_warps=4, num_stages=3) - diff --git a/test/test_control_flow.expected b/test/test_control_flow.expected index 8e89f957e..9bd0fdb35 100644 --- a/test/test_control_flow.expected +++ b/test/test_control_flow.expected @@ -214,4 +214,3 @@ def _fn_make_precompiler(x: torch.Tensor, y: torch.Tensor): output = torch.zeros_like(x) from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_fn_kernel)(x, y, output, output.stride(0), x.stride(0), y.stride(0), num_warps=4, num_stages=3) - diff --git a/test/test_generate_ast.expected b/test/test_generate_ast.expected index 0a3ee96e4..6c45de372 100644 --- a/test/test_generate_ast.expected +++ b/test/test_generate_ast.expected @@ -548,4 +548,3 @@ def _torch_ops_pointwise_make_precompiler(x, y): _BLOCK_SIZE_0 = 128 from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_torch_ops_pointwise_kernel)(x, y, out, x.size(0), out.stride(0), x.stride(0), y.stride(0), _BLOCK_SIZE_0, num_warps=4, num_stages=3) - diff --git a/test/test_indexing.expected b/test/test_indexing.expected index 3919d2b23..abc5bd5d8 100644 --- a/test/test_indexing.expected +++ b/test/test_indexing.expected @@ -171,4 +171,3 @@ def _pairwise_add_make_precompiler(x: torch.Tensor): _BLOCK_SIZE_0 = 32 from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_pairwise_add_kernel)(out, x, out.size(0), out.stride(0), x.stride(0), _BLOCK_SIZE_0, num_warps=4, num_stages=3) - diff --git a/test/test_reduce.expected b/test/test_reduce.expected index 24aef8100..e36ac00f4 100644 --- a/test/test_reduce.expected +++ b/test/test_reduce.expected @@ -590,4 +590,3 @@ def _test_reduce_keep_dims_kernel_make_precompiler(x: torch.Tensor): _RDIM_SIZE_1 = triton.next_power_of_2(x.size(1)) from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_test_reduce_keep_dims_kernel_kernel)(x, result, x.size(0), x.size(1), result.stride(0), x.stride(0), x.stride(1), _BLOCK_SIZE_0, _RDIM_SIZE_1, num_warps=4, num_stages=3) - diff --git a/test/test_specialize.expected b/test/test_specialize.expected index 4ef67bdcc..c0da5d9eb 100644 --- a/test/test_specialize.expected +++ b/test/test_specialize.expected @@ -184,4 +184,3 @@ def _fn_make_precompiler(x: torch.Tensor): _BLOCK_SIZE_0_1 = 32 from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_fn_kernel)(x, out, x.size(0), x.size(1), out.stride(0), out.stride(1), x.stride(0), x.stride(1), scale, _BLOCK_SIZE_0_1, num_warps=4, num_stages=3) - diff --git a/test/test_type_propagation.expected b/test/test_type_propagation.expected index cfac32bec..3b6f7f0cc 100644 --- a/test/test_type_propagation.expected +++ b/test/test_type_propagation.expected @@ -822,4 +822,3 @@ def root_graph_0(): out: "i32[s77]" = helion_language__tracing_ops__host_tensor('out') store = helion_language_memory_ops_store(out, [block_size_0], convert_element_type, None); out = block_size_0 = convert_element_type = store = None return None - diff --git a/test/test_views.expected b/test/test_views.expected index 0b5346ce9..54b6948c0 100644 --- a/test/test_views.expected +++ b/test/test_views.expected @@ -117,4 +117,3 @@ def _fn_make_precompiler(x: torch.Tensor, y: torch.Tensor): _BLOCK_SIZE_1 = 32 from helion.runtime.precompile_shim import make_precompiler return make_precompiler(_fn_kernel)(x, y, out, out.size(0), out.size(1), x.size(0), x.size(1), y.size(0), out.stride(0), out.stride(1), x.stride(0), x.stride(1), y.stride(0), y.stride(1), _BLOCK_SIZE_0, _BLOCK_SIZE_1, num_warps=4, num_stages=3) -