Merged
README.md — 4 changes: 2 additions & 2 deletions
@@ -275,8 +275,8 @@ implementations from a single Helion kernel.
 ## Settings for Development and Debugging
 
 When developing kernels with Helion, you might prefer skipping autotuning for faster iteration. To
-do this, set the environment variable `HELION_USE_DEFAULT_CONFIG=1` or use the decorator argument
-`@helion.kernel(use_default_config=True)`. **Warning:** The default configuration is slow and not intended for
+do this, set the environment variable `HELION_AUTOTUNE_EFFORT=none` or use the decorator argument
+`@helion.kernel(autotune_effort="none")`. **Warning:** The default configuration is slow and not intended for
 production or performance testing.
 
 To view the generated Triton code, set the environment variable `HELION_PRINT_OUTPUT_CODE=1` or include
benchmarks/run.py — 4 changes: 2 additions & 2 deletions
@@ -880,9 +880,9 @@ def helion_method(
     attr = getattr(mod, attr_name)
     if isinstance(attr, Kernel):
         attr.reset()
-        # Force autotuning unless HELION_USE_DEFAULT_CONFIG=1 is set
+        # Force autotuning unless HELION_AUTOTUNE_EFFORT=none is set
         # This ensures we run autotuning even if the kernel has pre-specified configs
-        if os.environ.get("HELION_USE_DEFAULT_CONFIG", "0") != "1":
+        if os.environ.get("HELION_AUTOTUNE_EFFORT", "") != "none":
             # Only force full autotuning if no configs are provided
             if not attr.configs:
                 attr.settings.force_autotune = True
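The gating logic in this hunk can be isolated for testing; `should_force_autotune` is a hypothetical helper (not part of the benchmark script) that mirrors the new condition:

```python
import os


def should_force_autotune(configs: list) -> bool:
    """Mirror benchmarks/run.py: force full autotuning only when
    HELION_AUTOTUNE_EFFORT is not "none" and no configs are pre-specified."""
    if os.environ.get("HELION_AUTOTUNE_EFFORT", "") == "none":
        return False
    return not configs


os.environ.pop("HELION_AUTOTUNE_EFFORT", None)
print(should_force_autotune([]))  # True: no configs, autotuning enabled
os.environ["HELION_AUTOTUNE_EFFORT"] = "none"
print(should_force_autotune([]))  # False: effort preset disables it
```

Note that any non-`"none"` value, including an unset variable, leaves forced autotuning on — the check is an exact string comparison, not a truthiness test.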
docs/api/config.md — 2 changes: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ The `Config` class represents kernel optimization parameters that control how He
 |--------|--------|----------|
 | **Purpose** | Control execution performance | Control compilation behavior |
 | **Autotuning** | ✅ Automatically optimized | ❌ Never autotuned |
-| **Examples** | `block_sizes`, `num_warps`, `indexing` | `print_output_code`, `use_default_config` |
+| **Examples** | `block_sizes`, `num_warps`, `indexing` | `print_output_code`, `autotune_effort` |
 | **When to use** | Performance optimization | Development, debugging, environment setup |
 
 
docs/api/kernel.md — 4 changes: 2 additions & 2 deletions
@@ -47,7 +47,7 @@ c = vector_add(a, b)  # Automatically compiles and executes
 
 ```python
 @helion.kernel(
-    use_default_config=True,  # Skip autotuning
+    autotune_effort="none",  # Skip autotuning
     print_output_code=True  # Debug generated code
 )
 def my_kernel(x: torch.Tensor) -> torch.Tensor:
@@ -154,7 +154,7 @@ Settings control **how the kernel is compiled** and the development environment:
 ```python
 @helion.kernel(
     # Settings parameters
-    use_default_config=True,  # Skip autotuning for development
+    autotune_effort="none",  # Skip autotuning for development
     autotune_effort="quick",  # Smaller autotuning budget when search is enabled
     print_output_code=True,  # Debug: show generated Triton code
     static_shapes=True,  # Compilation optimization strategy
docs/api/settings.md — 14 changes: 5 additions & 9 deletions
@@ -27,7 +27,7 @@ The `Settings` class controls compilation behavior and debugging options for Hel
 |--------|----------|--------|
 | **Purpose** | Control compilation behavior | Control execution performance |
 | **Autotuning** | ❌ Never autotuned | ✅ Automatically optimized |
-| **Examples** | `print_output_code`, `use_default_config` | `block_sizes`, `num_warps` |
+| **Examples** | `print_output_code`, `autotune_effort` | `block_sizes`, `num_warps` |
 | **When to use** | Development, debugging, environment setup | Performance optimization |
 
 Settings can be configured via:
@@ -41,7 +41,7 @@ Settings can be configured via:
 ### Using Environment Variables
 
 ```bash
-env HELION_PRINT_OUTPUT_CODE=1 HELION_USE_DEFAULT_CONFIG=1 my_kernel.py
+env HELION_PRINT_OUTPUT_CODE=1 HELION_AUTOTUNE_EFFORT=none my_kernel.py
 ```
 
 ### Using Decorator Arguments
@@ -52,7 +52,7 @@ import helion
 import helion.language as hl
 
 @helion.kernel(
-    use_default_config=True,  # Skip autotuning
+    autotune_effort="none",  # Skip autotuning
     print_output_code=True,  # Debug output
 )
 def my_kernel(x: torch.Tensor) -> torch.Tensor:
@@ -104,10 +104,6 @@ with helion.set_default_settings(
 ### Autotuning Settings
 
 ```{eval-rst}
-.. autoattribute:: Settings.use_default_config
-
-   Skip autotuning and use default configuration. Default is ``False``. Controlled by ``HELION_USE_DEFAULT_CONFIG=1``.
-
 .. autoattribute:: Settings.force_autotune
 
    Force autotuning even when explicit configs are provided. Default is ``False``. Controlled by ``HELION_FORCE_AUTOTUNE=1``.
@@ -165,7 +161,7 @@
 
 Select the autotuning effort preset. Available values:
 
-   - ``"none"`` – skip autotuning and run the default configuration (equivalent to ``use_default_config=True``).
+   - ``"none"`` – skip autotuning and run the default configuration.
    - ``"quick"`` – limited search for faster runs with decent performance.
    - ``"full"`` – exhaustive autotuning (current default behavior).
 
@@ -234,12 +230,12 @@ Built-in values for ``HELION_AUTOTUNER`` include ``"PatternSearch"``, ``"Differe
 | Environment Variable | Maps To | Description |
 |----------------------|---------|-------------|
 | ``TRITON_F32_DEFAULT`` | ``dot_precision`` | Sets default floating-point precision for Triton dot products (``"tf32"``, ``"tf32x3"``, ``"ieee"``). |
-| ``HELION_USE_DEFAULT_CONFIG`` | ``use_default_config`` | Skip autotuning entirely and rely on the default (debug) configuration. |
 | ``HELION_FORCE_AUTOTUNE`` | ``force_autotune`` | Force the autotuner to run even when explicit configs are provided. |
 | ``HELION_AUTOTUNE_COMPILE_TIMEOUT`` | ``autotune_compile_timeout`` | Maximum seconds to wait for Triton compilation during autotuning. |
 | ``HELION_AUTOTUNE_RANDOM_SEED`` | ``autotune_random_seed`` | Seed used for randomized autotuning searches. |
 | ``HELION_AUTOTUNE_MAX_GENERATIONS`` | ``autotune_max_generations`` | Upper bound on generations for Pattern Search and Differential Evolution. |
 | ``HELION_AUTOTUNE_ACCURACY_CHECK`` | ``autotune_accuracy_check`` | Toggle baseline validation for candidate configs. |
+| ``HELION_AUTOTUNE_EFFORT`` | ``autotune_effort`` | Select autotuning preset (``"none"``, ``"quick"``, ``"full"``). |
 | ``HELION_REBENCHMARK_THRESHOLD`` | ``autotune_rebenchmark_threshold`` | Re-run configs whose performance is within a multiplier of the current best. |
 | ``HELION_AUTOTUNE_PROGRESS_BAR`` | ``autotune_progress_bar`` | Enable or disable the progress bar UI during autotuning. |
 | ``HELION_PRINT_OUTPUT_CODE`` | ``print_output_code`` | Print generated Triton code to stderr for inspection. |
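The new `HELION_AUTOTUNE_EFFORT` row can be exercised with a small sketch; `read_autotune_effort` is an illustrative helper, not Helion API, and the `"full"` default is an assumption matching the documented current behavior:

```python
import os

VALID_EFFORTS = ("none", "quick", "full")


def read_autotune_effort(default: str = "full") -> str:
    """Read and validate the HELION_AUTOTUNE_EFFORT preset from the environment."""
    value = os.environ.get("HELION_AUTOTUNE_EFFORT", default)
    if value not in VALID_EFFORTS:
        raise ValueError(f"invalid HELION_AUTOTUNE_EFFORT: {value!r}")
    return value


os.environ["HELION_AUTOTUNE_EFFORT"] = "quick"
print(read_autotune_effort())  # quick
```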
docs/index.md — 6 changes: 3 additions & 3 deletions
@@ -212,7 +212,7 @@ Example combining both:
 ```python
 @helion.kernel(
     # Settings: Control compilation behavior
-    use_default_config=True,  # Skip autotuning for development
+    autotune_effort="none",  # Skip autotuning for development
     print_output_code=True,  # Debug: show generated code
     # Config: Control GPU execution (when not using default)
     # config=helion.Config(block_sizes=[64, 32], num_warps=8)
@@ -225,8 +225,8 @@ def debug_kernel(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 ## Settings for Development and Debugging
 
 When developing kernels with Helion, you might prefer skipping autotuning for faster iteration. To
-do this, set the environment variable `HELION_USE_DEFAULT_CONFIG=1` or use the decorator argument
-`@helion.kernel(use_default_config=True)`. **Warning:** The default configuration is slow and not intended for
+do this, set the environment variable `HELION_AUTOTUNE_EFFORT=none` or use the decorator argument
+`@helion.kernel(autotune_effort="none")`. **Warning:** The default configuration is slow and not intended for
 production or performance testing.
 
 To view the generated Triton code, set the environment variable `HELION_PRINT_OUTPUT_CODE=1` or include
docs/installation.md — 2 changes: 1 addition & 1 deletion
@@ -133,7 +133,7 @@ import torch
 import helion
 import helion.language as hl
 
-@helion.kernel(use_default_config=True)
+@helion.kernel(autotune_effort="none")
 def test_kernel(x: torch.Tensor) -> torch.Tensor:
     out = torch.empty_like(x)
     for tile in hl.tile(x.shape[0]):
examples/fp8_gemm.py — 2 changes: 1 addition & 1 deletion
@@ -22,7 +22,7 @@
 # Override default config to work around Triton tl.dot requirement:
 # `AssertionError: Input shapes should have M >= 16, N >= 16 and K >= 32`
 config = None
-if os.environ.get("HELION_USE_DEFAULT_CONFIG") == "1":
+if os.environ.get("HELION_AUTOTUNE_EFFORT") == "none":
     config = helion.Config(block_sizes=[32, 32, 32])
 
 
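The override pattern in this hunk — fall back to an explicit config only when autotuning is disabled — can be sketched with a plain dict standing in for `helion.Config` (the dict is an assumption for illustration only):

```python
import os
from typing import Optional


def pick_fp8_config() -> Optional[dict]:
    """Return explicit 32x32x32 block sizes when autotuning is skipped,
    satisfying tl.dot's shape minimums (M >= 16, N >= 16, K >= 32)."""
    if os.environ.get("HELION_AUTOTUNE_EFFORT") == "none":
        return {"block_sizes": [32, 32, 32]}
    return None  # let the autotuner choose block sizes


os.environ["HELION_AUTOTUNE_EFFORT"] = "none"
print(pick_fp8_config())  # {'block_sizes': [32, 32, 32]}
```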
examples/jagged_layer_norm.py — 2 changes: 1 addition & 1 deletion
@@ -33,7 +33,7 @@
 
 
 # %%
-@helion.kernel(use_default_config=True)
+@helion.kernel(autotune_effort="none")
 def jagged_layer_norm_kernel(
     x_values: torch.Tensor,  # [total_L, M] - compressed values
     x_offsets: torch.Tensor,  # [B+1] - sequence start offsets
helion/runtime/settings.py — 8 changes: 0 additions & 8 deletions
@@ -222,14 +222,6 @@ def __init__(self, **settings: object) -> None:
             settings: Keyword arguments representing various settings.
         """
 
-        # Translate use_default_config to autotune_effort='none' for backward compatibility
-        if (
-            settings.get("use_default_config")
-            or os.environ.get("HELION_USE_DEFAULT_CONFIG") == "1"
-        ):
-            settings.setdefault("autotune_effort", "none")
-        settings.pop("use_default_config", None)
-
         if defaults := getattr(_tls, "default_settings", None):
             settings = {**defaults.to_dict(), **settings}
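Because this hunk deletes the backward-compatibility translation, downstream code still passing `use_default_config` must migrate. A hypothetical shim that reproduces the removed behavior outside Helion (function name and usage are illustrative):

```python
import os


def translate_legacy_settings(settings: dict) -> dict:
    """Map the removed use_default_config flag (or HELION_USE_DEFAULT_CONFIG=1)
    onto autotune_effort="none", mirroring the deleted compatibility code."""
    settings = dict(settings)  # do not mutate the caller's dict
    if (
        settings.get("use_default_config")
        or os.environ.get("HELION_USE_DEFAULT_CONFIG") == "1"
    ):
        settings.setdefault("autotune_effort", "none")
    settings.pop("use_default_config", None)
    return settings


os.environ.pop("HELION_USE_DEFAULT_CONFIG", None)
print(translate_legacy_settings({"use_default_config": True}))
# {'autotune_effort': 'none'}
```

As in the deleted code, `setdefault` preserves an explicitly supplied `autotune_effort`, so the legacy flag never overrides a newer setting.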