Add new setting, autotune_max_generations, that allows users to set the maximum number of generations for autotuning (#796)

choijon5 · web-flow · commit 4e7321f8239f · 2025-10-05T10:21:17.000-07:00
diff --git a/docs/api/settings.md b/docs/api/settings.md
@@ -134,6 +134,12 @@ with helion.set_default_settings(
 .. autoattribute:: Settings.autotune_random_seed
 
    Seed used for autotuner random number generation. Defaults to ``HELION_AUTOTUNE_RANDOM_SEED`` if set, otherwise a time-based value.
+
+.. autoattribute:: Settings.autotune_max_generations
+
+   Override the default maximum number of generations used by the Pattern Search and Differential Evolution Search autotuning algorithms with ``HELION_AUTOTUNE_MAX_GENERATIONS=N`` or ``@helion.kernel(autotune_max_generations=N)``.
+
+   Lower values result in faster autotuning but may find less optimal configurations.
 ```
 
 ### Debugging and Development
diff --git a/helion/autotuner/differential_evolution.py b/helion/autotuner/differential_evolution.py
@@ -27,15 +27,15 @@ def __init__(
         kernel: BoundKernel,
         args: Sequence[object],
         population_size: int = 40,
-        num_generations: int = 40,
+        max_generations: int = 40,
         crossover_rate: float = 0.8,
         immediate_update: bool | None = None,
     ) -> None:
         super().__init__(kernel, args)
         if immediate_update is None:
             immediate_update = not kernel.settings.autotune_precompile
         self.population_size = population_size
-        self.num_generations = num_generations
+        self.max_generations = max_generations
         self.crossover_rate = crossover_rate
         self.immediate_update = immediate_update
 
@@ -90,11 +90,11 @@ def _autotune(self) -> Config:
         self.log(
             lambda: (
                 f"Starting DifferentialEvolutionSearch with population={self.population_size}, "
-                f"generations={self.num_generations}, crossover_rate={self.crossover_rate}"
+                f"generations={self.max_generations}, crossover_rate={self.crossover_rate}"
             )
         )
         self.initial_two_generations()
-        for i in range(2, self.num_generations):
+        for i in range(2, self.max_generations):
             replaced = self.evolve_population()
             self.log(f"Generation {i}: replaced={replaced}", self.statistics)
         self.rebenchmark_population()
diff --git a/helion/runtime/settings.py b/helion/runtime/settings.py
@@ -73,6 +73,14 @@ def default_autotuner_fn(
             f"Unknown HELION_AUTOTUNER value: {autotuner_name}, valid options are: "
             f"{', '.join(search_algorithms.keys())}"
         )
+
+    # Use autotune_max_generations from settings if kwarg is not explicitly provided
+    if autotuner_name in ("PatternSearch", "DifferentialEvolutionSearch"):
+        if bound_kernel.settings.autotune_max_generations is not None:
+            kwargs.setdefault(
+                "max_generations", bound_kernel.settings.autotune_max_generations
+            )
+
     return LocalAutotuneCache(autotuner_cls(bound_kernel, args, **kwargs))  # pyright: ignore[reportArgumentType]
 
 
@@ -83,6 +91,13 @@ def _get_autotune_random_seed() -> int:
     return int(time.time() * 1000) % 2**32
 
 
+def _get_autotune_max_generations() -> int | None:
+    value = os.environ.get("HELION_AUTOTUNE_MAX_GENERATIONS")
+    if value is not None:
+        return int(value)
+    return None
+
+
 @dataclasses.dataclass
 class _Settings:
     # see __slots__ below for the doc strings that show up in help(Settings)
@@ -114,6 +129,9 @@ class _Settings:
     autotune_progress_bar: bool = (
         os.environ.get("HELION_AUTOTUNE_PROGRESS_BAR", "1") == "1"
     )
+    autotune_max_generations: int | None = dataclasses.field(
+        default_factory=_get_autotune_max_generations
+    )
     print_output_code: bool = os.environ.get("HELION_PRINT_OUTPUT_CODE", "0") == "1"
     force_autotune: bool = os.environ.get("HELION_FORCE_AUTOTUNE", "0") == "1"
     autotune_config_overrides: dict[str, object] = dataclasses.field(
@@ -149,6 +167,7 @@ class Settings(_Settings):
         "autotune_accuracy_check": "If True, validate candidate configs against the baseline kernel output before accepting them during autotuning.",
         "autotune_rebenchmark_threshold": "If a config is within threshold*best_perf, re-benchmark it to avoid outliers. Default is 1.5x.  Set to <1 to disable.",
         "autotune_progress_bar": "If True, show progress bar during autotuning. Default is True. Set HELION_AUTOTUNE_PROGRESS_BAR=0 to disable.",
+        "autotune_max_generations": "Override the maximum number of generations for Pattern Search and Differential Evolution Search autotuning algorithms with HELION_AUTOTUNE_MAX_GENERATIONS=N or @helion.kernel(autotune_max_generations=N).",
         "print_output_code": "If True, print the output code of the kernel to stderr.",
         "force_autotune": "If True, force autotuning even if a config is provided.",
         "autotune_config_overrides": "Dictionary of config key/value pairs forced during autotuning.",
diff --git a/test/test_autotuner.py b/test/test_autotuner.py
@@ -207,7 +207,7 @@ def test_differential_evolution_search(self):
         bound_kernel = examples_matmul.bind(args)
         random.seed(123)
         best = DifferentialEvolutionSearch(
-            bound_kernel, args, 5, num_generations=3
+            bound_kernel, args, 5, max_generations=3
         ).autotune()
         fn = bound_kernel.compile_config(best)
         torch.testing.assert_close(fn(*args), args[0] @ args[1], rtol=1e-2, atol=1e-1)
@@ -373,6 +373,34 @@ def wrong_fn(*fn_args, **fn_kwargs):
             self.assertEqual(best, good_config)
             self.assertGreaterEqual(search.counters.get("accuracy_mismatch", 0), 1)
 
+    def test_max_generations(self):
+        """Autotuner max generation respects explicit kwargs then setting override."""
+
+        with patch.dict(os.environ, {"HELION_AUTOTUNER": "PatternSearch"}):
+
+            @helion.kernel(autotune_max_generations=1)
+            def add(a, b):
+                out = torch.empty_like(a)
+                for tile in hl.tile(out.size()):
+                    out[tile] = a[tile] + b[tile]
+                return out
+
+            args = (
+                torch.randn([8], device=DEVICE),
+                torch.randn([8], device=DEVICE),
+            )
+
+            bound = add.bind(args)
+            autotuner_factory = bound.settings.autotuner_fn
+
+            # Settings override defaults
+            autotuner = autotuner_factory(bound, args)
+            self.assertEqual(autotuner.autotuner.max_generations, 1)
+
+            # Explicit constructor value wins
+            autotuner_override = autotuner_factory(bound, args, max_generations=2)
+            self.assertEqual(autotuner_override.autotuner.max_generations, 2)
+
     def test_use_default_config(self):
         @helion.kernel(use_default_config=True)
         def add(a, b):