Commit cde5e26

added small N PL estimator
1 parent 02bfaf5 commit cde5e26

File tree

8 files changed: +1726 −6 lines

Changelog.txt

Lines changed: 3 additions & 0 deletions

@@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.7.6]
+Added small_N power law estimator
+
 ## [0.7.5.5]
 Fixed bug in reading large safetensors files
 

README.md

Lines changed: 1 addition & 1 deletion

@@ -40,7 +40,7 @@ It can be used to:
 
 And in the notebooks provided in the [examples](https://github.com/CalculatedContent/WeightWatcher/tree/master/examples) directory
 
-## Installation: Version 0.7.5.5
+## Installation: Version 0.7.6
 
 ```sh
 pip install weightwatcher
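
For orientation, a minimal usage sketch after installing. The toy `nn.Sequential` model and its layer sizes are illustrative assumptions, not part of this commit; `WeightWatcher(model=...)` and `analyze()` are the library's standard entry points:

```python
import torch.nn as nn
import weightwatcher as ww

# Toy model for illustration only; any supported torch/keras model works
model = nn.Sequential(nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 10))

watcher = ww.WeightWatcher(model=model)
details = watcher.analyze()  # per-layer metrics, including power-law alpha
print(details)
```

With the MIN_EVALS defaults lowered in this commit, smaller layers reach the spectral analysis, and layers whose eigenvalue count falls below the new small-N cutoff are fit by the small-N estimator described below.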

examples/MLP3-MNIST-AdamW.ipynb

Lines changed: 725 additions & 0 deletions
Large diffs are not rendered by default.

examples/MLP3-MNIST-Muon.ipynb

Lines changed: 808 additions & 0 deletions
Large diffs are not rendered by default.

tests/test.py

Lines changed: 74 additions & 2 deletions

@@ -4539,8 +4539,7 @@ def test_compute_alphas(self):
         self.assertAlmostEqual(a[1],1.66595, places=3)
         self.assertAlmostEqual(a[3],1.43459, places=3)
 
-
-
+
         #
         # TODO: check if xmax='force' does anything ?
         #
@@ -5594,6 +5593,79 @@ def test_smooth_W_numpy_singular_values(self):
 
 
 
+from weightwatcher.WW_powerlaw import WWFit
+
+
+def sample_pareto(alpha: float, xmin: float, n: int, seed: int = 123) -> np.ndarray:
+    """
+    Sample n values from a continuous Pareto distribution with exponent alpha
+    and minimum value xmin, using inverse-CDF sampling.
+    """
+    rng = np.random.default_rng(seed)
+    u = rng.random(n)
+    # Pareto(xmin, alpha): X = xmin * U^(-1 / (alpha - 1))
+    return xmin * (u ** (-1.0 / (alpha - 1.0)))
+
+
+class TestSmallNPowerLaw(unittest.TestCase):
+    """
+    Unit tests for the small-N power-law fitter (fit_powerlaw_smallN).
+    """
+
+    def test_smalln_path_is_used(self):
+        """
+        Ensure that for small N, WWFit routes through fit_powerlaw_smallN.
+        """
+        # small-N sample, should be below SMALL_N_CUTOFF in WWFit
+        data = sample_pareto(alpha=2.2, xmin=1.0, n=10, seed=42)
+
+        called = {"hit": False}
+
+        # Monkeypatch fit_powerlaw_smallN to detect that it was called
+        original_smallN = WWFit.fit_powerlaw_smallN
+
+        def wrapped_smallN(self, *args, **kwargs):
+            called["hit"] = True
+            return original_smallN(self, *args, **kwargs)
+
+        WWFit.fit_powerlaw_smallN = wrapped_smallN
+        try:
+            _ = WWFit(data, distribution="power_law")
+        finally:
+            # Always restore the original method
+            WWFit.fit_powerlaw_smallN = original_smallN
+
+        self.assertTrue(called["hit"], "fit_powerlaw_smallN was not called for small N")
+
+    def test_smalln_alpha_reasonable_on_pareto(self):
+        """
+        For a small-N Pareto sample with known alpha, the fitted alpha
+        should be in the right ballpark (within a loose tolerance).
+        """
+        true_alpha = 2.2
+        xmin = 1.0
+        n = 10  # small-N regime
+
+        data = sample_pareto(true_alpha, xmin, n, seed=123)
+        fit = WWFit(data, distribution="power_law")  # will use small-N path
+
+        est_alpha = fit.alpha
+        est_xmin = fit.xmin
+
+        print(f" estimated small N alpha {est_alpha:0.2f}")
+
+        # Sanity checks
+        self.assertTrue(np.isfinite(est_alpha), "Estimated alpha is not finite")
+        self.assertGreater(est_alpha, 1.0, "Estimated alpha must be > 1 for a valid power law")
+        self.assertTrue(np.isfinite(est_xmin), "Estimated xmin is not finite")
+
+        # Loose accuracy check: small N is noisy, so don't demand perfection
+        self.assertLess(
+            abs(est_alpha - true_alpha),
+            0.4,
+            f"Estimated alpha {est_alpha:.3f} too far from true alpha {true_alpha:.3f}",
+        )
 
 
 # TODO
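
For reference, the inverse-CDF step in `sample_pareto` follows directly from the continuous Pareto CDF used throughout this commit (a standard identity, not anything specific to this code):

```latex
F(x) = 1 - \left(\frac{x}{x_{\min}}\right)^{1-\alpha},
\qquad x \ge x_{\min},\ \alpha > 1 .
```

Setting F(X) = U with U ~ Uniform(0, 1) and solving gives X = x_min (1 − U)^(−1/(α−1)); since 1 − U is itself uniform on (0, 1), the helper draws X = x_min · U^(−1/(α−1)) directly, which is exactly the formula in its comment.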

weightwatcher/WW_powerlaw.py

Lines changed: 112 additions & 0 deletions

@@ -23,6 +23,7 @@
     'lognormal_positive': powerlaw.Lognormal_Positive,
 }
 
+SMALL_N_CUTOFF = 20
 
 import logging
 logger = logging.getLogger(WW_NAME)
@@ -65,6 +66,15 @@ def __str__(self):
         return f"WWFit({self.distribution} xmin: {self.xmin:0.04f}, alpha: {self.alpha:0.04f}, sigma: {self.sigma:0.04f}, data: {len(self.data)})"
 
     def fit_power_law(self):
+        if self.N < SMALL_N_CUTOFF:
+            print("SMALL N PL FIT")
+            logger.info("SMALL N PL FIT")
+            self.fit_powerlaw_smallN()
+            return
+
+        return self.fit_power_law_standard()
+
+    def fit_power_law_standard(self):
         log_data = np. log(self.data, dtype=np.float64)
         self.alphas = np.zeros(self.N-1, dtype=np.float64)
         self.Ds = np. ones(self.N-1, dtype=np.float64)
@@ -80,6 +90,108 @@ def fit_power_law(self):
         ))
 
         self.sigmas = (self.alphas - 1) / np.sqrt(self.N - np.arange(self.N-1))
+
+
+    def fit_powerlaw_smallN(self, k_min: int = 8, lambda_prior: float = 0.0):
+        """
+        Small-N continuous power-law fit:
+
+        - Bias-corrected MLE: alpha_bc = 1 + (n - 1) / sum_j log(x_j / xmin)
+        - Objective for xmin selection:
+              J = D_ks - 0.868 / sqrt(n_tail) + lambda_prior * prior_pen
+          where prior_pen = (alpha_bc - 2)^2 (ultra-local prior, off if lambda_prior = 0)
+
+        No trace-log gate, no eigenvalue rescaling, no lock-to-2.
+        """
+        log_data = np.log(self.data, dtype=np.float64)
+
+        # Arrays analogous to those in fit_power_law_standard
+        self.alphas = np.zeros(self.N - 1, dtype=np.float64)
+        self.Ds = np.ones(self.N - 1, dtype=np.float64)
+        # Objective values (for internal selection)
+        self.Js = np.full(self.N - 1, np.inf, dtype=np.float64)
+
+        for i, xmin in enumerate(self.data[:-1]):
+            n_int = self.N - i  # tail size as int
+            if n_int < k_min:
+                continue
+            n = float(n_int)
+
+            # sum_j log(x_j / xmin) for j >= i
+            s = np.sum(log_data[i:]) - n * log_data[i]
+            if s <= 1e-12:
+                # degenerate tail; skip
+                continue
+
+            # --- bias-corrected MLE (n-1 correction) ---
+            alpha_bc = 1.0 + (n - 1.0) / s
+            self.alphas[i] = alpha_bc
+
+            if alpha_bc <= 1.0:
+                # invalid exponent for a continuous power law; skip
+                continue
+
+            # Tail data for this xmin
+            tail = self.data[i:]
+
+            # Theoretical CDF for a continuous power law on [xmin, inf):
+            #   F_fit(x) = 1 - (x/xmin)^(1 - alpha), x >= xmin
+            F_fit = 1.0 - (tail / xmin) ** (1.0 - alpha_bc)
+
+            # Empirical CDF: 0, 1/n, ..., (n-1)/n (matches the standard fit's convention)
+            F_emp = np.arange(n_int, dtype=np.float64) / n
+            Dks = float(np.max(np.abs(F_emp - F_fit)))
+            self.Ds[i] = Dks
+
+            # --- Objective: KS distance with a tail-size encouragement term ---
+            prior_pen = (alpha_bc - 2.0) ** 2  # ultra-local prior (active if lambda_prior > 0)
+            J = Dks - 0.868 / np.sqrt(n) + lambda_prior * prior_pen
+            self.Js[i] = J
+
+        # Sigma as in the standard fit (for reporting)
+        self.sigmas = (self.alphas - 1.0) / np.sqrt(self.N - np.arange(self.N - 1))
+
+        # ----- Choose the best xmin by J; no fallback to fit_power_law_standard -----
+        if np.isfinite(self.Js).any():
+            j_best = int(np.nanargmin(self.Js))
+        else:
+            # If k_min was too strict and no candidate survived, use all data as the tail (i = 0)
+            j_best = 0
+            xmin = self.data[0]
+            n_int = self.N
+            n = float(n_int)
+            s = np.sum(log_data) - n * log_data[0]
+            if s <= 1e-12:
+                # pathological case; keep trivial defaults
+                self.xmin = xmin
+                self.alpha = 1.0
+                self.sigma = 0.0
+                self.D = 1.0
+                self.data = self.data[self.data >= self.xmin]
+                return
+
+            alpha_bc = 1.0 + (n - 1.0) / s
+            self.alphas[j_best] = alpha_bc
+
+            tail = self.data
+            F_fit = 1.0 - (tail / xmin) ** (1.0 - alpha_bc)
+            F_emp = np.arange(n_int, dtype=np.float64) / n
+            Dks = float(np.max(np.abs(F_emp - F_fit)))
+            self.Ds[j_best] = Dks
+
+            prior_pen = (alpha_bc - 2.0) ** 2
+            self.Js[j_best] = Dks - 0.868 / np.sqrt(n) + lambda_prior * prior_pen
+
+        # Commit the winner (mirrors what __init__ does after fit_power_law)
+        self.xmin = self.data[j_best]
+        self.alpha = self.alphas[j_best]
+        self.sigma = self.sigmas[j_best]
+        self.D = self.Ds[j_best]
+
+        # Match powerlaw package behavior: restrict data to data >= xmin
+        self.data = self.data[self.data >= self.xmin]
 
     def __getattr__(self, item):
         """ Needed for replicating the behavior of the powerlaw.Fit class"""
weightwatcher/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -18,7 +18,7 @@
 
 
 __name__ = "weightwatcher"
-__version__ = "0.7.5.5"
+__version__ = "0.7.6"
 __license__ = "Apache License, Version 2.0"
 __description__ = "Diagnostic Tool for Deep Neural Networks"
 __url__ = "https://calculationconsulting.com/"

weightwatcher/constants.py

Lines changed: 2 additions & 2 deletions

@@ -141,8 +141,8 @@
 
 
 MIN_EVALS = 'min_evals'
-DEFAULT_MIN_EVALS = 10
-MIN_NUM_EVALS = 10
+DEFAULT_MIN_EVALS = 8
+MIN_NUM_EVALS = 8
 
 MAX_EVALS = 'max_evals'
 DEFAULT_MAX_EVALS = 15000
