diff --git a/maths/sieve_of_atkin.py b/maths/sieve_of_atkin.py new file mode 100644 index 000000000000..e1f57706655f --- /dev/null +++ b/maths/sieve_of_atkin.py @@ -0,0 +1,75 @@ +import math + + +def sieve_of_atkin(limit: int) -> list[int]: + """ + Compute all prime numbers up to the given limit using the Sieve of Atkin. + + Parameters + ---------- + limit : int + Upper bound of primes to generate (inclusive). + + Returns + ------- + list[int] + A list of prime numbers <= limit. + + Raises + ------ + ValueError + If limit is not an integer >= 2. + + References + ---------- + https://en.wikipedia.org/wiki/Sieve_of_Atkin + + Examples + -------- + >>> sieve_of_atkin(10) + [2, 3, 5, 7] + >>> sieve_of_atkin(1) + Traceback (most recent call last): + ... + ValueError: limit must be an integer >= 2 + """ + if not isinstance(limit, int) or limit < 2: + raise ValueError("limit must be an integer >= 2") + + # Initialize the sieve array + sieve = [False] * (limit + 1) + results: list[int] = [] + + # Preliminary marking based on quadratic forms + sqrt_limit = int(math.sqrt(limit)) + 1 + for x in range(1, sqrt_limit): + for y in range(1, sqrt_limit): + n = 4 * x * x + y * y + if n <= limit and n % 12 in (1, 5): + sieve[n] = not sieve[n] + n = 3 * x * x + y * y + if n <= limit and n % 12 == 7: + sieve[n] = not sieve[n] + n = 3 * x * x - y * y + if x > y and n <= limit and n % 12 == 11: + sieve[n] = not sieve[n] + + # Mark all multiples of squares as non-prime + for n in range(5, sqrt_limit): + if sieve[n]: + step = n * n + for k in range(step, limit + 1, step): + sieve[k] = False + + # Compile the list of primes + if limit >= 2: + results.extend([2, 3]) + results.extend([i for i in range(5, limit + 1) if sieve[i]]) + return results + + +if __name__ == "__main__": + import doctest + + doctest.testmod() + print("All doctests passed!") diff --git a/maths/test_sieve_of_atkin.py b/maths/test_sieve_of_atkin.py new file mode 100644 index 000000000000..4b668aa3876b --- /dev/null +++ b/maths/test_sieve_of_atkin.py @@ -0,0 +1,16 @@ +import pytest + +from maths.sieve_of_atkin import sieve_of_atkin + + +def test_small_primes(): + assert sieve_of_atkin(10) == [2, 3, 5, 7] + + +def test_medium_primes(): + assert sieve_of_atkin(20) == [2, 3, 5, 7, 11, 13, 17, 19] + + +def test_invalid_limit(): + with pytest.raises(ValueError): + sieve_of_atkin(1) diff --git a/strings/suffix_array.py b/strings/suffix_array.py new file mode 100644 index 000000000000..f2b786e7bd2a --- /dev/null +++ b/strings/suffix_array.py @@ -0,0 +1,66 @@ +def build_suffix_array(s: str) -> list[int]: + # Append a sentinel that is lexicographically smaller than all other characters + s += "\0" + n = len(s) + # Initial ranking by character code + ranks = [ord(c) for c in s] + sa = list(range(n)) + tmp = [0] * n + k = 1 + # Doubling loop + while k < n: + # Sort by (rank[i], rank[i+k]) pairs + sa.sort(key=lambda i: (ranks[i], ranks[i + k] if i + k < n else -1)) + # Temporary array for new ranks + tmp[sa[0]] = 0 + for i in range(1, n): + prev, curr = sa[i - 1], sa[i] + # Compare pair (rank, next rank) + r_prev = (ranks[prev], ranks[prev + k] if prev + k < n else -1) + r_curr = (ranks[curr], ranks[curr + k] if curr + k < n else -1) + tmp[curr] = tmp[prev] + (1 if r_curr != r_prev else 0) + ranks, tmp = tmp, ranks # reuse lists to save memory + k <<= 1 + if ranks[sa[-1]] == n - 1: + break + # Drop the sentinel index + return sa[1:] + + +def build_lcp_array(s: str, sa: list[int]) -> list[int]: + n = len(sa) + # Inverse of suffix array: pos[i] gives rank of suffix at i + pos = [0] * n + for i, suf in enumerate(sa): + pos[suf] = i + lcp = [0] * n + k = 0 + for i in range(len(s)): + if pos[i] == 0: + k = 0 + continue + j = sa[pos[i] - 1] + # Compare characters starting from k + while i + k < len(s) and j + k < len(s) and s[i + k] == s[j + k]: + k += 1 + lcp[pos[i]] = k + if k: + k -= 1 + return lcp[1:] + + +if __name__ == "__main__": + # Example usage and simple tests + test_strings = ["banana", "abracadabra", "mississippi"] + for s in test_strings: + sa = build_suffix_array(s) + lcp = build_lcp_array(s, sa) + print(f"String: {s}") + print(f"Suffix Array: {sa}") + print(f"LCP Array : {lcp}\n") + + # Assertions for correctness + s = "banana" + expected_sa = [5, 3, 1, 0, 4, 2] # indices of sorted suffixes + assert build_suffix_array(s) == expected_sa, "SA test failed" + print("All tests passed!")