diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..59d0684
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,2 @@
+BasedOnStyle: Google
+SortIncludes: Never
diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml
new file mode 100644
index 0000000..0241386
--- /dev/null
+++ b/.github/workflows/linter.yml
@@ -0,0 +1,38 @@
+name: Linter
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+  push:
+    branches:
+      - main
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+
+      - name: Run clang-format
+        uses: jidicula/clang-format-action@c74383674bf5f7c69f60ce562019c1c94bc1421a # v4.13.0
+        with:
+          clang-format-version: '17'
+          fallback-style: 'Google'
+
+      - uses: chartboost/ruff-action@e18ae971ccee1b2d7bbef113930f00c670b78da4 # v1.0.0
+        name: Lint with Ruff
+        with:
+          version: 0.5.1
diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
new file mode 100644
index 0000000..8285461
--- /dev/null
+++ b/.github/workflows/macos.yml
@@ -0,0 +1,33 @@
+name: macos CI
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+  push:
+    branches:
+      - main
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  macos-build:
+    runs-on: macos-latest
+    steps:
+      - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
+      - name: Prepare
+        run: cmake -B build
+      - name: Build
+        run: cmake --build build -j=2
+      - name: Test
+        run: ctest --output-on-failure --test-dir build
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
new file mode 100644
index 0000000..6b5c572
--- /dev/null
+++ b/.github/workflows/ubuntu.yml
@@ -0,0 +1,41 @@
+name: Ubuntu 24.04 CI
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+  push:
+    branches:
+      - main
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  ubuntu-build:
+    runs-on: ubuntu-24.04
+    strategy:
+      matrix:
+        shared: [ON, OFF]
+        cxx: [g++-14]
+    steps:
+      - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
+      - name: Setup Ninja
+        run: sudo apt-get install -y ninja-build
+      - name: Prepare
+        run: cmake -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build
+        env:
+          CXX: ${{matrix.cxx}}
+      - name: Build
+        run: cmake --build build -j=2
+      - name: Test
+        run: ctest --output-on-failure --test-dir build
diff --git a/.github/workflows/visual-studio.yml b/.github/workflows/visual-studio.yml
new file mode 100644
index 0000000..34a5fe3
--- /dev/null
+++ b/.github/workflows/visual-studio.yml
@@ -0,0 +1,42 @@
+name: VS17 CI
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+  push:
+    branches:
+      - main
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  ci:
+    name: windows-vs17
+    runs-on: windows-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - {gen: Visual Studio 17 2022, arch: x64, config: Release}
+          - {gen: Visual Studio 17 2022, arch: x64, config: Debug}
+    steps:
+    - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
+    - name: Configure
+      run: |
+        cmake -G "${{matrix.gen}}" -A ${{matrix.arch}}  -B build
+    - name: Build
+      run: cmake --build build --config "${{matrix.config}}" --verbose
+    - name: Run  tests
+      working-directory: build
+      run: ctest -C "${{matrix.config}}"  --output-on-failure
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f1a825f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+cmake-build-debug
+build
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..e4fba21
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..f2efa3c
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,42 @@
+cmake_minimum_required(VERSION 3.28)
+project(nbytes)
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)  # single-config only
+  message(STATUS "No build type selected, default to Release")
+  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
+endif()
+
+option(NBYTES_DEVELOPMENT_CHECKS "Enable development checks" OFF)
+
+include(GNUInstallDirs)
+include(FetchContent)
+
+FetchContent_Declare(
+  googletest
+  URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
+)
+# For Windows: Prevent overriding the parent project's compiler/linker settings
+set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+FetchContent_MakeAvailable(googletest)
+
+add_subdirectory(src)
+enable_testing()
+add_subdirectory(tests)
+
+install(
+  FILES include/nbytes.h
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+  COMPONENT nbytes_development
+)
+
+install(
+  TARGETS nbytes
+  EXPORT nbytes_targets
+  RUNTIME COMPONENT nbytes_runtime
+  LIBRARY COMPONENT nbytes_runtime
+  NAMELINK_COMPONENT nbytes_development
+  ARCHIVE COMPONENT nbytes_development
+  INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+)
diff --git a/README.md b/README.md
index 976c4f0..abafc11 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,16 @@
 # nbytes
+
 A library of byte handling functions extracted from Node.js core
+
+## Building
+
+```bash
+cmake -B build
+cmake --build build
+```
+
+## Testing
+
+```bash
+ctest --test-dir build
+```
diff --git a/include/nbytes.h b/include/nbytes.h
new file mode 100644
index 0000000..28c7bf9
--- /dev/null
+++ b/include/nbytes.h
@@ -0,0 +1,840 @@
+#ifndef NBYTES_H
+#define NBYTES_H
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+namespace nbytes {
+
+#if NBYTES_DEVELOPMENT_CHECKS
+#define NBYTES_STR(x) #x
+#define NBYTES_REQUIRE(EXPR) \
+  {                          \
+    if (!(EXPR)) { abort(); } }  // Abort unless EXPR holds.
+
+#define NBYTES_FAIL(MESSAGE)                         \
+  do {                                               \
+    std::cerr << "FAIL: " << (MESSAGE) << std::endl; \
+    abort();                                         \
+  } while (0);
+#define NBYTES_ASSERT_EQUAL(LHS, RHS, MESSAGE)                                 \
+  do {                                                                         \
+    if (LHS != RHS) {                                                          \
+      std::cerr << "Mismatch: '" << LHS << "' - '" << RHS << "'" << std::endl; \
+      NBYTES_FAIL(MESSAGE);                                                    \
+    }                                                                          \
+  } while (0);
+#define NBYTES_ASSERT_TRUE(COND)                                            \
+  do {                                                                      \
+    if (!(COND)) {                                                          \
+      std::cerr << "Assert at line " << __LINE__ << " of file " << __FILE__ \
+                << std::endl;                                               \
+      NBYTES_FAIL(NBYTES_STR(COND));                                        \
+    }                                                                       \
+  } while (0);
+#else
+#define NBYTES_FAIL(MESSAGE)
+#define NBYTES_ASSERT_EQUAL(LHS, RHS, MESSAGE)
+#define NBYTES_ASSERT_TRUE(COND)
+#endif
+
+[[noreturn]] inline void unreachable() {
+#ifdef __GNUC__
+  __builtin_unreachable();
+#elif defined(_MSC_VER)
+  __assume(false);
+#else
+#endif
+}
+
+// The nbytes (short for "node bytes") is a set of utility helpers for
+// working with bytes that are extracted from Node.js' internals. The
+// motivation for extracting these into a separate library is to make it
+// easier for other projects to implement functionality that is compatible
+// with Node.js' implementation of various byte manipulation functions.
+
+// Round up a to the next highest multiple of b.
+template <typename T>
+constexpr T RoundUp(T a, T b) {
+  return a % b != 0 ? a + b - (a % b) : a;
+}
+
+// Round ptr up to an `alignment`-byte boundary (no-op if already aligned).
+template <typename T, typename U>
+constexpr T *AlignUp(T *ptr, U alignment) {
+  return reinterpret_cast<T *>(RoundUp(  // Cast so both args deduce one type.
+      reinterpret_cast<uintptr_t>(ptr), static_cast<uintptr_t>(alignment)));
+}
+
+template <typename T, typename U>
+inline T AlignDown(T value, U alignment) {
+  return reinterpret_cast<T>(
+      (reinterpret_cast<uintptr_t>(value) & ~(alignment - 1)));
+}
+
+template <typename T>
+inline T MultiplyWithOverflowCheck(T a, T b) {
+  auto ret = a * b;
+  if (a != 0) {
+    NBYTES_ASSERT_TRUE(b == ret / a);
+  }
+
+  return ret;
+}
+
+void ForceAsciiSlow(const char *src, char *dst, size_t len);
+void ForceAscii(const char *src, char *dst, size_t len);
+
+// ============================================================================
+// Byte Swapping
+
+// Swaps bytes in place. nbytes is the number of bytes to swap and must be a
+// multiple of the word size (checked by function).
+bool SwapBytes16(void *data, size_t nbytes);
+bool SwapBytes32(void *data, size_t nbytes);
+bool SwapBytes64(void *data, size_t nbytes);
+
+// ============================================================================
+// Base64 (legacy)
+
+#ifdef _MSC_VER
+#pragma warning(push)
+// MSVC C4003: not enough actual parameters for macro 'identifier'
+#pragma warning(disable : 4003)
+#endif
+
+extern const int8_t unbase64_table[256];
+
+template <typename TypeName>
+bool Base64DecodeGroupSlow(char *const dst, const size_t dstlen,
+                           const TypeName *const src, const size_t srclen,
+                           size_t *const i, size_t *const k) {
+  uint8_t hi;
+  uint8_t lo;
+#define V(expr)                                                        \
+  for (;;) {                                                           \
+    const uint8_t c = static_cast<uint8_t>(src[*i]);                   \
+    lo = unbase64_table[c];                                            \
+    *i += 1;                                                           \
+    if (lo < 64) break;                         /* Legal character. */ \
+    if (c == '=' || *i >= srclen) return false; /* Stop decoding. */   \
+  }                                                                    \
+  expr;                                                                \
+  if (*i >= srclen) return false;                                      \
+  if (*k >= dstlen) return false;                                      \
+  hi = lo;
+  V(/* Nothing. */);
+  V(dst[(*k)++] = ((hi & 0x3F) << 2) | ((lo & 0x30) >> 4));
+  V(dst[(*k)++] = ((hi & 0x0F) << 4) | ((lo & 0x3C) >> 2));
+  V(dst[(*k)++] = ((hi & 0x03) << 6) | ((lo & 0x3F) >> 0));
+#undef V
+  return true;  // Continue decoding.
+}
+
+enum class Base64Mode { NORMAL, URL };
+
+inline constexpr size_t Base64EncodedSize(
+    size_t size, Base64Mode mode = Base64Mode::NORMAL) {
+  // URL mode: ceil(size * 4 / 3), in integer math so this stays constexpr.
+  return mode == Base64Mode::NORMAL ? ((size + 2) / 3 * 4)
+                                    : ((size * 4 + 2) / 3);
+}
+
+// Doesn't check for padding at the end.  Can be 1-2 bytes over.
+inline constexpr size_t Base64DecodedSizeFast(size_t size) {
+  // 1-byte input cannot be decoded
+  return size > 1 ? (size / 4) * 3 + (size % 4 + 1) / 2 : 0;
+}
+
+inline uint32_t ReadUint32BE(const unsigned char *p) {
+  // Widen each byte first: shifting the promoted int by 24 could overflow.
+  using u32 = uint32_t;
+  return (u32{p[0]} << 24) | (u32{p[1]} << 16) | (u32{p[2]} << 8) | u32{p[3]};
+}
+
+template <typename TypeName>
+size_t Base64DecodedSize(const TypeName *src, size_t size) {
+  // 1-byte input cannot be decoded
+  if (size < 2) return 0;
+
+  if (src[size - 1] == '=') {
+    size--;
+    if (src[size - 1] == '=') size--;
+  }
+  return Base64DecodedSizeFast(size);
+}
+
+template <typename TypeName>
+size_t Base64DecodeFast(char *const dst, const size_t dstlen,
+                        const TypeName *const src, const size_t srclen,
+                        const size_t decoded_size) {
+  const size_t available = dstlen < decoded_size ? dstlen : decoded_size;
+  const size_t max_k = available / 3 * 3;
+  size_t max_i = srclen / 4 * 4;
+  size_t i = 0;
+  size_t k = 0;
+  while (i < max_i && k < max_k) {
+    const unsigned char txt[] = {
+        static_cast<unsigned char>(
+            unbase64_table[static_cast<uint8_t>(src[i + 0])]),
+        static_cast<unsigned char>(
+            unbase64_table[static_cast<uint8_t>(src[i + 1])]),
+        static_cast<unsigned char>(
+            unbase64_table[static_cast<uint8_t>(src[i + 2])]),
+        static_cast<unsigned char>(
+            unbase64_table[static_cast<uint8_t>(src[i + 3])]),
+    };
+
+    const uint32_t v = ReadUint32BE(txt);
+    // If MSB is set, input contains whitespace or is not valid base64.
+    if (v & 0x80808080) {
+      if (!Base64DecodeGroupSlow(dst, dstlen, src, srclen, &i, &k)) return k;
+      max_i = i + (srclen - i) / 4 * 4;  // Align max_i again.
+    } else {
+      dst[k + 0] = ((v >> 22) & 0xFC) | ((v >> 20) & 0x03);
+      dst[k + 1] = ((v >> 12) & 0xF0) | ((v >> 10) & 0x0F);
+      dst[k + 2] = ((v >> 2) & 0xC0) | ((v >> 0) & 0x3F);
+      i += 4;
+      k += 3;
+    }
+  }
+  if (i < srclen && k < dstlen) {
+    Base64DecodeGroupSlow(dst, dstlen, src, srclen, &i, &k);
+  }
+  return k;
+}
+
+template <typename TypeName>
+size_t Base64Decode(char *const dst, const size_t dstlen,
+                    const TypeName *const src, const size_t srclen) {
+  const size_t decoded_size = Base64DecodedSize(src, srclen);
+  return Base64DecodeFast(dst, dstlen, src, srclen, decoded_size);
+}
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+// ============================================================================
+// Hex (legacy)
+
+extern const int8_t unhex_table[256];
+
+template <typename TypeName>
+static size_t HexDecode(char *buf, size_t len, const TypeName *src,
+                        const size_t srcLen) {
+  size_t i;
+  for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) {
+    unsigned a = unhex_table[static_cast<uint8_t>(src[i * 2 + 0])];
+    unsigned b = unhex_table[static_cast<uint8_t>(src[i * 2 + 1])];
+    if (!~a || !~b) return i;
+    buf[i] = (a << 4) | b;
+  }
+
+  return i;
+}
+
+size_t HexEncode(const char *src, size_t slen, char *dst, size_t dlen);
+
+std::string HexEncode(const char *src, size_t slen);
+
+// ============================================================================
+// StringSearch
+
+namespace stringsearch {
+
+template <typename T>
+class Vector {
+ public:
+  Vector(T *data, size_t length, bool isForward)
+      : start_(data), length_(length), is_forward_(isForward) {
+    NBYTES_ASSERT_TRUE(length > 0 && data != nullptr);
+  }
+
+  // Returns the start of the memory range.
+  // For vector v this is NOT necessarily &v[0], see forward().
+  const T *start() const { return start_; }
+
+  // Returns the length of the vector, in characters.
+  size_t length() const { return length_; }
+
+  // Returns true if the Vector is front-to-back, false if back-to-front.
+  // In the latter case, v[0] corresponds to the *end* of the memory range.
+  bool forward() const { return is_forward_; }
+
+  // Access individual vector elements - bounds are checked only when
+  // NBYTES_DEVELOPMENT_CHECKS is enabled.
+  T &operator[](size_t index) const {
+    NBYTES_ASSERT_TRUE(index < length_);
+    return start_[is_forward_ ? index : (length_ - index - 1)];
+  }
+
+ private:
+  T *start_;
+  size_t length_;
+  bool is_forward_;
+};
+
+//---------------------------------------------------------------------
+// String Search object.
+//---------------------------------------------------------------------
+
+// Class holding constants and methods that apply to all string search variants,
+// independently of subject and pattern char size.
+class StringSearchBase {
+ protected:
+  // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
+  // limit, we can fix the size of tables. For a needle longer than this limit,
+  // search will not be optimal, since we only build tables for a suffix
+  // of the string, but it is a safe approximation.
+  static const int kBMMaxShift = 250;
+
+  // Reduce alphabet to this size.
+  // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
+  // proportional to the input alphabet. We reduce the alphabet size by
+  // equating input characters modulo a smaller alphabet size. This gives
+  // a potentially less efficient searching, but is a safe approximation.
+  // For needles using only characters in the same Unicode 256-code point page,
+  // there is no search speed degradation.
+  static const int kLatin1AlphabetSize = 256;
+  static const int kUC16AlphabetSize = 256;
+
+  // Bad-char shift table stored in the state. Its length is the alphabet size.
+  // For patterns below this length, the skip length of Boyer-Moore is too short
+  // to compensate for the algorithmic overhead compared to simple brute force.
+  static const int kBMMinPatternLength = 8;
+
+  // Store for the BoyerMoore(Horspool) bad char shift table.
+  int bad_char_shift_table_[kUC16AlphabetSize];
+  // Store for the BoyerMoore good suffix shift table.
+  int good_suffix_shift_table_[kBMMaxShift + 1];
+  // Table used temporarily while building the BoyerMoore good suffix
+  // shift table.
+  int suffix_table_[kBMMaxShift + 1];
+};
+
+template <typename Char>
+class StringSearch : private StringSearchBase {
+ public:
+  typedef stringsearch::Vector<const Char> Vector;
+
+  explicit StringSearch(Vector pattern) : pattern_(pattern), start_(0) {
+    if (pattern.length() >= kBMMaxShift) {
+      start_ = pattern.length() - kBMMaxShift;
+    }
+
+    size_t pattern_length = pattern_.length();
+    NBYTES_ASSERT_TRUE(pattern_length > 0);
+    if (pattern_length < kBMMinPatternLength) {
+      if (pattern_length == 1) {
+        strategy_ = SearchStrategy::kSingleChar;
+        return;
+      }
+      strategy_ = SearchStrategy::kLinear;
+      return;
+    }
+    strategy_ = SearchStrategy::kInitial;
+  }
+
+  size_t Search(Vector subject, size_t index) {
+    switch (strategy_) {
+      case kBoyerMooreHorspool:
+        return BoyerMooreHorspoolSearch(subject, index);
+      case kBoyerMoore:
+        return BoyerMooreSearch(subject, index);
+      case kInitial:
+        return InitialSearch(subject, index);
+      case kLinear:
+        return LinearSearch(subject, index);
+      case kSingleChar:
+        return SingleCharSearch(subject, index);
+    }
+    unreachable();
+  }
+
+  static inline int AlphabetSize() {
+    if (sizeof(Char) == 1) {
+      // Latin1 needle.
+      return kLatin1AlphabetSize;
+    } else {
+      // UC16 needle.
+      return kUC16AlphabetSize;
+    }
+
+    static_assert(
+        sizeof(Char) == sizeof(uint8_t) || sizeof(Char) == sizeof(uint16_t),
+        "sizeof(Char) == sizeof(uint16_t) || sizeof(uint8_t)");
+  }
+
+ private:
+  typedef size_t (StringSearch::*SearchFunction)(Vector, size_t);
+  size_t SingleCharSearch(Vector subject, size_t start_index);
+  size_t LinearSearch(Vector subject, size_t start_index);
+  size_t InitialSearch(Vector subject, size_t start_index);
+  size_t BoyerMooreHorspoolSearch(Vector subject, size_t start_index);
+  size_t BoyerMooreSearch(Vector subject, size_t start_index);
+
+  void PopulateBoyerMooreHorspoolTable();
+
+  void PopulateBoyerMooreTable();
+
+  static inline int CharOccurrence(int *bad_char_occurrence, Char char_code) {
+    if (sizeof(Char) == 1) {
+      return bad_char_occurrence[static_cast<int>(char_code)];
+    }
+    // Both pattern and subject are UC16. Reduce character to equivalence class.
+    int equiv_class = char_code % kUC16AlphabetSize;
+    return bad_char_occurrence[equiv_class];
+  }
+
+  enum SearchStrategy {
+    kBoyerMooreHorspool,
+    kBoyerMoore,
+    kInitial,
+    kLinear,
+    kSingleChar,
+  };
+
+  // The pattern to search for.
+  Vector pattern_;
+  SearchStrategy strategy_;
+  // Cache value of Max(0, pattern_length() - kBMMaxShift)
+  size_t start_;
+};
+
+inline uint8_t GetHighestValueByte(uint16_t character) {
+  return std::max(static_cast<uint8_t>(character & 0xFF),
+                  static_cast<uint8_t>(character >> 8));
+}
+
+inline uint8_t GetHighestValueByte(uint8_t character) { return character; }
+
+// Searches for a byte value in a memory buffer, back to front.
+// Uses memrchr(3) on systems which support it, for speed.
+// Falls back to a vanilla for loop on non-GNU systems such as Windows.
+inline const void *MemrchrFill(const void *haystack, uint8_t needle,
+                               size_t haystack_len) {
+#ifdef _GNU_SOURCE
+  return memrchr(haystack, needle, haystack_len);
+#else
+  const uint8_t *haystack8 = static_cast<const uint8_t *>(haystack);
+  for (size_t i = haystack_len - 1; i != static_cast<size_t>(-1); i--) {
+    if (haystack8[i] == needle) {
+      return haystack8 + i;
+    }
+  }
+  return nullptr;
+#endif
+}
+
+// Finds the first occurrence of *two-byte* character pattern[0] in the string
+// `subject`. Does not check that the whole pattern matches.
+template <typename Char>
+inline size_t FindFirstCharacter(Vector<const Char> pattern,
+                                 Vector<const Char> subject, size_t index) {
+  const Char pattern_first_char = pattern[0];
+  const size_t max_n = (subject.length() - pattern.length() + 1);
+
+  // For speed, search for the more `rare` of the two bytes in pattern[0]
+  // using memchr / memrchr (which are much faster than a simple for loop).
+  const uint8_t search_byte = GetHighestValueByte(pattern_first_char);
+  size_t pos = index;
+  do {
+    const size_t bytes_to_search = (max_n - pos) * sizeof(Char);
+    const void *void_pos;
+    if (subject.forward()) {
+      // Assert that bytes_to_search won't overflow
+      NBYTES_ASSERT_TRUE(pos <= max_n);
+      NBYTES_ASSERT_TRUE(max_n - pos <= SIZE_MAX / sizeof(Char));
+      void_pos = memchr(subject.start() + pos, search_byte, bytes_to_search);
+    } else {
+      NBYTES_ASSERT_TRUE(pos <= subject.length());
+      NBYTES_ASSERT_TRUE(subject.length() - pos <= SIZE_MAX / sizeof(Char));
+      void_pos = MemrchrFill(subject.start() + pattern.length() - 1,
+                             search_byte, bytes_to_search);
+    }
+    const Char *char_pos = static_cast<const Char *>(void_pos);
+    if (char_pos == nullptr) return subject.length();
+
+    // Then, for each match, verify that the full two bytes match pattern[0].
+    char_pos = AlignDown(char_pos, sizeof(Char));
+    size_t raw_pos = static_cast<size_t>(char_pos - subject.start());
+    pos = subject.forward() ? raw_pos : (subject.length() - raw_pos - 1);
+    if (subject[pos] == pattern_first_char) {
+      // Match found, hooray.
+      return pos;
+    }
+    // Search byte matched, but the other byte of pattern[0] didn't. Keep going.
+  } while (++pos < max_n);
+
+  return subject.length();
+}
+
+// Finds the first occurrence of the byte pattern[0] in string `subject`.
+// Does not verify that the whole pattern matches.
+template <>
+inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,
+                                 Vector<const uint8_t> subject, size_t index) {
+  const uint8_t pattern_first_char = pattern[0];
+  const size_t subj_len = subject.length();
+  const size_t max_n = (subject.length() - pattern.length() + 1);
+
+  const void *pos;
+  if (subject.forward()) {
+    pos = memchr(subject.start() + index, pattern_first_char, max_n - index);
+  } else {
+    pos = MemrchrFill(subject.start() + pattern.length() - 1,
+                      pattern_first_char, max_n - index);
+  }
+  const uint8_t *char_pos = static_cast<const uint8_t *>(pos);
+  if (char_pos == nullptr) {
+    return subj_len;
+  }
+
+  size_t raw_pos = static_cast<size_t>(char_pos - subject.start());
+  return subject.forward() ? raw_pos : (subj_len - raw_pos - 1);
+}
+
+//---------------------------------------------------------------------
+// Single Character Pattern Search Strategy
+//---------------------------------------------------------------------
+
+template <typename Char>
+size_t StringSearch<Char>::SingleCharSearch(Vector subject, size_t index) {
+  NBYTES_ASSERT_TRUE(1 == pattern_.length());
+  return FindFirstCharacter(pattern_, subject, index);
+}
+
+//---------------------------------------------------------------------
+// Linear Search Strategy
+//---------------------------------------------------------------------
+
+// Simple linear search for short patterns. Never bails out.
+template <typename Char>
+size_t StringSearch<Char>::LinearSearch(Vector subject, size_t index) {
+  NBYTES_ASSERT_TRUE(pattern_.length() > 1);
+  const size_t n = subject.length() - pattern_.length();
+  for (size_t i = index; i <= n; i++) {
+    i = FindFirstCharacter(pattern_, subject, i);
+    if (i == subject.length()) return subject.length();
+    NBYTES_ASSERT_TRUE(i <= n);
+
+    bool matches = true;
+    for (size_t j = 1; j < pattern_.length(); j++) {
+      if (pattern_[j] != subject[i + j]) {
+        matches = false;
+        break;
+      }
+    }
+    if (matches) {
+      return i;
+    }
+  }
+  return subject.length();
+}
+
+//---------------------------------------------------------------------
+// Boyer-Moore string search
+//---------------------------------------------------------------------
+
+template <typename Char>
+size_t StringSearch<Char>::BoyerMooreSearch(Vector subject,
+                                            size_t start_index) {
+  const size_t subject_length = subject.length();
+  const size_t pattern_length = pattern_.length();
+  // Only preprocess at most kBMMaxShift last characters of pattern.
+  size_t start = start_;
+
+  int *bad_char_occurrence = bad_char_shift_table_;
+  int *good_suffix_shift = good_suffix_shift_table_ - start_;
+
+  Char last_char = pattern_[pattern_length - 1];
+  size_t index = start_index;
+  // Continue search from i.
+  while (index <= subject_length - pattern_length) {
+    size_t j = pattern_length - 1;
+    int c;
+    while (last_char != (c = subject[index + j])) {
+      int shift = j - CharOccurrence(bad_char_occurrence, c);
+      index += shift;
+      if (index > subject_length - pattern_length) {
+        return subject.length();
+      }
+    }
+    while (pattern_[j] == (c = subject[index + j])) {
+      if (j == 0) {
+        return index;
+      }
+      j--;
+    }
+    if (j < start) {
+      // we have matched more than our tables allow us to be smart about.
+      // Fall back on BMH shift.
+      index +=
+          pattern_length - 1 - CharOccurrence(bad_char_occurrence, last_char);
+    } else {
+      int gs_shift = good_suffix_shift[j + 1];
+      int bc_occ = CharOccurrence(bad_char_occurrence, c);
+      int shift = j - bc_occ;
+      if (gs_shift > shift) {
+        shift = gs_shift;
+      }
+      index += shift;
+    }
+  }
+
+  return subject.length();
+}
+
+template <typename Char>
+void StringSearch<Char>::PopulateBoyerMooreTable() {
+  const size_t pattern_length = pattern_.length();
+  // Only look at the last kBMMaxShift characters of pattern (from start_
+  // to pattern_length).
+  const size_t start = start_;
+  const size_t length = pattern_length - start;
+
+  // Biased tables so that we can use pattern indices as table indices,
+  // even if we only cover the part of the pattern from offset start.
+  int *shift_table = good_suffix_shift_table_ - start_;
+  int *suffix_table = suffix_table_ - start_;
+
+  // Initialize table.
+  for (size_t i = start; i < pattern_length; i++) {
+    shift_table[i] = length;
+  }
+  shift_table[pattern_length] = 1;
+  suffix_table[pattern_length] = pattern_length + 1;
+
+  if (pattern_length <= start) {
+    return;
+  }
+
+  // Find suffixes.
+  Char last_char = pattern_[pattern_length - 1];
+  size_t suffix = pattern_length + 1;
+  {
+    size_t i = pattern_length;
+    while (i > start) {
+      Char c = pattern_[i - 1];
+      while (suffix <= pattern_length && c != pattern_[suffix - 1]) {
+        if (static_cast<size_t>(shift_table[suffix]) == length) {
+          shift_table[suffix] = suffix - i;
+        }
+        suffix = suffix_table[suffix];
+      }
+      suffix_table[--i] = --suffix;
+      if (suffix == pattern_length) {
+        // No suffix to extend, so we check against last_char only.
+        while ((i > start) && (pattern_[i - 1] != last_char)) {
+          if (static_cast<size_t>(shift_table[pattern_length]) == length) {
+            shift_table[pattern_length] = pattern_length - i;
+          }
+          suffix_table[--i] = pattern_length;
+        }
+        if (i > start) {
+          suffix_table[--i] = --suffix;
+        }
+      }
+    }
+  }
+  // Build shift table using suffixes.
+  if (suffix < pattern_length) {
+    for (size_t i = start; i <= pattern_length; i++) {
+      if (static_cast<size_t>(shift_table[i]) == length) {
+        shift_table[i] = suffix - start;
+      }
+      if (i == suffix) {
+        suffix = suffix_table[suffix];
+      }
+    }
+  }
+}
+
+//---------------------------------------------------------------------
+// Boyer-Moore-Horspool string search.
+//---------------------------------------------------------------------
+
+// Returns the index of the first match at or after start_index, or the subject
+// length if there is none; switches to BoyerMooreSearch once badness exceeds 0.
+template <typename Char>
+size_t StringSearch<Char>::BoyerMooreHorspoolSearch(Vector subject,
+                                                    size_t start_index) {
+  const size_t subject_length = subject.length();
+  const size_t pattern_length = pattern_.length();
+  int *char_occurrences = bad_char_shift_table_;
+  // How bad we are doing without a good-suffix table.
+  int64_t badness = -static_cast<int64_t>(pattern_length);
+  Char last_char = pattern_[pattern_length - 1];
+  int last_char_shift =
+      pattern_length - 1 - CharOccurrence(char_occurrences, last_char);
+  // Perform search
+  size_t index = start_index;  // No matches found prior to this index.
+  while (index <= subject_length - pattern_length) {
+    size_t j = pattern_length - 1;
+    int subject_char;
+    while (last_char != (subject_char = subject[index + j])) {
+      int bc_occ = CharOccurrence(char_occurrences, subject_char);
+      int shift = j - bc_occ;
+      index += shift;
+      badness += 1 - shift;  // at most zero, so badness cannot increase.
+      if (index > subject_length - pattern_length) {
+        return subject_length;
+      }
+    }
+    j--;
+    while (pattern_[j] == (subject[index + j])) {
+      if (j == 0) {
+        return index;
+      }
+      j--;
+    }
+    index += last_char_shift;
+    // Badness grows by the characters just compared and shrinks by the
+    // characters skipped; it measures how we do versus reading each character
+    // exactly once. The cast keeps the difference signed even where size_t is
+    // narrower than int64_t (an unsigned wrap would look like huge badness).
+    badness += static_cast<int64_t>(pattern_length - j) - last_char_shift;
+    if (badness > 0) {
+      PopulateBoyerMooreTable();
+      strategy_ = SearchStrategy::kBoyerMoore;
+      return BoyerMooreSearch(subject, index);
+    }
+  }
+  return subject.length();
+}
+
+// Fills bad_char_shift_table_ with, for every bucket, the last pattern index
+// in [start_, length - 1) whose character maps to it (start_ - 1 if none).
+template <typename Char>
+void StringSearch<Char>::PopulateBoyerMooreHorspoolTable() {
+  int *const table = bad_char_shift_table_;
+  const size_t first = start_;  // Only the pattern tail from start_ is used.
+  const size_t bucket_count = AlphabetSize();
+  const size_t last = pattern_.length() - 1;
+
+  // Default entry for characters that never occur in the preprocessed range.
+  if (first != 0) {
+    for (int *slot = table; slot != table + bucket_count; ++slot) {
+      *slot = first - 1;
+    }
+  } else {
+    // Whole pattern is preprocessed: every default is -1.
+    memset(table, -1, bucket_count * sizeof(*table));
+  }
+  // Walk forwards so the *last* occurrence of each bucket wins. The final
+  // pattern character is deliberately left out.
+  for (size_t pos = first; pos < last; pos++) {
+    Char c = pattern_[pos];
+    int bucket = (sizeof(Char) == 1) ? c : c % AlphabetSize();
+    table[bucket] = pos;
+  }
+}
+
+//---------------------------------------------------------------------
+// Linear string search with bailout to BMH.
+//---------------------------------------------------------------------
+
+// Simple linear search for short patterns; returns the match index or
+// subject.length(). Bails out to BoyerMooreHorspool if no match turns up early.
+template <typename Char>
+size_t StringSearch<Char>::InitialSearch(Vector subject, size_t index) {
+  const size_t pattern_length = pattern_.length();
+  // Badness counts the work done so far; once it turns positive we switch to
+  // a better algorithm. The cast keeps the start value negative even where
+  // size_t is narrower than int64_t (unsigned math would wrap positive).
+  int64_t badness = -10 - static_cast<int64_t>(pattern_length << 2);
+
+  // We know our pattern is at least 2 characters, we cache the first so
+  // the common case of the first character not matching is faster.
+  for (size_t i = index, n = subject.length() - pattern_length; i <= n; i++) {
+    badness++;
+    if (badness <= 0) {
+      i = FindFirstCharacter(pattern_, subject, i);
+      if (i == subject.length()) return subject.length();
+      NBYTES_ASSERT_TRUE(i <= n);
+      size_t j = 1;
+      do {
+        if (pattern_[j] != subject[i + j]) {
+          break;
+        }
+        j++;
+      } while (j < pattern_length);
+      if (j == pattern_length) {
+        return i;
+      }
+      badness += j;
+    } else {
+      PopulateBoyerMooreHorspoolTable();
+      strategy_ = SearchStrategy::kBoyerMooreHorspool;
+      return BoyerMooreHorspoolSearch(subject, i);
+    }
+  }
+  return subject.length();
+}
+
+// One-shot convenience search: builds a temporary StringSearch for `pattern`
+// and runs it once over `subject`, beginning at start_index.
+// If the same pattern is searched for more than once, construct a single
+// StringSearch object instead and call its Search function for each search,
+// rather than calling this helper repeatedly.
+template <typename Char>
+size_t SearchString(Vector<const Char> subject, Vector<const Char> pattern,
+                    size_t start_index) {
+  return StringSearch<Char>(pattern).Search(subject, start_index);
+}
+}  // namespace stringsearch
+
+// Searches `haystack` for `needle`, forwards from start_index when is_forward
+// is true and otherwise backwards (lastIndexOf instead of indexOf).
+// Returns the offset of the match in haystack, or haystack_length if none.
+template <typename Char>
+size_t SearchString(const Char *haystack, size_t haystack_length,
+                    const Char *needle, size_t needle_length,
+                    size_t start_index, bool is_forward) {
+  if (needle_length > haystack_length) return haystack_length;
+
+  // A backward search reuses the forward code on reversed views of both
+  // inputs: v_needle[0] is then the *last* character of the needle, so we look
+  // for the first instance of rev(needle) in rev(haystack).
+  stringsearch::Vector<const Char> v_needle(needle, needle_length, is_forward);
+  stringsearch::Vector<const Char> v_haystack(haystack, haystack_length,
+                                              is_forward);
+  // Largest offset at which the needle still fits inside the haystack.
+  const size_t last_fit = haystack_length - needle_length;
+  // Express start_index in the coordinates of the (possibly reversed) views.
+  size_t view_start = start_index;
+  if (!is_forward) {
+    view_start = (last_fit < start_index) ? 0 : last_fit - start_index;
+  }
+
+  const size_t hit =
+      stringsearch::SearchString(v_haystack, v_needle, view_start);
+  if (hit == haystack_length) return hit;  // not found
+  // Map a position in the reversed views back to an offset in haystack.
+  return is_forward ? hit : last_fit - hit;
+}
+
+template <size_t N>
+size_t SearchString(const char *haystack, size_t haystack_length,
+                    const char (&needle)[N]) {
+  const auto *hay = reinterpret_cast<const uint8_t *>(haystack);
+  const auto *pat = reinterpret_cast<const uint8_t *>(needle);
+  return SearchString(hay, haystack_length, pat, N - 1, 0, true);  // skip NUL
+}
+
+// ============================================================================
+// Version metadata: string form plus the same major/minor/revision as ints.
+#define NBYTES_VERSION "0.0.1"
+
+enum {
+  NBYTES_VERSION_MAJOR = 0,
+  NBYTES_VERSION_MINOR = 0,
+  NBYTES_VERSION_REVISION = 1,
+};
+
+}  // namespace nbytes
+
+#endif  // NBYTES_H
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..5e107c7
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,38 @@
+[project]
+name = "nbytes"
+requires-python = ">=3.12"
+
+[tool.ruff]
+line-length = 120
+target-version = "py312"
+
+[tool.ruff.format]
+quote-style = "single"
+indent-style = "space"
+docstring-code-format = true
+
+[tool.ruff.lint]
+select = [
+  "C90",    # McCabe cyclomatic complexity
+  "E",      # pycodestyle
+  "F",      # Pyflakes
+  "ICN",    # flake8-import-conventions
+  "INT",    # flake8-gettext
+  "PLC",    # Pylint conventions
+  "PLE",    # Pylint errors
+  "PLR09",  # Pylint refactoring: max-args, max-branches, max-returns, max-statements
+  "PYI",    # flake8-pyi
+  "RSE",    # flake8-raise
+  "RUF",    # Ruff-specific rules
+  "T10",    # flake8-debugger
+  "TCH",    # flake8-type-checking
+  "TID",    # flake8-tidy-imports
+  "W",      # pycodestyle
+  "YTT",    # flake8-2020
+  "ANN"     # flake8-annotations
+]
+ignore = [
+  "E722",   # Do not use bare `except`
+  "ANN101", # Missing type annotation for self in method
+  "TID252", # Prefer absolute imports over relative imports from parent modules
+]
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..554fb4a
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,6 @@
+# nbytes library; public headers live in include/ (install: includedir).
+add_library(nbytes nbytes.cpp)
+target_include_directories(nbytes PUBLIC
+  $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
+  $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+)
diff --git a/src/nbytes.cpp b/src/nbytes.cpp
new file mode 100644
index 0000000..149f216
--- /dev/null
+++ b/src/nbytes.cpp
@@ -0,0 +1,238 @@
+#include "nbytes.h"
+#include <cmath>
+#include <cstddef>
+#include <string.h>
+
+namespace nbytes {
+
+// ============================================================================
+// Byte Swapping
+
+namespace {
+// Some systems define these in <sys/byteorder.h> or <netinet/in.h>; undefine
+// first to avoid warnings. Fallbacks are fully parenthesized expressions.
+#ifdef BSWAP_2
+#undef BSWAP_2
+#endif
+#ifdef BSWAP_4
+#undef BSWAP_4
+#endif
+#ifdef BSWAP_8
+#undef BSWAP_8
+#endif
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#define BSWAP_2(x) _byteswap_ushort(x)
+#define BSWAP_4(x) _byteswap_ulong(x)
+#define BSWAP_8(x) _byteswap_uint64(x)
+#else
+#define BSWAP_2(x) (static_cast<uint16_t>(((x) << 8) | ((x) >> 8)))
+#define BSWAP_4(x)                                                        \
+  ((((x) & 0xFF) << 24) | (((x) & 0xFF00) << 8) | (((x) >> 8) & 0xFF00) | \
+   (((x) >> 24) & 0xFF))
+#define BSWAP_8(x)                             \
+  ((((x) & 0xFF00000000000000ull) >> 56) |     \
+   (((x) & 0x00FF000000000000ull) >> 40) |     \
+   (((x) & 0x0000FF0000000000ull) >> 24) |     \
+   (((x) & 0x000000FF00000000ull) >> 8) |      \
+   (((x) & 0x00000000FF000000ull) << 8) |      \
+   (((x) & 0x0000000000FF0000ull) << 24) |     \
+   (((x) & 0x000000000000FF00ull) << 40) |     \
+   (((x) & 0x00000000000000FFull) << 56))
+#endif
+}  // namespace
+
+// Reverses the byte order of every 16-bit unit in `data`, in place.
+// Returns false, leaving `data` untouched, unless nbytes is a multiple of 2.
+bool SwapBytes16(void *data, size_t nbytes) {
+  if (nbytes % sizeof(uint16_t) != 0) return false;
+#if defined(_MSC_VER)
+  // MSVC has no strict aliasing, and is able to highly optimize this case.
+  if (AlignUp(data, sizeof(uint16_t)) == data) {
+    uint16_t *unit = reinterpret_cast<uint16_t *>(data);
+    uint16_t *const stop = unit + nbytes / sizeof(uint16_t);
+    for (; unit != stop; ++unit) {
+      *unit = BSWAP_2(*unit);
+    }
+    return true;
+  }
+#endif
+  // Portable path: memcpy in and out of a local so alignment does not matter.
+  uint8_t *p = reinterpret_cast<uint8_t *>(data);
+  for (uint8_t *const end = p + nbytes; p != end; p += sizeof(uint16_t)) {
+    uint16_t word = 0;
+    memcpy(&word, p, sizeof(word));
+    word = BSWAP_2(word);
+    memcpy(p, &word, sizeof(word));
+  }
+  return true;
+}
+
+// Reverses the byte order of every 32-bit unit in `data`, in place.
+// Returns false, leaving `data` untouched, unless nbytes is a multiple of 4.
+bool SwapBytes32(void *data, size_t nbytes) {
+  if (nbytes % sizeof(uint32_t) != 0) return false;
+#if defined(_MSC_VER)
+  // MSVC has no strict aliasing, and is able to highly optimize this case.
+  if (AlignUp(data, sizeof(uint32_t)) == data) {
+    uint32_t *unit = reinterpret_cast<uint32_t *>(data);
+    uint32_t *const stop = unit + nbytes / sizeof(uint32_t);
+    for (; unit != stop; ++unit) {
+      *unit = BSWAP_4(*unit);
+    }
+    return true;
+  }
+#endif
+  // Portable path: memcpy in and out of a local so alignment does not matter.
+  uint8_t *p = reinterpret_cast<uint8_t *>(data);
+  for (uint8_t *const end = p + nbytes; p != end; p += sizeof(uint32_t)) {
+    uint32_t word = 0;
+    memcpy(&word, p, sizeof(word));
+    word = BSWAP_4(word);
+    memcpy(p, &word, sizeof(word));
+  }
+  return true;
+}
+
+// Reverses the byte order of every 64-bit unit in `data`, in place.
+// Returns false, leaving `data` untouched, unless nbytes is a multiple of 8.
+bool SwapBytes64(void *data, size_t nbytes) {
+  if (nbytes % sizeof(uint64_t) != 0) return false;
+#if defined(_MSC_VER)
+  // MSVC has no strict aliasing, and is able to highly optimize this case.
+  if (AlignUp(data, sizeof(uint64_t)) == data) {
+    uint64_t *unit = reinterpret_cast<uint64_t *>(data);
+    uint64_t *const stop = unit + nbytes / sizeof(uint64_t);
+    for (; unit != stop; ++unit) {
+      *unit = BSWAP_8(*unit);
+    }
+    return true;
+  }
+#endif
+  // Portable path: memcpy in and out of a local so alignment does not matter.
+  uint8_t *p = reinterpret_cast<uint8_t *>(data);
+  for (uint8_t *const end = p + nbytes; p != end; p += sizeof(uint64_t)) {
+    uint64_t word = 0;
+    memcpy(&word, p, sizeof(word));
+    word = BSWAP_8(word);
+    memcpy(p, &word, sizeof(word));
+  }
+  return true;
+}
+
+// ============================================================================
+// Base64 (legacy)
+
+// Decodes regular and URL-safe base64; -1: not in alphabet, -2: LF/CR/space.
+const int8_t unbase64_table[256] = {
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, -2, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+    61, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
+    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
+    63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+    43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1};
+
+// ============================================================================
+// Hex: unhex_table maps '0'-'9', 'A'-'F' and 'a'-'f' to 0-15, all else to -1.
+
+const int8_t unhex_table[256] = {
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,
+    9,  -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1};
+
+// Writes the lowercase hex form of src[0..slen) into dst; returns slen * 2.
+size_t HexEncode(const char *src, size_t slen, char *dst, size_t dlen) {
+  static const char kDigits[] = "0123456789abcdef";
+  // The output size is known in advance; insist that the caller made room.
+  NBYTES_ASSERT_TRUE(dlen >= MultiplyWithOverflowCheck<size_t>(slen, 2u) &&
+                     "not enough space provided for hex encode");
+  const size_t written = slen * 2;
+  const char *in = src;
+  for (char *out = dst, *const end = dst + written; out != end; out += 2) {
+    const uint8_t byte = static_cast<uint8_t>(*in++);
+    out[0] = kDigits[byte >> 4];
+    out[1] = kDigits[byte & 15];
+  }
+  return written;
+}
+
+std::string HexEncode(const char *src, size_t slen) {
+  // Size the result up front (two hex digits per byte), then fill in place.
+  std::string encoded(slen * 2, '\0');
+  HexEncode(src, slen, encoded.data(), encoded.size());
+  return encoded;
+}
+
+// ============================================================================
+
+// Copies len bytes from src to dst with the high bit of every byte cleared.
+void ForceAsciiSlow(const char *src, char *dst, size_t len) {
+  for (size_t i = 0; i < len; ++i) {
+    dst[i] = src[i] & 0x7f;
+  }
+}
+
+// Same result as ForceAsciiSlow, but masks a machine word at a time when src
+// and dst share the same alignment; otherwise falls back to the byte loop.
+void ForceAscii(const char *src, char *dst, size_t len) {
+  if (len < 16) {
+    ForceAsciiSlow(src, dst, len);
+    return;
+  }
+  const unsigned bytes_per_word = sizeof(uintptr_t);
+  const unsigned align_mask = bytes_per_word - 1;
+  const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
+  const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;
+  if (src_unalign > 0) {
+    if (src_unalign == dst_unalign) {
+      const unsigned unalign = bytes_per_word - src_unalign;
+      ForceAsciiSlow(src, dst, unalign);
+      src += unalign;
+      dst += unalign;
+      // Subtract what was consumed (unalign), not the misalignment itself;
+      // otherwise the loops below run past the end of both buffers.
+      len -= unalign;
+    } else {
+      ForceAsciiSlow(src, dst, len);
+      return;
+    }
+  }
+#if defined(_WIN64) || defined(_LP64)
+  const uintptr_t mask = ~0x8080808080808080ll;
+#else
+  const uintptr_t mask = ~0x80808080l;
+#endif
+  const uintptr_t *srcw = reinterpret_cast<const uintptr_t *>(src);
+  uintptr_t *dstw = reinterpret_cast<uintptr_t *>(dst);
+  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
+    dstw[i] = srcw[i] & mask;
+  }
+  // Finish the trailing bytes that do not fill a whole word.
+  const unsigned remainder = len & align_mask;
+  if (remainder > 0) {
+    const size_t offset = len - remainder;
+    ForceAsciiSlow(src + offset, dst + offset, remainder);
+  }
+}
+
+}  // namespace nbytes
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 0000000..00fc8cb
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Unit-test executable `basic`, built from basic.cpp and linked against the
+# nbytes library and GoogleTest's gtest_main.
+include(GoogleTest)
+include(CTest)
+add_executable(basic basic.cpp)
+target_link_libraries(basic PRIVATE GTest::gtest_main nbytes)
+# NAME/COMMAND form so CTest resolves the `basic` target to its built binary.
+add_test(NAME basic_test COMMAND basic)
+# Additionally register each GoogleTest case as its own CTest entry.
+gtest_discover_tests(basic)
diff --git a/tests/basic.cpp b/tests/basic.cpp
new file mode 100644
index 0000000..8f0a863
--- /dev/null
+++ b/tests/basic.cpp
@@ -0,0 +1,7 @@
+#include <format>
+#include <nbytes.h>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+TEST(basic, it_works) { SUCCEED(); }  // Smoke test: passes if this file builds.
diff --git a/tools/run-clang-format.sh b/tools/run-clang-format.sh
new file mode 100755
index 0000000..ce1a664
--- /dev/null
+++ b/tools/run-clang-format.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# Copyright 2023 Yagiz Nizipli and Daniel Lemire
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+set -e
+# Formats all tracked C/C++ sources in place; "$@" is passed on to clang-format.
+SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
+MAINSOURCE="$SCRIPTPATH/.."
+ALL_FILES=$(cd "$MAINSOURCE" && git ls-tree --full-tree --name-only -r HEAD | grep -E '\.(c|h|cc|cpp|hh)$')
+
+if clang-format-17 --version  2>/dev/null | grep -qF 'version 17.'; then
+  cd "$MAINSOURCE"; clang-format-17 --style=file --verbose -i "$@" $ALL_FILES
+  exit 0
+elif clang-format --version  2>/dev/null | grep -qF 'version 17.'; then
+  cd "$MAINSOURCE"; clang-format --style=file --verbose -i "$@" $ALL_FILES
+  exit 0
+fi
+echo "Trying to use docker"
+command -v docker >/dev/null 2>&1 || { echo >&2 "Please install docker. E.g., go to https://www.docker.com/products/docker-desktop Type 'docker' to diagnose the problem."; exit 1; }
+docker info >/dev/null 2>&1 || { echo >&2 "Docker server is not running? type 'docker info'."; exit 1; }
+
+if [ -t 0 ]; then DOCKER_ARGS=-it; fi
+docker pull kszonek/clang-format-17
+
+docker run --rm $DOCKER_ARGS -v "$MAINSOURCE":"$MAINSOURCE":Z  -w "$MAINSOURCE" -u "$(id -u $USER):$(id -g $USER)" kszonek/clang-format-17 --style=file --verbose -i "$@" $ALL_FILES