diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..59d0684 --- /dev/null +++ b/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: Google +SortIncludes: Never diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml new file mode 100644 index 0000000..0241386 --- /dev/null +++ b/.github/workflows/linter.yml @@ -0,0 +1,38 @@ +name: Linter + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Run clang-format + uses: jidicula/clang-format-action@c74383674bf5f7c69f60ce562019c1c94bc1421a # v4.13.0 + with: + clang-format-version: '17' + fallback-style: 'Google' + + - uses: chartboost/ruff-action@e18ae971ccee1b2d7bbef113930f00c670b78da4 # v1.0.0 + name: Lint with Ruff + with: + version: 0.5.1 diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml new file mode 100644 index 0000000..8285461 --- /dev/null +++ b/.github/workflows/macos.yml @@ -0,0 +1,33 @@ +name: macos CI + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + macos-build: # job id names the macos-latest runner it actually uses + runs-on: macos-latest + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - name: Prepare + run: cmake -B build + - name: Build + run: cmake --build build -j=2 + - name: Test + run: ctest --output-on-failure --test-dir build diff --git a/.github/workflows/ubuntu.yml 
b/.github/workflows/ubuntu.yml new file mode 100644 index 0000000..6b5c572 --- /dev/null +++ b/.github/workflows/ubuntu.yml @@ -0,0 +1,41 @@ +name: Ubuntu 24.04 CI + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-build: + runs-on: ubuntu-24.04 + strategy: + matrix: + shared: [ON, OFF] + cxx: [g++-14] + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - name: Setup Ninja + run: sudo apt-get install ninja-build + - name: Prepare + run: cmake -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + env: + CXX: ${{matrix.cxx}} + - name: Build + run: cmake --build build -j=2 + - name: Test + run: ctest --output-on-failure --test-dir build diff --git a/.github/workflows/visual-studio.yml b/.github/workflows/visual-studio.yml new file mode 100644 index 0000000..34a5fe3 --- /dev/null +++ b/.github/workflows/visual-studio.yml @@ -0,0 +1,42 @@ +name: VS17 CI + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ci: + name: windows-vs17 + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + include: + - {gen: Visual Studio 17 2022, arch: x64, config: Release} + - {gen: Visual Studio 17 2022, arch: x64, config: Debug} + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - name: Configure + run: | + cmake -G "${{matrix.gen}}" -A ${{matrix.arch}} -B build + - name: Build + run: cmake --build build --config "${{matrix.config}}" 
--verbose + - name: Run tests + working-directory: build + run: ctest -C "${{matrix.config}}" --output-on-failure diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f1a825f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +cmake-build-debug +build diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..f2efa3c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required(VERSION 3.28) +project(nbytes) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED True) +# Default to Release only for single-config generators; multi-config generators (Visual Studio, Xcode) select the configuration at build time and ignore CMAKE_BUILD_TYPE. +if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "No build type selected, default to Release") + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) +endif() + +option(NBYTES_DEVELOPMENT_CHECKS "Enable development checks" OFF) + +include(GNUInstallDirs) +include(FetchContent) + +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip +) +# For Windows: Prevent overriding the parent project's compiler/linker settings +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) + +add_subdirectory(src) +enable_testing() +add_subdirectory(tests) + +install( + FILES include/nbytes.h + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" + COMPONENT nbytes_development +) + +install( + TARGETS nbytes + EXPORT nbytes_targets + RUNTIME COMPONENT nbytes_runtime + LIBRARY COMPONENT nbytes_runtime + NAMELINK_COMPONENT nbytes_development + ARCHIVE COMPONENT nbytes_development + INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" +) diff --git a/README.md b/README.md index 976c4f0..abafc11 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,16 @@ # nbytes + A library of byte handling functions extracted from Node.js core + +## Building + +```bash +cmake -B build +cmake --build build +``` + +## Testing 
+ +```bash +ctest --test-dir build +``` diff --git a/include/nbytes.h b/include/nbytes.h new file mode 100644 index 0000000..28c7bf9 --- /dev/null +++ b/include/nbytes.h @@ -0,0 +1,841 @@ +#ifndef NBYTES_H +#define NBYTES_H +#include +#include +#include +#include +#include +#include + +namespace nbytes { + +#if NBYTES_DEVELOPMENT_CHECKS +#define NBYTES_STR(x) #x +// Aborts the process when EXPR evaluates to false (development builds only). +#define NBYTES_REQUIRE(EXPR) \ + { \ + if (!(EXPR)) { abort(); } } + +#define NBYTES_FAIL(MESSAGE) \ + do { \ + std::cerr << "FAIL: " << (MESSAGE) << std::endl; \ + abort(); \ + } while (0); +#define NBYTES_ASSERT_EQUAL(LHS, RHS, MESSAGE) \ + do { \ + if (LHS != RHS) { \ + std::cerr << "Mismatch: '" << LHS << "' - '" << RHS << "'" << std::endl; \ + NBYTES_FAIL(MESSAGE); \ + } \ + } while (0); +#define NBYTES_ASSERT_TRUE(COND) \ + do { \ + if (!(COND)) { \ + std::cerr << "Assert at line " << __LINE__ << " of file " << __FILE__ \ + << std::endl; \ + NBYTES_FAIL(NBYTES_STR(COND)); \ + } \ + } while (0); +#else +#define NBYTES_FAIL(MESSAGE) +#define NBYTES_ASSERT_EQUAL(LHS, RHS, MESSAGE) +#define NBYTES_ASSERT_TRUE(COND) +#endif + +[[noreturn]] inline void unreachable() { +#ifdef __GNUC__ + __builtin_unreachable(); +#elif defined(_MSC_VER) + __assume(false); +#else +#endif +} + +// The nbytes (short for "node bytes") is a set of utility helpers for +// working with bytes that are extracted from Node.js' internals. The +// motivation for extracting these into a separate library is to make it +// easier for other projects to implement functionality that is compatible +// with Node.js' implementation of various byte manipulation functions. + +// Round up a to the next highest multiple of b. +template +constexpr T RoundUp(T a, T b) { + return a % b != 0 ? a + b - (a % b) : a; +} + +// Align ptr to an `alignment`-bytes boundary. 
+template +constexpr T *AlignUp(T *ptr, U alignment) { + return reinterpret_cast( + RoundUp(reinterpret_cast(ptr), alignment)); +} + +template +inline T AlignDown(T value, U alignment) { + return reinterpret_cast( + (reinterpret_cast(value) & ~(alignment - 1))); +} + +template +inline T MultiplyWithOverflowCheck(T a, T b) { + auto ret = a * b; + if (a != 0) { + NBYTES_ASSERT_TRUE(b == ret / a); + } + + return ret; +} + +void ForceAsciiSlow(const char *src, char *dst, size_t len); +void ForceAscii(const char *src, char *dst, size_t len); + +// ============================================================================ +// Byte Swapping + +// Swaps bytes in place. nbytes is the number of bytes to swap and must be a +// multiple of the word size (checked by function). +bool SwapBytes16(void *data, size_t nbytes); +bool SwapBytes32(void *data, size_t nbytes); +bool SwapBytes64(void *data, size_t nbytes); + +// ============================================================================ +// Base64 (legacy) + +#ifdef _MSC_VER +#pragma warning(push) +// MSVC C4003: not enough actual parameters for macro 'identifier' +#pragma warning(disable : 4003) +#endif + +extern const int8_t unbase64_table[256]; + +template +bool Base64DecodeGroupSlow(char *const dst, const size_t dstlen, + const TypeName *const src, const size_t srclen, + size_t *const i, size_t *const k) { + uint8_t hi; + uint8_t lo; +#define V(expr) \ + for (;;) { \ + const uint8_t c = static_cast(src[*i]); \ + lo = unbase64_table[c]; \ + *i += 1; \ + if (lo < 64) break; /* Legal character. */ \ + if (c == '=' || *i >= srclen) return false; /* Stop decoding. */ \ + } \ + expr; \ + if (*i >= srclen) return false; \ + if (*k >= dstlen) return false; \ + hi = lo; + V(/* Nothing. */); + V(dst[(*k)++] = ((hi & 0x3F) << 2) | ((lo & 0x30) >> 4)); + V(dst[(*k)++] = ((hi & 0x0F) << 4) | ((lo & 0x3C) >> 2)); + V(dst[(*k)++] = ((hi & 0x03) << 6) | ((lo & 0x3F) >> 0)); +#undef V + return true; // Continue decoding. 
+} + +enum class Base64Mode { NORMAL, URL }; + +inline constexpr size_t Base64EncodedSize( + size_t size, Base64Mode mode = Base64Mode::NORMAL) { + return mode == Base64Mode::NORMAL ? ((size + 2) / 3 * 4) + // URL mode is unpadded: ceil(size * 4 / 3) in exact integer math. + : (size * 4 + 2) / 3; +} + +// Doesn't check for padding at the end. Can be 1-2 bytes over. +inline constexpr size_t Base64DecodedSizeFast(size_t size) { + // 1-byte input cannot be decoded + return size > 1 ? (size / 4) * 3 + (size % 4 + 1) / 2 : 0; +} + +inline uint32_t ReadUint32BE(const unsigned char *p) { // widen each byte before shifting + return (static_cast<uint32_t>(p[0]) << 24U) | + (static_cast<uint32_t>(p[1]) << 16U) | + (static_cast<uint32_t>(p[2]) << 8U) | static_cast<uint32_t>(p[3]); +} + +template +size_t Base64DecodedSize(const TypeName *src, size_t size) { + // 1-byte input cannot be decoded + if (size < 2) return 0; + + if (src[size - 1] == '=') { + size--; + if (src[size - 1] == '=') size--; + } + return Base64DecodedSizeFast(size); +} + +template +size_t Base64DecodeFast(char *const dst, const size_t dstlen, + const TypeName *const src, const size_t srclen, + const size_t decoded_size) { + const size_t available = dstlen < decoded_size ? dstlen : decoded_size; + const size_t max_k = available / 3 * 3; + size_t max_i = srclen / 4 * 4; + size_t i = 0; + size_t k = 0; + while (i < max_i && k < max_k) { + const unsigned char txt[] = { + static_cast( + unbase64_table[static_cast(src[i + 0])]), + static_cast( + unbase64_table[static_cast(src[i + 1])]), + static_cast( + unbase64_table[static_cast(src[i + 2])]), + static_cast( + unbase64_table[static_cast(src[i + 3])]), + }; + + const uint32_t v = ReadUint32BE(txt); + // If MSB is set, input contains whitespace or is not valid base64. + if (v & 0x80808080) { + if (!Base64DecodeGroupSlow(dst, dstlen, src, srclen, &i, &k)) return k; + max_i = i + (srclen - i) / 4 * 4; // Align max_i again. 
+ } else { + dst[k + 0] = ((v >> 22) & 0xFC) | ((v >> 20) & 0x03); + dst[k + 1] = ((v >> 12) & 0xF0) | ((v >> 10) & 0x0F); + dst[k + 2] = ((v >> 2) & 0xC0) | ((v >> 0) & 0x3F); + i += 4; + k += 3; + } + } + if (i < srclen && k < dstlen) { + Base64DecodeGroupSlow(dst, dstlen, src, srclen, &i, &k); + } + return k; +} + +template +size_t Base64Decode(char *const dst, const size_t dstlen, + const TypeName *const src, const size_t srclen) { + const size_t decoded_size = Base64DecodedSize(src, srclen); + return Base64DecodeFast(dst, dstlen, src, srclen, decoded_size); +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +// ============================================================================ +// Hex (legacy) + +extern const int8_t unhex_table[256]; + +template +static size_t HexDecode(char *buf, size_t len, const TypeName *src, + const size_t srcLen) { + size_t i; + for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) { + unsigned a = unhex_table[static_cast(src[i * 2 + 0])]; + unsigned b = unhex_table[static_cast(src[i * 2 + 1])]; + if (!~a || !~b) return i; + buf[i] = (a << 4) | b; + } + + return i; +} + +size_t HexEncode(const char *src, size_t slen, char *dst, size_t dlen); + +std::string HexEncode(const char *src, size_t slen); + +// ============================================================================ +// StringSearch + +namespace stringsearch { + +template +class Vector { + public: + Vector(T *data, size_t length, bool isForward) + : start_(data), length_(length), is_forward_(isForward) { + NBYTES_ASSERT_TRUE(length > 0 && data != nullptr); // a view must be non-empty and non-null + } + + // Returns the start of the memory range. + // For vector v this is NOT necessarily &v[0], see forward(). + const T *start() const { return start_; } + + // Returns the length of the vector, in characters. + size_t length() const { return length_; } + + // Returns true if the Vector is front-to-back, false if back-to-front. + // In the latter case, v[0] corresponds to the *end* of the memory range. 
+ bool forward() const { return is_forward_; } + + // Access individual vector elements; the index is bounds-checked via NBYTES_ASSERT_TRUE. + T &operator[](size_t index) const { + NBYTES_ASSERT_TRUE(index < length_); + return start_[is_forward_ ? index : (length_ - index - 1)]; + } + + private: + T *start_; + size_t length_; + bool is_forward_; +}; + +//--------------------------------------------------------------------- +// String Search object. +//--------------------------------------------------------------------- + +// Class holding constants and methods that apply to all string search variants, +// independently of subject and pattern char size. +class StringSearchBase { + protected: + // Cap on the maximal shift in the Boyer-Moore implementation. By setting a + // limit, we can fix the size of tables. For a needle longer than this limit, + // search will not be optimal, since we only build tables for a suffix + // of the string, but it is a safe approximation. + static const int kBMMaxShift = 250; + + // Reduce alphabet to this size. + // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size + // proportional to the input alphabet. We reduce the alphabet size by + // equating input characters modulo a smaller alphabet size. This makes + // searching potentially less efficient, but is a safe approximation. + // For needles using only characters in the same Unicode 256-code point page, + // there is no search speed degradation. + static const int kLatin1AlphabetSize = 256; + static const int kUC16AlphabetSize = 256; + + // Bad-char shift table stored in the state. Its length is the alphabet size. + // For patterns shorter than kBMMinPatternLength, the skip length of Boyer-Moore is too short + // to compensate for the algorithmic overhead compared to simple brute force. + static const int kBMMinPatternLength = 8; + + // Store for the BoyerMoore(Horspool) bad char shift table. 
+ int bad_char_shift_table_[kUC16AlphabetSize]; + // Store for the BoyerMoore good suffix shift table. + int good_suffix_shift_table_[kBMMaxShift + 1]; + // Table used temporarily while building the BoyerMoore good suffix + // shift table. + int suffix_table_[kBMMaxShift + 1]; +}; + +template +class StringSearch : private StringSearchBase { + public: + typedef stringsearch::Vector Vector; + + explicit StringSearch(Vector pattern) : pattern_(pattern), start_(0) { + if (pattern.length() >= kBMMaxShift) { + start_ = pattern.length() - kBMMaxShift; + } + + size_t pattern_length = pattern_.length(); + NBYTES_ASSERT_TRUE(pattern_length > 0); + if (pattern_length < kBMMinPatternLength) { + if (pattern_length == 1) { + strategy_ = SearchStrategy::kSingleChar; + return; + } + strategy_ = SearchStrategy::kLinear; + return; + } + strategy_ = SearchStrategy::kInitial; + } + + size_t Search(Vector subject, size_t index) { + switch (strategy_) { + case kBoyerMooreHorspool: + return BoyerMooreHorspoolSearch(subject, index); + case kBoyerMoore: + return BoyerMooreSearch(subject, index); + case kInitial: + return InitialSearch(subject, index); + case kLinear: + return LinearSearch(subject, index); + case kSingleChar: + return SingleCharSearch(subject, index); + } + unreachable(); + } + + static inline int AlphabetSize() { + if (sizeof(Char) == 1) { + // Latin1 needle. + return kLatin1AlphabetSize; + } else { + // UC16 needle. 
+ return kUC16AlphabetSize; + } + + static_assert( + sizeof(Char) == sizeof(uint8_t) || sizeof(Char) == sizeof(uint16_t), + "sizeof(Char) == sizeof(uint16_t) || sizeof(uint8_t)"); + } + + private: + typedef size_t (StringSearch::*SearchFunction)(Vector, size_t); + size_t SingleCharSearch(Vector subject, size_t start_index); + size_t LinearSearch(Vector subject, size_t start_index); + size_t InitialSearch(Vector subject, size_t start_index); + size_t BoyerMooreHorspoolSearch(Vector subject, size_t start_index); + size_t BoyerMooreSearch(Vector subject, size_t start_index); + + void PopulateBoyerMooreHorspoolTable(); + + void PopulateBoyerMooreTable(); + + static inline int CharOccurrence(int *bad_char_occurrence, Char char_code) { + if (sizeof(Char) == 1) { + return bad_char_occurrence[static_cast(char_code)]; + } + // Both pattern and subject are UC16. Reduce character to equivalence class. + int equiv_class = char_code % kUC16AlphabetSize; + return bad_char_occurrence[equiv_class]; + } + + enum SearchStrategy { + kBoyerMooreHorspool, + kBoyerMoore, + kInitial, + kLinear, + kSingleChar, + }; + + // The pattern to search for. + Vector pattern_; + SearchStrategy strategy_; + // Cache value of Max(0, pattern_length() - kBMMaxShift) + size_t start_; +}; + +inline uint8_t GetHighestValueByte(uint16_t character) { + return std::max(static_cast(character & 0xFF), + static_cast(character >> 8)); +} + +inline uint8_t GetHighestValueByte(uint8_t character) { return character; } + +// Searches for a byte value in a memory buffer, back to front. +// Uses memrchr(3) on systems which support it, for speed. +// Falls back to a vanilla for loop on non-GNU systems such as Windows. 
+inline const void *MemrchrFill(const void *haystack, uint8_t needle, + size_t haystack_len) { +#ifdef _GNU_SOURCE + return memrchr(haystack, needle, haystack_len); +#else + const uint8_t *haystack8 = static_cast(haystack); + for (size_t i = haystack_len - 1; i != static_cast(-1); i--) { + if (haystack8[i] == needle) { + return haystack8 + i; + } + } + return nullptr; +#endif +} + +// Finds the first occurrence of *two-byte* character pattern[0] in the string +// `subject`. Does not check that the whole pattern matches. +template +inline size_t FindFirstCharacter(Vector pattern, + Vector subject, size_t index) { + const Char pattern_first_char = pattern[0]; + const size_t max_n = (subject.length() - pattern.length() + 1); + + // For speed, search for the more `rare` of the two bytes in pattern[0] + // using memchr / memrchr (which are much faster than a simple for loop). + const uint8_t search_byte = GetHighestValueByte(pattern_first_char); + size_t pos = index; + do { + const size_t bytes_to_search = (max_n - pos) * sizeof(Char); + const void *void_pos; + if (subject.forward()) { + // Assert that bytes_to_search won't overflow + NBYTES_ASSERT_TRUE(pos <= max_n); + NBYTES_ASSERT_TRUE(max_n - pos <= SIZE_MAX / sizeof(Char)); + void_pos = memchr(subject.start() + pos, search_byte, bytes_to_search); + } else { + NBYTES_ASSERT_TRUE(pos <= subject.length()); + NBYTES_ASSERT_TRUE(subject.length() - pos <= SIZE_MAX / sizeof(Char)); + void_pos = MemrchrFill(subject.start() + pattern.length() - 1, + search_byte, bytes_to_search); + } + const Char *char_pos = static_cast(void_pos); + if (char_pos == nullptr) return subject.length(); + + // Then, for each match, verify that the full two bytes match pattern[0]. + char_pos = AlignDown(char_pos, sizeof(Char)); + size_t raw_pos = static_cast(char_pos - subject.start()); + pos = subject.forward() ? raw_pos : (subject.length() - raw_pos - 1); + if (subject[pos] == pattern_first_char) { + // Match found, hooray. 
+ return pos; + } + // Search byte matched, but the other byte of pattern[0] didn't. Keep going. + } while (++pos < max_n); + + return subject.length(); +} + +// Finds the first occurrence of the byte pattern[0] in string `subject`. +// Does not verify that the whole pattern matches. +template <> +inline size_t FindFirstCharacter(Vector pattern, + Vector subject, size_t index) { + const uint8_t pattern_first_char = pattern[0]; + const size_t subj_len = subject.length(); + const size_t max_n = (subject.length() - pattern.length() + 1); + + const void *pos; + if (subject.forward()) { + pos = memchr(subject.start() + index, pattern_first_char, max_n - index); + } else { + pos = MemrchrFill(subject.start() + pattern.length() - 1, + pattern_first_char, max_n - index); + } + const uint8_t *char_pos = static_cast(pos); + if (char_pos == nullptr) { + return subj_len; + } + + size_t raw_pos = static_cast(char_pos - subject.start()); + return subject.forward() ? raw_pos : (subj_len - raw_pos - 1); +} + +//--------------------------------------------------------------------- +// Single Character Pattern Search Strategy +//--------------------------------------------------------------------- + +template +size_t StringSearch::SingleCharSearch(Vector subject, size_t index) { + NBYTES_ASSERT_TRUE(1 == pattern_.length()); + return FindFirstCharacter(pattern_, subject, index); +} + +//--------------------------------------------------------------------- +// Linear Search Strategy +//--------------------------------------------------------------------- + +// Simple linear search for short patterns. Never bails out. 
+template +size_t StringSearch::LinearSearch(Vector subject, size_t index) { + NBYTES_ASSERT_TRUE(pattern_.length() > 1); + const size_t n = subject.length() - pattern_.length(); + for (size_t i = index; i <= n; i++) { + i = FindFirstCharacter(pattern_, subject, i); + if (i == subject.length()) return subject.length(); + NBYTES_ASSERT_TRUE(i <= n); + + bool matches = true; + for (size_t j = 1; j < pattern_.length(); j++) { + if (pattern_[j] != subject[i + j]) { + matches = false; + break; + } + } + if (matches) { + return i; + } + } + return subject.length(); +} + +//--------------------------------------------------------------------- +// Boyer-Moore string search +//--------------------------------------------------------------------- + +template +size_t StringSearch::BoyerMooreSearch(Vector subject, + size_t start_index) { + const size_t subject_length = subject.length(); + const size_t pattern_length = pattern_.length(); + // Only preprocess at most kBMMaxShift last characters of pattern. + size_t start = start_; + + int *bad_char_occurrence = bad_char_shift_table_; + int *good_suffix_shift = good_suffix_shift_table_ - start_; + + Char last_char = pattern_[pattern_length - 1]; + size_t index = start_index; + // Continue search from i. + while (index <= subject_length - pattern_length) { + size_t j = pattern_length - 1; + int c; + while (last_char != (c = subject[index + j])) { + int shift = j - CharOccurrence(bad_char_occurrence, c); + index += shift; + if (index > subject_length - pattern_length) { + return subject.length(); + } + } + while (pattern_[j] == (c = subject[index + j])) { + if (j == 0) { + return index; + } + j--; + } + if (j < start) { + // we have matched more than our tables allow us to be smart about. + // Fall back on BMH shift. 
+ index += + pattern_length - 1 - CharOccurrence(bad_char_occurrence, last_char); + } else { + int gs_shift = good_suffix_shift[j + 1]; + int bc_occ = CharOccurrence(bad_char_occurrence, c); + int shift = j - bc_occ; + if (gs_shift > shift) { + shift = gs_shift; + } + index += shift; + } + } + + return subject.length(); +} + +template +void StringSearch::PopulateBoyerMooreTable() { + const size_t pattern_length = pattern_.length(); + // Only look at the last kBMMaxShift characters of pattern (from start_ + // to pattern_length). + const size_t start = start_; + const size_t length = pattern_length - start; + + // Biased tables so that we can use pattern indices as table indices, + // even if we only cover the part of the pattern from offset start. + int *shift_table = good_suffix_shift_table_ - start_; + int *suffix_table = suffix_table_ - start_; + + // Initialize table. + for (size_t i = start; i < pattern_length; i++) { + shift_table[i] = length; + } + shift_table[pattern_length] = 1; + suffix_table[pattern_length] = pattern_length + 1; + + if (pattern_length <= start) { + return; + } + + // Find suffixes. + Char last_char = pattern_[pattern_length - 1]; + size_t suffix = pattern_length + 1; + { + size_t i = pattern_length; + while (i > start) { + Char c = pattern_[i - 1]; + while (suffix <= pattern_length && c != pattern_[suffix - 1]) { + if (static_cast(shift_table[suffix]) == length) { + shift_table[suffix] = suffix - i; + } + suffix = suffix_table[suffix]; + } + suffix_table[--i] = --suffix; + if (suffix == pattern_length) { + // No suffix to extend, so we check against last_char only. + while ((i > start) && (pattern_[i - 1] != last_char)) { + if (static_cast(shift_table[pattern_length]) == length) { + shift_table[pattern_length] = pattern_length - i; + } + suffix_table[--i] = pattern_length; + } + if (i > start) { + suffix_table[--i] = --suffix; + } + } + } + } + // Build shift table using suffixes. 
+ if (suffix < pattern_length) { + for (size_t i = start; i <= pattern_length; i++) { + if (static_cast(shift_table[i]) == length) { + shift_table[i] = suffix - start; + } + if (i == suffix) { + suffix = suffix_table[suffix]; + } + } + } +} + +//--------------------------------------------------------------------- +// Boyer-Moore-Horspool string search. +//--------------------------------------------------------------------- + +template +size_t StringSearch::BoyerMooreHorspoolSearch(Vector subject, + size_t start_index) { + const size_t subject_length = subject.length(); + const size_t pattern_length = pattern_.length(); + int *char_occurrences = bad_char_shift_table_; + int64_t badness = -static_cast(pattern_length); + + // How bad we are doing without a good-suffix table. + Char last_char = pattern_[pattern_length - 1]; + int last_char_shift = + pattern_length - 1 - CharOccurrence(char_occurrences, last_char); + + // Perform search + size_t index = start_index; // No matches found prior to this index. + while (index <= subject_length - pattern_length) { + size_t j = pattern_length - 1; + int subject_char; + while (last_char != (subject_char = subject[index + j])) { + int bc_occ = CharOccurrence(char_occurrences, subject_char); + int shift = j - bc_occ; + index += shift; + badness += 1 - shift; // at most zero, so badness cannot increase. + if (index > subject_length - pattern_length) { + return subject_length; + } + } + j--; + while (pattern_[j] == (subject[index + j])) { + if (j == 0) { + return index; + } + j--; + } + index += last_char_shift; + // Badness increases by the number of characters we have + // checked, and decreases by the number of characters we + // can skip by shifting. It's a measure of how we are doing + // compared to reading each character exactly once. 
+ badness += (pattern_length - j) - last_char_shift; + if (badness > 0) { + PopulateBoyerMooreTable(); + strategy_ = SearchStrategy::kBoyerMoore; + return BoyerMooreSearch(subject, index); + } + } + return subject.length(); +} + +template +void StringSearch::PopulateBoyerMooreHorspoolTable() { + const size_t pattern_length = pattern_.length(); + + int *bad_char_occurrence = bad_char_shift_table_; + + // Only preprocess at most kBMMaxShift last characters of pattern. + const size_t start = start_; + // Run forwards to populate bad_char_table, so that *last* instance + // of character equivalence class is the one registered. + // Notice: Doesn't include the last character. + const size_t table_size = AlphabetSize(); + if (start == 0) { + // All patterns less than kBMMaxShift in length. + memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence)); + } else { + for (size_t i = 0; i < table_size; i++) { + bad_char_occurrence[i] = start - 1; + } + } + for (size_t i = start; i < pattern_length - 1; i++) { + Char c = pattern_[i]; + int bucket = (sizeof(Char) == 1) ? c : c % AlphabetSize(); + bad_char_occurrence[bucket] = i; + } +} + +//--------------------------------------------------------------------- +// Linear string search with bailout to BMH. +//--------------------------------------------------------------------- + +// Simple linear search for short patterns, which bails out if the string +// isn't found very early in the subject. Upgrades to BoyerMooreHorspool. +template +size_t StringSearch::InitialSearch(Vector subject, size_t index) { + const size_t pattern_length = pattern_.length(); + // Badness is a count of how much work we have done. When we have + // done enough work we decide it's probably worth switching to a better + // algorithm. + int64_t badness = -10 - (pattern_length << 2); + + // We know our pattern is at least 2 characters, we cache the first so + // the common case of the first character not matching is faster. 
+ for (size_t i = index, n = subject.length() - pattern_length; i <= n; i++) { + badness++; + if (badness <= 0) { + i = FindFirstCharacter(pattern_, subject, i); + if (i == subject.length()) return subject.length(); + NBYTES_ASSERT_TRUE(i <= n); + size_t j = 1; + do { + if (pattern_[j] != subject[i + j]) { + break; + } + j++; + } while (j < pattern_length); + if (j == pattern_length) { + return i; + } + badness += j; + } else { + PopulateBoyerMooreHorspoolTable(); + strategy_ = SearchStrategy::kBoyerMooreHorspool; + return BoyerMooreHorspoolSearch(subject, i); + } + } + return subject.length(); +} + +// Perform a single stand-alone search. +// If searching multiple times for the same pattern, a search +// object should be constructed once and the Search function then called +// for each search. +template +size_t SearchString(Vector subject, Vector pattern, + size_t start_index) { + StringSearch search(pattern); + return search.Search(subject, start_index); +} +} // namespace stringsearch + +template +size_t SearchString(const Char *haystack, size_t haystack_length, + const Char *needle, size_t needle_length, + size_t start_index, bool is_forward) { + if (haystack_length < needle_length) return haystack_length; + // To do a reverse search (lastIndexOf instead of indexOf) without redundant + // code, create two vectors that are reversed views into the input strings. + // For example, v_needle[0] would return the *last* character of the needle. 
+ // So we're searching for the first instance of rev(needle) in rev(haystack) + stringsearch::Vector v_needle(needle, needle_length, is_forward); + stringsearch::Vector v_haystack(haystack, haystack_length, + is_forward); + size_t diff = haystack_length - needle_length; + size_t relative_start_index; + if (is_forward) { + relative_start_index = start_index; + } else if (diff < start_index) { + relative_start_index = 0; + } else { + relative_start_index = diff - start_index; + } + size_t pos = + stringsearch::SearchString(v_haystack, v_needle, relative_start_index); + if (pos == haystack_length) { + // not found + return pos; + } + return is_forward ? pos : (haystack_length - needle_length - pos); +} + +template +size_t SearchString(const char *haystack, size_t haystack_length, + const char (&needle)[N]) { + return SearchString( + reinterpret_cast(haystack), haystack_length, + reinterpret_cast(needle), N - 1, 0, true); +} + +// ============================================================================ +// Version metadata +#define NBYTES_VERSION "0.0.1" + +enum { + NBYTES_VERSION_MAJOR = 0, + NBYTES_VERSION_MINOR = 0, + NBYTES_VERSION_REVISION = 1, +}; + +} // namespace nbytes + +#endif // NBYTES_H diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5e107c7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,38 @@ +[project] +name = "nbytes" +requires-python = ">=3.12" + +[tool.ruff] +line-length = 120 +target-version = "py312" + +[tool.ruff.format] +quote-style = "single" +indent-style = "space" +docstring-code-format = true + +[tool.ruff.lint] +select = [ + "C90", # McCabe cyclomatic complexity + "E", # pycodestyle + "F", # Pyflakes + "ICN", # flake8-import-conventions + "INT", # flake8-gettext + "PLC", # Pylint conventions + "PLE", # Pylint errors + "PLR09", # Pylint refactoring: max-args, max-branches, max returns, max-statements + "PYI", # flake8-pyi + "RSE", # flake8-raise + "RUF", # Ruff-specific rules + "T10", # flake8-debugger + 
"TCH", # flake8-type-checking + "TID", # flake8-tidy-imports + "W", # pycodestyle + "YTT", # flake8-2020 + "ANN" # flake8-annotations +] +ignore = [ + "E722", # Do not use bare `except` + "ANN101", # Missing type annotation for self in method + "TID252", # Prefer absolute imports over relative imports from parent modules +] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..554fb4a --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,6 @@ +add_library(nbytes nbytes.cpp) +target_include_directories(nbytes + PUBLIC + $ + $ +) diff --git a/src/nbytes.cpp b/src/nbytes.cpp new file mode 100644 index 0000000..149f216 --- /dev/null +++ b/src/nbytes.cpp @@ -0,0 +1,238 @@ +#include "nbytes.h" +#include +#include +#include + +namespace nbytes { + +// ============================================================================ +// Byte Swapping + +namespace { +// These are defined by or on some systems. +// To avoid warnings, undefine them before redefining them. 
+#ifdef BSWAP_2
+#undef BSWAP_2
+#endif
+#ifdef BSWAP_4
+#undef BSWAP_4
+#endif
+#ifdef BSWAP_8
+#undef BSWAP_8
+#endif
+
+// BSWAP_N(x) reverses the byte order of an N-byte unsigned integer.
+#if defined(_MSC_VER)
+// MSVC: map directly onto the compiler's byte-swap intrinsics.
+#include <intrin.h>
+#define BSWAP_2(x) _byteswap_ushort(x)
+#define BSWAP_4(x) _byteswap_ulong(x)
+#define BSWAP_8(x) _byteswap_uint64(x)
+#else
+// Portable shift-and-mask fallbacks. Every expansion is wrapped in an outer
+// pair of parentheses so the macro behaves as a single operand when it is
+// embedded in a larger expression (e.g. `BSWAP_4(v) & mask`).
+// BSWAP_2 relies on the caller truncating the result back to 16 bits.
+#define BSWAP_2(x) (((x) << 8) | ((x) >> 8))
+#define BSWAP_4(x)                                                         \
+  ((((x) & 0xFF) << 24) | (((x) & 0xFF00) << 8) | (((x) >> 8) & 0xFF00) | \
+   (((x) >> 24) & 0xFF))
+#define BSWAP_8(x)                         \
+  ((((x) & 0xFF00000000000000ull) >> 56) | \
+   (((x) & 0x00FF000000000000ull) >> 40) | \
+   (((x) & 0x0000FF0000000000ull) >> 24) | \
+   (((x) & 0x000000FF00000000ull) >> 8) |  \
+   (((x) & 0x00000000FF000000ull) << 8) |  \
+   (((x) & 0x0000000000FF0000ull) << 24) | \
+   (((x) & 0x000000000000FF00ull) << 40) | \
+   (((x) & 0x00000000000000FFull) << 56))
+#endif
+}  // namespace
+
+// Reverses the byte order of every 16-bit unit in `data`, in place.
+// Returns false without touching the buffer when `nbytes` is not a multiple
+// of sizeof(uint16_t); returns true otherwise.
+bool SwapBytes16(void *data, size_t nbytes) {
+  if (nbytes % sizeof(uint16_t) != 0) return false;
+
+#if defined(_MSC_VER)
+  if (AlignUp(data, sizeof(uint16_t)) == data) {
+    // MSVC has no strict aliasing, and is able to highly optimize this case.
+    uint16_t *data16 = reinterpret_cast<uint16_t *>(data);
+    size_t len16 = nbytes / sizeof(uint16_t);
+    for (size_t i = 0; i < len16; i++) {
+      data16[i] = BSWAP_2(data16[i]);
+    }
+    return true;
+  }
+#endif
+
+  // Generic path: go through memcpy so unaligned buffers are handled without
+  // dereferencing a misaligned uint16_t pointer.
+  uint16_t temp = 0;
+  uint8_t *ptr = reinterpret_cast<uint8_t *>(data);
+  for (size_t i = 0; i < nbytes; i += sizeof(uint16_t)) {
+    memcpy(&temp, &ptr[i], sizeof(uint16_t));
+    temp = BSWAP_2(temp);
+    memcpy(&ptr[i], &temp, sizeof(uint16_t));
+  }
+
+  return true;
+}
+
+// Reverses the byte order of every 32-bit unit in `data`, in place.
+// Returns false without touching the buffer when `nbytes` is not a multiple
+// of sizeof(uint32_t); returns true otherwise.
+bool SwapBytes32(void *data, size_t nbytes) {
+  if (nbytes % sizeof(uint32_t) != 0) return false;
+
+#if defined(_MSC_VER)
+  // MSVC has no strict aliasing, and is able to highly optimize this case.
+  if (AlignUp(data, sizeof(uint32_t)) == data) {
+    uint32_t *data32 = reinterpret_cast<uint32_t *>(data);
+    size_t len32 = nbytes / sizeof(uint32_t);
+    for (size_t i = 0; i < len32; i++) {
+      data32[i] = BSWAP_4(data32[i]);
+    }
+    return true;
+  }
+#endif
+
+  // Generic path: memcpy in and out so alignment of `data` does not matter.
+  uint32_t temp = 0;
+  uint8_t *ptr = reinterpret_cast<uint8_t *>(data);
+  for (size_t i = 0; i < nbytes; i += sizeof(uint32_t)) {
+    memcpy(&temp, &ptr[i], sizeof(uint32_t));
+    temp = BSWAP_4(temp);
+    memcpy(&ptr[i], &temp, sizeof(uint32_t));
+  }
+
+  return true;
+}
+
+// Reverses the byte order of every 64-bit unit in `data`, in place.
+// Returns false without touching the buffer when `nbytes` is not a multiple
+// of sizeof(uint64_t); returns true otherwise.
+bool SwapBytes64(void *data, size_t nbytes) {
+  if (nbytes % sizeof(uint64_t) != 0) return false;
+
+#if defined(_MSC_VER)
+  if (AlignUp(data, sizeof(uint64_t)) == data) {
+    // MSVC has no strict aliasing, and is able to highly optimize this case.
+    uint64_t *data64 = reinterpret_cast<uint64_t *>(data);
+    size_t len64 = nbytes / sizeof(uint64_t);
+    for (size_t i = 0; i < len64; i++) {
+      data64[i] = BSWAP_8(data64[i]);
+    }
+    return true;
+  }
+#endif
+
+  // Generic path: memcpy in and out so alignment of `data` does not matter.
+  uint64_t temp = 0;
+  uint8_t *ptr = reinterpret_cast<uint8_t *>(data);
+  for (size_t i = 0; i < nbytes; i += sizeof(uint64_t)) {
+    memcpy(&temp, &ptr[i], sizeof(uint64_t));
+    temp = BSWAP_8(temp);
+    memcpy(&ptr[i], &temp, sizeof(uint64_t));
+  }
+
+  return true;
+}
+
+// ============================================================================
+// Base64 (legacy)
+
+// supports regular and URL-safe base64
+// Indexed by byte value. '+' and '-' both map to 62, '/' and '_' both map to
+// 63. Line feed, carriage return and space hold -2; every other byte that is
+// not a base64 digit holds -1.
+const int8_t unbase64_table[256] = {
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, -2, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+    61, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
+    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
+    63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+    43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1};
+
+// ============================================================================
+// Hex
+
+// Indexed by byte value: '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to
+// 10-15, and every other byte holds -1.
+const int8_t unhex_table[256] = {
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,
+    9,  -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1};
+
+// Writes the lowercase hexadecimal form of `src[0..slen)` into `dst` and
+// returns the number of characters written (2 * slen). No terminating NUL is
+// written. Asserts that `dlen`, the capacity of `dst`, is at least 2 * slen.
+size_t HexEncode(const char *src, size_t slen, char *dst, size_t dlen) {
+  // We know how much we'll write, just make sure that there's space.
+  NBYTES_ASSERT_TRUE(dlen >= MultiplyWithOverflowCheck<size_t>(slen, 2u) &&
+                     "not enough space provided for hex encode");
+
+  dlen = slen * 2;
+  for (size_t i = 0, k = 0; k < dlen; i += 1, k += 2) {
+    static const char hex[] = "0123456789abcdef";
+    // Go through uint8_t so bytes >= 0x80 index the table correctly even
+    // where plain char is signed.
+    uint8_t val = static_cast<uint8_t>(src[i]);
+    dst[k + 0] = hex[val >> 4];
+    dst[k + 1] = hex[val & 15];
+  }
+
+  return dlen;
+}
+
+// Convenience overload: returns the lowercase hexadecimal form of
+// `src[0..slen)` as a newly allocated string of length 2 * slen.
+std::string HexEncode(const char *src, size_t slen) {
+  size_t dlen = slen * 2;
+  std::string dst(dlen, '\0');
+  HexEncode(src, slen, dst.data(), dlen);
+  return dst;
+}
+
+// ============================================================================
+
+// Byte-at-a-time reference implementation: copies `len` bytes from `src` to
+// `dst`, clearing the top bit of each byte.
+void ForceAsciiSlow(const char *src, char *dst, size_t len) {
+  for (size_t i = 0; i < len; ++i) {
+    dst[i] = src[i] & 0x7f;
+  }
+}
+
+// Copies `len` bytes from `src` to `dst`, clearing the top bit of each byte.
+// Produces the same output as ForceAsciiSlow, but processes one machine word
+// per iteration when both pointers can be brought to word alignment together.
+void ForceAscii(const char *src, char *dst, size_t len) {
+  if (len < 16) {
+    ForceAsciiSlow(src, dst, len);
+    return;
+  }
+
+  const unsigned bytes_per_word = sizeof(uintptr_t);
+  const unsigned align_mask = bytes_per_word - 1;
+  const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
+  const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;
+
+  // If the two pointers sit at different offsets within a word they can never
+  // be word-aligned at the same time, so the word loop is not usable. This
+  // also covers an aligned `src` paired with a misaligned `dst`.
+  if (src_unalign != dst_unalign) {
+    ForceAsciiSlow(src, dst, len);
+    return;
+  }
+
+  if (src_unalign > 0) {
+    // Handle the leading bytes up to the next word boundary one at a time.
+    const unsigned unalign = bytes_per_word - src_unalign;
+    ForceAsciiSlow(src, dst, unalign);
+    src += unalign;
+    dst += unalign;
+    // Exactly `unalign` bytes were consumed above; the remaining length must
+    // shrink by that same amount or the loops below run past (or stop short
+    // of) the end of the buffers.
+    len -= unalign;
+  }
+
+#if defined(_WIN64) || defined(_LP64)
+  const uintptr_t mask = ~0x8080808080808080ll;
+#else
+  const uintptr_t mask = ~0x80808080l;
+#endif
+
+  const uintptr_t *srcw = reinterpret_cast<const uintptr_t *>(src);
+  uintptr_t *dstw = reinterpret_cast<uintptr_t *>(dst);
+
+  // Clear the top bit of every byte in a word with a single AND.
+  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
+    dstw[i] = srcw[i] & mask;
+  }
+
+  // Finish the trailing bytes that do not fill a whole word.
+  const unsigned remainder = len & align_mask;
+  if (remainder > 0) {
+    const size_t offset = len - remainder;
+    ForceAsciiSlow(src + offset, dst + offset, remainder);
+  }
+}
+
+}  // namespace nbytes
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode
100644
index 0000000..00fc8cb
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,14 @@
+include(GoogleTest)
+include(CTest)
+
+# Unit-test executable: the library under test plus GoogleTest's main().
+add_executable(basic basic.cpp)
+target_link_libraries(basic
+  PRIVATE
+    nbytes
+    GTest::gtest_main
+)
+
+# Register every TEST() case with CTest individually. No separate add_test()
+# entry for the binary is needed; one would run the same cases a second time.
+gtest_discover_tests(basic)
diff --git a/tests/basic.cpp b/tests/basic.cpp
new file mode 100644
index 0000000..8f0a863
--- /dev/null
+++ b/tests/basic.cpp
@@ -0,0 +1,7 @@
+#include
+#include
+#include
+
+#include
+
+TEST(basic, it_works) { SUCCEED(); }
diff --git a/tools/run-clang-format.sh b/tools/run-clang-format.sh
new file mode 100755
index 0000000..ce1a664
--- /dev/null
+++ b/tools/run-clang-format.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# Copyright 2023 Yagiz Nizipli and Daniel Lemire
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+set -e
+COMMAND=$*
+SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
+MAINSOURCE=$SCRIPTPATH/..
+ALL_FILES=$(cd "$MAINSOURCE" && git ls-tree --full-tree --name-only -r HEAD | grep -e ".*\.\(c\|h\|cc\|cpp\|hh\)\$")  # tracked C/C++ sources
+# Prefer a locally installed clang-format 17; $ALL_FILES is left unquoted on purpose so it splits into one argument per file.
+if clang-format-17 --version  2>/dev/null | grep -qF 'version 17.'; then
+  cd "$MAINSOURCE"; clang-format-17 --style=file --verbose -i "$@" $ALL_FILES
+  exit 0
+elif clang-format --version  2>/dev/null | grep -qF 'version 17.'; then
+  cd "$MAINSOURCE"; clang-format --style=file --verbose -i "$@" $ALL_FILES
+  exit 0
+fi
+echo "Trying to use docker"
+command -v docker >/dev/null 2>&1 || { echo >&2 "Please install docker. E.g., go to https://www.docker.com/products/docker-desktop Type 'docker' to diagnose the problem."; exit 1; }
+docker info >/dev/null 2>&1 || { echo >&2 "Docker server is not running? type 'docker info'."; exit 1; }
+# Request an interactive TTY from docker only when stdin is a terminal; DOCKER_ARGS stays unset otherwise.
+if [ -t 0 ]; then DOCKER_ARGS=-it; fi
+docker pull kszonek/clang-format-17
+# Mount the repository at the same path inside the container and run as the invoking user's uid:gid.
+docker run --rm $DOCKER_ARGS -v "$MAINSOURCE":"$MAINSOURCE":Z -w "$MAINSOURCE"  -u "$(id -u $USER):$(id -g $USER)" kszonek/clang-format-17 --style=file --verbose -i "$@" $ALL_FILES