From 1832617f950b8d4648dbe63dfb7fa8d268343395 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 11 Aug 2023 05:25:02 -0400 Subject: [PATCH 001/126] initial build --- pandas/_libs/arrays.pyx | 45 +- .../_libs/include/pandas/vendored/nanoarrow.h | 3371 +++++++++++++++++ pandas/_libs/meson.build | 2 +- pandas/core/arrays/masked.py | 100 +- 4 files changed, 3473 insertions(+), 45 deletions(-) create mode 100644 pandas/_libs/include/pandas/vendored/nanoarrow.h diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 718fb358e26bc..4b7c86a067fa5 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -7,10 +7,26 @@ import numpy as np cimport numpy as cnp from cpython cimport PyErr_Clear -from numpy cimport ndarray +from numpy cimport ( + int8_t, + int64_t, + ndarray, + uint8_t, +) cnp.import_array() +from libc.stdlib cimport ( + free, + malloc, +) + + +cdef extern from "pandas/vendored/nanoarrow.h": + int8_t ArrowBitGet(const uint8_t*, int64_t) + void ArrowBitSet(uint8_t*, int64_t) + void ArrowBitClear(uint8_t*, int64_t) + @cython.freelist(16) cdef class NDArrayBacked: @@ -189,3 +205,30 @@ cdef class NDArrayBacked: new_values = [obj._ndarray for obj in to_concat] new_arr = cnp.PyArray_Concatenate(new_values, axis) return to_concat[0]._from_backing_data(new_arr) + + +cdef class BitMaskArray: + cdef array_len + cdef uint8_t* validity_buffer + + def __cinit__(self, np_array): + self.array_len = len(np_array) + nbytes = len(np_array) // 8 + 1 + self.validity_buffer = malloc(nbytes) + # malloc + + def __dealloc__(self): + ... + free(self.validity_buffer) + + def __setitem__(self, key, value): + if value: + ArrowBitSet(self.validity_buffer, key) + else: + ArrowBitClear(self.validity_buffer, key) + + def __getitem__(self, key): + bool(ArrowBitGet(self.validity_buffer, key)) + + def to_numpy(self): + ... 
diff --git a/pandas/_libs/include/pandas/vendored/nanoarrow.h b/pandas/_libs/include/pandas/vendored/nanoarrow.h new file mode 100644 index 0000000000000..666dea1448326 --- /dev/null +++ b/pandas/_libs/include/pandas/vendored/nanoarrow.h @@ -0,0 +1,3371 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_BUILD_ID_H_INCLUDED +#define NANOARROW_BUILD_ID_H_INCLUDED + +#define NANOARROW_VERSION_MAJOR 0 +#define NANOARROW_VERSION_MINOR 3 +#define NANOARROW_VERSION_PATCH 0 +#define NANOARROW_VERSION "0.3.0-SNAPSHOT" + +#define NANOARROW_VERSION_INT \ + (NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \ + NANOARROW_VERSION_PATCH) + +// #define NANOARROW_NAMESPACE YourNamespaceHere + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_NANOARROW_TYPES_H_INCLUDED +#define NANOARROW_NANOARROW_TYPES_H_INCLUDED + +#include +#include + + + +#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE) +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Extra guard for versions of Arrow without the canonical guard +#ifndef ARROW_FLAG_DICTIONARY_ORDERED + +/// \defgroup nanoarrow-arrow-cdata Arrow C Data interface +/// +/// The Arrow C Data (https://arrow.apache.org/docs/format/CDataInterface.html) +/// and Arrow C Stream (https://arrow.apache.org/docs/format/CStreamInterface.html) +/// interfaces are part of the +/// Arrow Columnar Format specification +/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for +/// documentation of these structures. 
+/// +/// @{ + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE + +#ifndef ARROW_C_STREAM_INTERFACE +#define ARROW_C_STREAM_INTERFACE + +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. 
+ // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_STREAM_INTERFACE +#endif // ARROW_FLAG_DICTIONARY_ORDERED + +/// \brief Move the contents of src into dst and set src->release to NULL +static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst) { + memcpy(dst, src, sizeof(struct ArrowSchema)); + src->release = NULL; +} + +/// \brief Move the contents of src into dst and set src->release to NULL +static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst) { + memcpy(dst, src, sizeof(struct ArrowArray)); + src->release = NULL; +} + +/// \brief Move the contents of src into dst and set src->release to NULL +static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dst) { + memcpy(dst, src, sizeof(struct ArrowArrayStream)); + src->release = NULL; +} + +/// @} + +// Utility macros +#define _NANOARROW_CONCAT(x, y) x##y +#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y) + +#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) return NAME; \ + } while (0) + +#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \ + NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL) + +#define _NANOARROW_CHECK_UPPER_LIMIT(x_, max_) \ + NANOARROW_RETURN_NOT_OK((x_ <= max_) ? 
NANOARROW_OK : EINVAL) + +#if defined(NANOARROW_DEBUG) +#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) { \ + ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d\n* %s:%d", EXPR_STR, \ + NAME, __FILE__, __LINE__); \ + return NAME; \ + } \ + } while (0) +#else +#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) { \ + ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d", EXPR_STR, NAME); \ + return NAME; \ + } \ + } while (0) +#endif + +/// \brief Return code for success. +/// \ingroup nanoarrow-errors +#define NANOARROW_OK 0 + +/// \brief Represents an errno-compatible error code +/// \ingroup nanoarrow-errors +typedef int ArrowErrorCode; + +/// \brief Check the result of an expression and return it if not NANOARROW_OK +/// \ingroup nanoarrow-errors +#define NANOARROW_RETURN_NOT_OK(EXPR) \ + _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR) + +/// \brief Check the result of an expression and return it if not NANOARROW_OK, +/// adding an auto-generated message to an ArrowError. +/// \ingroup nanoarrow-errors +/// +/// This macro is used to ensure that functions that accept an ArrowError +/// as input always set its message when returning an error code (e.g., when calling +/// a nanoarrow function that does *not* accept ArrowError). 
+#define NANOARROW_RETURN_NOT_OK_WITH_ERROR(EXPR, ERROR_EXPR) \ + _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL( \ + _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, ERROR_EXPR, #EXPR) + +#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE) +#define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR) \ + do { \ + fprintf(stderr, "%s failed with errno %d\n* %s:%d\n", EXPR_STR, (int)(VALUE), \ + __FILE__, (int)__LINE__); \ + abort(); \ + } while (0) +#endif + +#if defined(NANOARROW_DEBUG) +#define _NANOARROW_ASSERT_OK_IMPL(NAME, EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) NANOARROW_PRINT_AND_DIE(NAME, EXPR_STR); \ + } while (0) + +/// \brief Assert that an expression's value is NANOARROW_OK +/// \ingroup nanoarrow-errors +/// +/// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is true), +/// print a message to stderr and abort. If nanoarrow was bulit in release mode, +/// this statement has no effect. You can customize fatal error behaviour +/// be defining the NANOARROW_PRINT_AND_DIE macro before including nanoarrow.h +/// This macro is provided as a convenience for users and is not used internally. +#define NANOARROW_ASSERT_OK(EXPR) \ + _NANOARROW_ASSERT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, #EXPR) +#else +#define NANOARROW_ASSERT_OK(EXPR) EXPR +#endif + +static char _ArrowIsLittleEndian(void) { + uint32_t check = 1; + char first_byte; + memcpy(&first_byte, &check, sizeof(char)); + return first_byte; +} + +/// \brief Arrow type enumerator +/// \ingroup nanoarrow-utils +/// +/// These names are intended to map to the corresponding arrow::Type::type +/// enumerator; however, the numeric values are specifically not equal +/// (i.e., do not rely on numeric comparison). 
+enum ArrowType { + NANOARROW_TYPE_UNINITIALIZED = 0, + NANOARROW_TYPE_NA = 1, + NANOARROW_TYPE_BOOL, + NANOARROW_TYPE_UINT8, + NANOARROW_TYPE_INT8, + NANOARROW_TYPE_UINT16, + NANOARROW_TYPE_INT16, + NANOARROW_TYPE_UINT32, + NANOARROW_TYPE_INT32, + NANOARROW_TYPE_UINT64, + NANOARROW_TYPE_INT64, + NANOARROW_TYPE_HALF_FLOAT, + NANOARROW_TYPE_FLOAT, + NANOARROW_TYPE_DOUBLE, + NANOARROW_TYPE_STRING, + NANOARROW_TYPE_BINARY, + NANOARROW_TYPE_FIXED_SIZE_BINARY, + NANOARROW_TYPE_DATE32, + NANOARROW_TYPE_DATE64, + NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TYPE_TIME32, + NANOARROW_TYPE_TIME64, + NANOARROW_TYPE_INTERVAL_MONTHS, + NANOARROW_TYPE_INTERVAL_DAY_TIME, + NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL256, + NANOARROW_TYPE_LIST, + NANOARROW_TYPE_STRUCT, + NANOARROW_TYPE_SPARSE_UNION, + NANOARROW_TYPE_DENSE_UNION, + NANOARROW_TYPE_DICTIONARY, + NANOARROW_TYPE_MAP, + NANOARROW_TYPE_EXTENSION, + NANOARROW_TYPE_FIXED_SIZE_LIST, + NANOARROW_TYPE_DURATION, + NANOARROW_TYPE_LARGE_STRING, + NANOARROW_TYPE_LARGE_BINARY, + NANOARROW_TYPE_LARGE_LIST, + NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO +}; + +/// \brief Get a string value of an enum ArrowType value +/// \ingroup nanoarrow-utils +/// +/// Returns NULL for invalid values for type +static inline const char* ArrowTypeString(enum ArrowType type); + +static inline const char* ArrowTypeString(enum ArrowType type) { + switch (type) { + case NANOARROW_TYPE_NA: + return "na"; + case NANOARROW_TYPE_BOOL: + return "bool"; + case NANOARROW_TYPE_UINT8: + return "uint8"; + case NANOARROW_TYPE_INT8: + return "int8"; + case NANOARROW_TYPE_UINT16: + return "uint16"; + case NANOARROW_TYPE_INT16: + return "int16"; + case NANOARROW_TYPE_UINT32: + return "uint32"; + case NANOARROW_TYPE_INT32: + return "int32"; + case NANOARROW_TYPE_UINT64: + return "uint64"; + case NANOARROW_TYPE_INT64: + return "int64"; + case NANOARROW_TYPE_HALF_FLOAT: + return "half_float"; + case NANOARROW_TYPE_FLOAT: + return "float"; + case NANOARROW_TYPE_DOUBLE: + 
return "double"; + case NANOARROW_TYPE_STRING: + return "string"; + case NANOARROW_TYPE_BINARY: + return "binary"; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + return "fixed_size_binary"; + case NANOARROW_TYPE_DATE32: + return "date32"; + case NANOARROW_TYPE_DATE64: + return "date64"; + case NANOARROW_TYPE_TIMESTAMP: + return "timestamp"; + case NANOARROW_TYPE_TIME32: + return "time32"; + case NANOARROW_TYPE_TIME64: + return "time64"; + case NANOARROW_TYPE_INTERVAL_MONTHS: + return "interval_months"; + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + return "interval_day_time"; + case NANOARROW_TYPE_DECIMAL128: + return "decimal128"; + case NANOARROW_TYPE_DECIMAL256: + return "decimal256"; + case NANOARROW_TYPE_LIST: + return "list"; + case NANOARROW_TYPE_STRUCT: + return "struct"; + case NANOARROW_TYPE_SPARSE_UNION: + return "sparse_union"; + case NANOARROW_TYPE_DENSE_UNION: + return "dense_union"; + case NANOARROW_TYPE_DICTIONARY: + return "dictionary"; + case NANOARROW_TYPE_MAP: + return "map"; + case NANOARROW_TYPE_EXTENSION: + return "extension"; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + return "fixed_size_list"; + case NANOARROW_TYPE_DURATION: + return "duration"; + case NANOARROW_TYPE_LARGE_STRING: + return "large_string"; + case NANOARROW_TYPE_LARGE_BINARY: + return "large_binary"; + case NANOARROW_TYPE_LARGE_LIST: + return "large_list"; + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + return "interval_month_day_nano"; + default: + return NULL; + } +} + +/// \brief Arrow time unit enumerator +/// \ingroup nanoarrow-utils +/// +/// These names and values map to the corresponding arrow::TimeUnit::type +/// enumerator. +enum ArrowTimeUnit { + NANOARROW_TIME_UNIT_SECOND = 0, + NANOARROW_TIME_UNIT_MILLI = 1, + NANOARROW_TIME_UNIT_MICRO = 2, + NANOARROW_TIME_UNIT_NANO = 3 +}; + +/// \brief Validation level enumerator +/// \ingroup nanoarrow-array +enum ArrowValidationLevel { + /// \brief Do not validate buffer sizes or content. 
+ NANOARROW_VALIDATION_LEVEL_NONE = 0, + + /// \brief Validate buffer sizes that depend on array length but do not validate buffer + /// sizes that depend on buffer data access. + NANOARROW_VALIDATION_LEVEL_MINIMAL = 1, + + /// \brief Validate all buffer sizes, including those that require buffer data access, + /// but do not perform any checks that are O(1) along the length of the buffers. + NANOARROW_VALIDATION_LEVEL_DEFAULT = 2, + + /// \brief Validate all buffer sizes and all buffer content. This is useful in the + /// context of untrusted input or input that may have been corrupted in transit. + NANOARROW_VALIDATION_LEVEL_FULL = 3 +}; + +/// \brief Get a string value of an enum ArrowTimeUnit value +/// \ingroup nanoarrow-utils +/// +/// Returns NULL for invalid values for time_unit +static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit); + +static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) { + switch (time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + return "s"; + case NANOARROW_TIME_UNIT_MILLI: + return "ms"; + case NANOARROW_TIME_UNIT_MICRO: + return "us"; + case NANOARROW_TIME_UNIT_NANO: + return "ns"; + default: + return NULL; + } +} + +/// \brief Functional types of buffers as described in the Arrow Columnar Specification +/// \ingroup nanoarrow-array-view +enum ArrowBufferType { + NANOARROW_BUFFER_TYPE_NONE, + NANOARROW_BUFFER_TYPE_VALIDITY, + NANOARROW_BUFFER_TYPE_TYPE_ID, + NANOARROW_BUFFER_TYPE_UNION_OFFSET, + NANOARROW_BUFFER_TYPE_DATA_OFFSET, + NANOARROW_BUFFER_TYPE_DATA +}; + +/// \brief An non-owning view of a string +/// \ingroup nanoarrow-utils +struct ArrowStringView { + /// \brief A pointer to the start of the string + /// + /// If size_bytes is 0, this value may be NULL. + const char* data; + + /// \brief The size of the string in bytes, + /// + /// (Not including the null terminator.) 
+ int64_t size_bytes; +}; + +/// \brief Return a view of a const C string +/// \ingroup nanoarrow-utils +static inline struct ArrowStringView ArrowCharView(const char* value); + +static inline struct ArrowStringView ArrowCharView(const char* value) { + struct ArrowStringView out; + + out.data = value; + if (value) { + out.size_bytes = (int64_t)strlen(value); + } else { + out.size_bytes = 0; + } + + return out; +} + +union ArrowBufferViewData { + const void* data; + const int8_t* as_int8; + const uint8_t* as_uint8; + const int16_t* as_int16; + const uint16_t* as_uint16; + const int32_t* as_int32; + const uint32_t* as_uint32; + const int64_t* as_int64; + const uint64_t* as_uint64; + const double* as_double; + const float* as_float; + const char* as_char; +}; + +/// \brief An non-owning view of a buffer +/// \ingroup nanoarrow-utils +struct ArrowBufferView { + /// \brief A pointer to the start of the buffer + /// + /// If size_bytes is 0, this value may be NULL. + union ArrowBufferViewData data; + + /// \brief The size of the buffer in bytes + int64_t size_bytes; +}; + +/// \brief Array buffer allocation and deallocation +/// \ingroup nanoarrow-buffer +/// +/// Container for allocate, reallocate, and free methods that can be used +/// to customize allocation and deallocation of buffers when constructing +/// an ArrowArray. 
+struct ArrowBufferAllocator { + /// \brief Reallocate a buffer or return NULL if it cannot be reallocated + uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t old_size, int64_t new_size); + + /// \brief Deallocate a buffer allocated by this allocator + void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size); + + /// \brief Opaque data specific to the allocator + void* private_data; +}; + +/// \brief An owning mutable view of a buffer +/// \ingroup nanoarrow-buffer +struct ArrowBuffer { + /// \brief A pointer to the start of the buffer + /// + /// If capacity_bytes is 0, this value may be NULL. + uint8_t* data; + + /// \brief The size of the buffer in bytes + int64_t size_bytes; + + /// \brief The capacity of the buffer in bytes + int64_t capacity_bytes; + + /// \brief The allocator that will be used to reallocate and/or free the buffer + struct ArrowBufferAllocator allocator; +}; + +/// \brief An owning mutable view of a bitmap +/// \ingroup nanoarrow-bitmap +struct ArrowBitmap { + /// \brief An ArrowBuffer to hold the allocated memory + struct ArrowBuffer buffer; + + /// \brief The number of bits that have been appended to the bitmap + int64_t size_bits; +}; + +/// \brief A description of an arrangement of buffers +/// \ingroup nanoarrow-utils +/// +/// Contains the minimum amount of information required to +/// calculate the size of each buffer in an ArrowArray knowing only +/// the length and offset of the array. 
+struct ArrowLayout { + /// \brief The function of each buffer + enum ArrowBufferType buffer_type[3]; + + /// \brief The data type of each buffer + enum ArrowType buffer_data_type[3]; + + /// \brief The size of an element each buffer or 0 if this size is variable or unknown + int64_t element_size_bits[3]; + + /// \brief The number of elements in the child array per element in this array for a + /// fixed-size list + int64_t child_size_elements; +}; + +/// \brief A non-owning view of an ArrowArray +/// \ingroup nanoarrow-array-view +/// +/// This data structure provides access to the values contained within +/// an ArrowArray with fields provided in a more readily-extractible +/// form. You can re-use an ArrowArrayView for multiple ArrowArrays +/// with the same storage type, use it to represent a hypothetical +/// ArrowArray that does not exist yet, or use it to validate the buffers +/// of a future ArrowArray. +struct ArrowArrayView { + /// \brief The underlying ArrowArray or NULL if it has not been set or + /// if the buffers in this ArrowArrayView are not backed by an ArrowArray. + struct ArrowArray* array; + + /// \brief The number of elements from the physical start of the buffers. + int64_t offset; + + /// \brief The number of elements in this view. + int64_t length; + + /// \brief A cached null count or -1 to indicate that this value is unknown. + int64_t null_count; + + /// \brief The type used to store values in this array + /// + /// This type represents only the minimum required information to + /// extract values from the array buffers (e.g., for a Date32 array, + /// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded + /// arrays, this will be the index type. 
+ enum ArrowType storage_type; + + /// \brief The buffer types, strides, and sizes of this Array's buffers + struct ArrowLayout layout; + + /// \brief This Array's buffers as ArrowBufferView objects + struct ArrowBufferView buffer_views[3]; + + /// \brief The number of children of this view + int64_t n_children; + + /// \brief Pointers to views of this array's children + struct ArrowArrayView** children; + + /// \brief Pointer to a view of this array's dictionary + struct ArrowArrayView* dictionary; + + /// \brief Union type id to child index mapping + /// + /// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer + /// such that child_index == union_type_id_map[type_id] and + /// type_id == union_type_id_map[128 + child_index]. This value may be + /// NULL in the case where child_id == type_id. + int8_t* union_type_id_map; +}; + +// Used as the private data member for ArrowArrays allocated here and accessed +// internally within inline ArrowArray* helpers. +struct ArrowArrayPrivateData { + // Holder for the validity buffer (or first buffer for union types, which are + // the only type whose first buffer is not a valdiity buffer) + struct ArrowBitmap bitmap; + + // Holder for additional buffers as required + struct ArrowBuffer buffers[2]; + + // The array of pointers to buffers. This must be updated after a sequence + // of appends to synchronize its values with the actual buffer addresses + // (which may have ben reallocated uring that time) + const void* buffer_data[3]; + + // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown + enum ArrowType storage_type; + + // The buffer arrangement for the storage type + struct ArrowLayout layout; + + // Flag to indicate if there are non-sequence union type ids. 
+ // In the future this could be replaced with a type id<->child mapping + // to support constructing unions in append mode where type_id != child_index + int8_t union_type_id_is_child_index; +}; + +/// \brief A representation of an interval. +/// \ingroup nanoarrow-utils +struct ArrowInterval { + /// \brief The type of interval being used + enum ArrowType type; + /// \brief The number of months represented by the interval + int32_t months; + /// \brief The number of days represented by the interval + int32_t days; + /// \brief The number of ms represented by the interval + int32_t ms; + /// \brief The number of ns represented by the interval + int64_t ns; +}; + +/// \brief Zero initialize an Interval with a given unit +/// \ingroup nanoarrow-utils +static inline void ArrowIntervalInit(struct ArrowInterval* interval, + enum ArrowType type) { + memset(interval, 0, sizeof(struct ArrowInterval)); + interval->type = type; +} + +/// \brief A representation of a fixed-precision decimal number +/// \ingroup nanoarrow-utils +/// +/// This structure should be initialized with ArrowDecimalInit() once and +/// values set using ArrowDecimalSetInt(), ArrowDecimalSetBytes128(), +/// or ArrowDecimalSetBytes256(). +struct ArrowDecimal { + /// \brief An array of 64-bit integers of n_words length defined in native-endian order + uint64_t words[4]; + + /// \brief The number of significant digits this decimal number can represent + int32_t precision; + + /// \brief The number of digits after the decimal point. This can be negative. 
+ int32_t scale; + + /// \brief The number of words in the words array + int n_words; + + /// \brief Cached value used by the implementation + int high_word_index; + + /// \brief Cached value used by the implementation + int low_word_index; +}; + +/// \brief Initialize a decimal with a given set of type parameters +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwidth, + int32_t precision, int32_t scale) { + memset(decimal->words, 0, sizeof(decimal->words)); + decimal->precision = precision; + decimal->scale = scale; + decimal->n_words = bitwidth / 8 / sizeof(uint64_t); + + if (_ArrowIsLittleEndian()) { + decimal->low_word_index = 0; + decimal->high_word_index = decimal->n_words - 1; + } else { + decimal->low_word_index = decimal->n_words - 1; + decimal->high_word_index = 0; + } +} + +/// \brief Get a signed integer value of a sufficiently small ArrowDecimal +/// +/// This does not check if the decimal's precision sufficiently small to fit +/// within the signed 64-bit integer range (A precision less than or equal +/// to 18 is sufficiently small). 
+static inline int64_t ArrowDecimalGetIntUnsafe(struct ArrowDecimal* decimal) { + return (int64_t)decimal->words[decimal->low_word_index]; +} + +/// \brief Copy the bytes of this decimal into a sufficiently large buffer +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalGetBytes(struct ArrowDecimal* decimal, uint8_t* out) { + memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); +} + +/// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise +/// \ingroup nanoarrow-utils +static inline int64_t ArrowDecimalSign(struct ArrowDecimal* decimal) { + return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); +} + +/// \brief Sets the integer value of this decimal +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) { + if (value < 0) { + memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t)); + } else { + memset(decimal->words, 0, decimal->n_words * sizeof(uint64_t)); + } + + decimal->words[decimal->low_word_index] = value; +} + +/// \brief Copy bytes from a buffer into this decimal +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, + const uint8_t* value) { + memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); +} + +#ifdef __cplusplus +} +#endif + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_H_INCLUDED +#define NANOARROW_H_INCLUDED + +#include +#include +#include + + + +// If using CMake, optionally pass -DNANOARROW_NAMESPACE=MyNamespace which will set this +// define in nanoarrow_config.h. If not, you can optionally #define NANOARROW_NAMESPACE +// MyNamespace here. + +// This section remaps the non-prefixed symbols to the prefixed symbols so that +// code written against this build can be used independent of the value of +// NANOARROW_NAMESPACE. +#ifdef NANOARROW_NAMESPACE +#define NANOARROW_CAT(A, B) A##B +#define NANOARROW_SYMBOL(A, B) NANOARROW_CAT(A, B) + +#define ArrowNanoarrowVersion NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersion) +#define ArrowNanoarrowVersionInt \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersionInt) +#define ArrowErrorMessage NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorMessage) +#define ArrowMalloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMalloc) +#define ArrowRealloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowRealloc) +#define ArrowFree NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowFree) +#define ArrowBufferAllocatorDefault \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferAllocatorDefault) +#define ArrowBufferDeallocator \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator) +#define ArrowErrorSet NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorSet) +#define ArrowLayoutInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowLayoutInit) +#define ArrowSchemaInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInit) +#define ArrowSchemaInitFromType \ + 
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType) +#define ArrowSchemaSetType NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetType) +#define ArrowSchemaSetTypeStruct \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeStruct) +#define ArrowSchemaSetTypeFixedSize \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize) +#define ArrowSchemaSetTypeDecimal \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal) +#define ArrowSchemaSetTypeDateTime \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime) +#define ArrowSchemaSetTypeUnion \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeUnion) +#define ArrowSchemaDeepCopy NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaDeepCopy) +#define ArrowSchemaSetFormat NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetFormat) +#define ArrowSchemaSetName NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetName) +#define ArrowSchemaSetMetadata \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetMetadata) +#define ArrowSchemaAllocateChildren \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateChildren) +#define ArrowSchemaAllocateDictionary \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateDictionary) +#define ArrowMetadataReaderInit \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderInit) +#define ArrowMetadataReaderRead \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderRead) +#define ArrowMetadataSizeOf NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataSizeOf) +#define ArrowMetadataHasKey NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataHasKey) +#define ArrowMetadataGetValue NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataGetValue) +#define ArrowMetadataBuilderInit \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderInit) +#define ArrowMetadataBuilderAppend \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderAppend) +#define ArrowMetadataBuilderSet \ + 
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderSet) +#define ArrowMetadataBuilderRemove \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderRemove) +#define ArrowSchemaViewInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaViewInit) +#define ArrowSchemaToString NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaToString) +#define ArrowArrayInitFromType \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromType) +#define ArrowArrayInitFromSchema \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema) +#define ArrowArrayInitFromArrayView \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView) +#define ArrowArrayInitFromArrayView \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView) +#define ArrowArrayAllocateChildren \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateChildren) +#define ArrowArrayAllocateDictionary \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateDictionary) +#define ArrowArraySetValidityBitmap \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetValidityBitmap) +#define ArrowArraySetBuffer NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetBuffer) +#define ArrowArrayReserve NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayReserve) +#define ArrowArrayFinishBuilding \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuilding) +#define ArrowArrayFinishBuildingDefault \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuildingDefault) +#define ArrowArrayViewInitFromType \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromType) +#define ArrowArrayViewInitFromSchema \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema) +#define ArrowArrayViewAllocateChildren \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren) +#define ArrowArrayViewAllocateDictionary \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateDictionary) +#define ArrowArrayViewSetLength \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, 
ArrowArrayViewSetLength) +#define ArrowArrayViewSetArray \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray) +#define ArrowArrayViewSetArrayMinimal \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArrayMinimal) +#define ArrowArrayViewValidate \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate) +#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewReset) +#define ArrowBasicArrayStreamInit \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit) +#define ArrowBasicArrayStreamSetArray \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamSetArray) +#define ArrowBasicArrayStreamValidate \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamValidate) + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/// \defgroup nanoarrow Nanoarrow C library +/// +/// Except where noted, objects are not thread-safe and clients should +/// take care to serialize accesses to methods. +/// +/// Because this library is intended to be vendored, it provides full type +/// definitions and encourages clients to stack or statically allocate +/// where convenient. + +/// \defgroup nanoarrow-malloc Memory management +/// +/// Non-buffer members of a struct ArrowSchema and struct ArrowArray +/// must be allocated using ArrowMalloc() or ArrowRealloc() and freed +/// using ArrowFree() for schemas and arrays allocated here. Buffer members +/// are allocated using an ArrowBufferAllocator. +/// +/// @{ + +/// \brief Allocate like malloc() +void* ArrowMalloc(int64_t size); + +/// \brief Reallocate like realloc() +void* ArrowRealloc(void* ptr, int64_t size); + +/// \brief Free a pointer allocated using ArrowMalloc() or ArrowRealloc(). +void ArrowFree(void* ptr); + +/// \brief Return the default allocator +/// +/// The default allocator uses ArrowMalloc(), ArrowRealloc(), and +/// ArrowFree(). 
+struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void); + +/// \brief Create a custom deallocator +/// +/// Creates a buffer allocator with only a free method that can be used to +/// attach a custom deallocator to an ArrowBuffer. This may be used to +/// avoid copying an existing buffer that was not allocated using the +/// infrastructure provided here (e.g., by an R or Python object). +struct ArrowBufferAllocator ArrowBufferDeallocator( + void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t size), + void* private_data); + +/// @} + +/// \defgroup nanoarrow-errors Error handling +/// +/// Functions generally return an errno-compatible error code; functions that +/// need to communicate more verbose error information accept a pointer +/// to an ArrowError. This can be stack or statically allocated. The +/// content of the message is undefined unless an error code has been +/// returned. If a nanoarrow function is passed a non-null ArrowError pointer, the +/// ArrowError pointed to by the argument will be propagated with a +/// null-terminated error message. It is safe to pass a NULL ArrowError anywhere +/// in the nanoarrow API. +/// +/// Except where documented, it is generally not safe to continue after a +/// function has returned a non-zero ArrowErrorCode. The NANOARROW_RETURN_NOT_OK and +/// NANOARROW_ASSERT_OK macros are provided to help propagate errors. C++ clients can use +/// the helpers provided in the nanoarrow.hpp header to facilitate using C++ idioms +/// for memory management and error propgagtion. +/// +/// @{ + +/// \brief Error type containing a UTF-8 encoded message. +struct ArrowError { + /// \brief A character buffer with space for an error message. + char message[1024]; +}; + +/// \brief Ensure an ArrowError is null-terminated by zeroing the first character. +/// +/// If error is NULL, this function does nothing. 
+static inline void ArrowErrorInit(struct ArrowError* error) { + if (error) { + error->message[0] = '\0'; + } +} + +/// \brief Set the contents of an error using printf syntax. +/// +/// If error is NULL, this function does nothing and returns NANOARROW_OK. +ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...); + +/// \brief Get the contents of an error +/// +/// If error is NULL, returns "", or returns the contents of the error message +/// otherwise. +const char* ArrowErrorMessage(struct ArrowError* error); + +/// @} + +/// \defgroup nanoarrow-utils Utility data structures +/// +/// @{ + +/// \brief Return a version string in the form "major.minor.patch" +const char* ArrowNanoarrowVersion(void); + +/// \brief Return an integer that can be used to compare versions sequentially +int ArrowNanoarrowVersionInt(void); + +/// \brief Initialize a description of buffer arrangements from a storage type +void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type); + +/// \brief Create a string view from a null-terminated string +static inline struct ArrowStringView ArrowCharView(const char* value); + +/// @} + +/// \defgroup nanoarrow-schema Creating schemas +/// +/// These functions allocate, copy, and destroy ArrowSchema structures +/// +/// @{ + +/// \brief Initialize an ArrowSchema +/// +/// Initializes the fields and release callback of schema_out. Caller +/// is responsible for calling the schema->release callback if +/// NANOARROW_OK is returned. +void ArrowSchemaInit(struct ArrowSchema* schema); + +/// \brief Initialize an ArrowSchema from an ArrowType +/// +/// A convenience constructor for that calls ArrowSchemaInit() and +/// ArrowSchemaSetType() for the common case of constructing an +/// unparameterized type. The caller is responsible for calling the schema->release +/// callback if NANOARROW_OK is returned. 
+ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type); + +/// \brief Get a human-readable summary of a Schema +/// +/// Writes a summary of an ArrowSchema to out (up to n - 1 characters) +/// and returns the number of characters required for the output if +/// n were sufficiently large. If recursive is non-zero, the result will +/// also include children. +int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n, + char recursive); + +/// \brief Set the format field of a schema from an ArrowType +/// +/// Initializes the fields and release callback of schema_out. For +/// NANOARROW_TYPE_LIST, NANOARROW_TYPE_LARGE_LIST, and +/// NANOARROW_TYPE_MAP, the appropriate number of children are +/// allocated, initialized, and named; however, the caller must +/// ArrowSchemaSetType() on the preinitialized children. Schema must have been initialized +/// using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type); + +/// \brief Set the format field and initialize children of a struct schema +/// +/// The specified number of children are initialized; however, the caller is responsible +/// for calling ArrowSchemaSetType() and ArrowSchemaSetName() on each child. +/// Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children); + +/// \brief Set the format field of a fixed-size schema +/// +/// Returns EINVAL for fixed_size <= 0 or for type that is not +/// NANOARROW_TYPE_FIXED_SIZE_BINARY or NANOARROW_TYPE_FIXED_SIZE_LIST. +/// For NANOARROW_TYPE_FIXED_SIZE_LIST, the appropriate number of children are +/// allocated, initialized, and named; however, the caller must +/// ArrowSchemaSetType() the first child. Schema must have been initialized using +/// ArrowSchemaInit() or ArrowSchemaDeepCopy(). 
+ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, + enum ArrowType type, int32_t fixed_size); + +/// \brief Set the format field of a decimal schema +/// +/// Returns EINVAL for scale <= 0 or for type that is not +/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have been +/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, + int32_t decimal_precision, + int32_t decimal_scale); + +/// \brief Set the format field of a time, timestamp, or duration schema +/// +/// Returns EINVAL for type that is not +/// NANOARROW_TYPE_TIME32, NANOARROW_TYPE_TIME64, +/// NANOARROW_TYPE_TIMESTAMP, or NANOARROW_TYPE_DURATION. The +/// timezone parameter must be NULL for a non-timestamp type. Schema must have been +/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, + enum ArrowTimeUnit time_unit, + const char* timezone); + +/// \brief Seet the format field of a union schema +/// +/// Returns EINVAL for a type that is not NANOARROW_TYPE_DENSE_UNION +/// or NANOARROW_TYPE_SPARSE_UNION. The specified number of children are +/// allocated, and initialized. +ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, + int64_t n_children); + +/// \brief Make a (recursive) copy of a schema +/// +/// Allocates and copies fields of schema into schema_out. +ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema, + struct ArrowSchema* schema_out); + +/// \brief Copy format into schema->format +/// +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy(). 
+ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format); + +/// \brief Copy name into schema->name +/// +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name); + +/// \brief Copy metadata into schema->metadata +/// +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy. +ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata); + +/// \brief Allocate the schema->children array +/// +/// Includes the memory for each child struct ArrowSchema. +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, + int64_t n_children); + +/// \brief Allocate the schema->dictionary member +/// +/// schema must have been allocated using ArrowSchemaInitFromType() or +/// ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema); + +/// @} + +/// \defgroup nanoarrow-metadata Create, read, and modify schema metadata +/// +/// @{ + +/// \brief Reader for key/value pairs in schema metadata +/// +/// The ArrowMetadataReader does not own any data and is only valid +/// for the lifetime of the underlying metadata pointer. +struct ArrowMetadataReader { + /// \brief A metadata string from a schema->metadata field. 
+ const char* metadata; + + /// \brief The current offset into the metadata string + int64_t offset; + + /// \brief The number of remaining keys + int32_t remaining_keys; +}; + +/// \brief Initialize an ArrowMetadataReader +ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, + const char* metadata); + +/// \brief Read the next key/value pair from an ArrowMetadataReader +ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, + struct ArrowStringView* key_out, + struct ArrowStringView* value_out); + +/// \brief The number of bytes in in a key/value metadata string +int64_t ArrowMetadataSizeOf(const char* metadata); + +/// \brief Check for a key in schema metadata +char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key); + +/// \brief Extract a value from schema metadata +/// +/// If key does not exist in metadata, value_out is unmodified +ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key, + struct ArrowStringView* value_out); + +/// \brief Initialize a builder for schema metadata from key/value pairs +/// +/// metadata can be an existing metadata string or NULL to initialize +/// an empty metadata string. +ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, const char* metadata); + +/// \brief Append a key/value pair to a buffer containing serialized metadata +ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); + +/// \brief Set a key/value pair to a buffer containing serialized metadata +/// +/// Ensures that the only entry for key in the metadata is set to value. +/// This function maintains the existing position of (the first instance of) +/// key if present in the data. 
+ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); + +/// \brief Remove a key from a buffer containing serialized metadata +ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, + struct ArrowStringView key); + +/// @} + +/// \defgroup nanoarrow-schema-view Reading schemas +/// +/// @{ + +/// \brief A non-owning view of a parsed ArrowSchema +/// +/// Contains more readily extractable values than a raw ArrowSchema. +/// Clients can stack or statically allocate this structure but are +/// encouraged to use the provided getters to ensure forward +/// compatiblity. +struct ArrowSchemaView { + /// \brief A pointer to the schema represented by this view + struct ArrowSchema* schema; + + /// \brief The data type represented by the schema + /// + /// This value may be NANOARROW_TYPE_DICTIONARY if the schema has a + /// non-null dictionary member; datetime types are valid values. + /// This value will never be NANOARROW_TYPE_EXTENSION (see + /// extension_name and/or extension_metadata to check for + /// an extension type). + enum ArrowType type; + + /// \brief The storage data type represented by the schema + /// + /// This value will never be NANOARROW_TYPE_DICTIONARY, NANOARROW_TYPE_EXTENSION + /// or any datetime type. This value represents only the type required to + /// interpret the buffers in the array. + enum ArrowType storage_type; + + /// \brief The storage layout represented by the schema + struct ArrowLayout layout; + + /// \brief The extension type name if it exists + /// + /// If the ARROW:extension:name key is present in schema.metadata, + /// extension_name.data will be non-NULL. + struct ArrowStringView extension_name; + + /// \brief The extension type metadata if it exists + /// + /// If the ARROW:extension:metadata key is present in schema.metadata, + /// extension_metadata.data will be non-NULL. 
+ struct ArrowStringView extension_metadata; + + /// \brief Format fixed size parameter + /// + /// This value is set when parsing a fixed-size binary or fixed-size + /// list schema; this value is undefined for other types. For a + /// fixed-size binary schema this value is in bytes; for a fixed-size + /// list schema this value refers to the number of child elements for + /// each element of the parent. + int32_t fixed_size; + + /// \brief Decimal bitwidth + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_bitwidth; + + /// \brief Decimal precision + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_precision; + + /// \brief Decimal scale + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_scale; + + /// \brief Format time unit parameter + /// + /// This value is set when parsing a date/time type. The value is + /// undefined for other types. + enum ArrowTimeUnit time_unit; + + /// \brief Format timezone parameter + /// + /// This value is set when parsing a timestamp type and represents + /// the timezone format parameter. This value points to + /// data within the schema and is undefined for other types. + const char* timezone; + + /// \brief Union type ids parameter + /// + /// This value is set when parsing a union type and represents + /// type ids parameter. This value points to + /// data within the schema and is undefined for other types. 
+ const char* union_type_ids; +}; + +/// \brief Initialize an ArrowSchemaView +ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, + struct ArrowSchema* schema, struct ArrowError* error); + +/// @} + +/// \defgroup nanoarrow-buffer Owning, growable buffers +/// +/// @{ + +/// \brief Initialize an ArrowBuffer +/// +/// Initialize a buffer with a NULL, zero-size buffer using the default +/// buffer allocator. +static inline void ArrowBufferInit(struct ArrowBuffer* buffer); + +/// \brief Set a newly-initialized buffer's allocator +/// +/// Returns EINVAL if the buffer has already been allocated. +static inline ArrowErrorCode ArrowBufferSetAllocator( + struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator); + +/// \brief Reset an ArrowBuffer +/// +/// Releases the buffer using the allocator's free method if +/// the buffer's data member is non-null, sets the data member +/// to NULL, and sets the buffer's size and capacity to 0. +static inline void ArrowBufferReset(struct ArrowBuffer* buffer); + +/// \brief Move an ArrowBuffer +/// +/// Transfers the buffer data and lifecycle management to another +/// address and resets buffer. +static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst); + +/// \brief Grow or shrink a buffer to a given capacity +/// +/// When shrinking the capacity of the buffer, the buffer is only reallocated +/// if shrink_to_fit is non-zero. Calling ArrowBufferResize() does not +/// adjust the buffer's size member except to ensure that the invariant +/// capacity >= size remains true. +static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer, + int64_t new_capacity_bytes, + char shrink_to_fit); + +/// \brief Ensure a buffer has at least a given additional capacity +/// +/// Ensures that the buffer has space to append at least +/// additional_size_bytes, overallocating when required. 
+static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, + int64_t additional_size_bytes); + +/// \brief Write data to buffer and increment the buffer size +/// +/// This function does not check that buffer has the required capacity +static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, + int64_t size_bytes); + +/// \brief Write data to buffer and increment the buffer size +/// +/// This function writes and ensures that the buffer has the required capacity, +/// possibly by reallocating the buffer. Like ArrowBufferReserve, this will +/// overallocate when reallocation is required. +static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, + const void* data, int64_t size_bytes); + +/// \brief Write fill to buffer and increment the buffer size +/// +/// This function writes the specified number of fill bytes and +/// ensures that the buffer has the required capacity, +static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, + uint8_t value, int64_t size_bytes); + +/// \brief Write an 8-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, + int8_t value); + +/// \brief Write an unsigned 8-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, + uint8_t value); + +/// \brief Write a 16-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, + int16_t value); + +/// \brief Write an unsigned 16-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, + uint16_t value); + +/// \brief Write a 32-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, + int32_t value); + +/// \brief Write an unsigned 32-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, + uint32_t value); + +/// 
\brief Write a 64-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer, + int64_t value); + +/// \brief Write an unsigned 64-bit integer to a buffer +static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, + uint64_t value); + +/// \brief Write a double to a buffer +static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, + double value); + +/// \brief Write a float to a buffer +static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, + float value); + +/// \brief Write an ArrowStringView to a buffer +static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, + struct ArrowStringView value); + +/// \brief Write an ArrowBufferView to a buffer +static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, + struct ArrowBufferView value); + +/// @} + +/// \defgroup nanoarrow-bitmap Bitmap utilities +/// +/// @{ + +/// \brief Extract a boolean value from a bitmap +static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i); + +/// \brief Set a boolean value to a bitmap to true +static inline void ArrowBitSet(uint8_t* bits, int64_t i); + +/// \brief Set a boolean value to a bitmap to false +static inline void ArrowBitClear(uint8_t* bits, int64_t i); + +/// \brief Set a boolean value to a bitmap +static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t value); + +/// \brief Set a boolean value to a range in a bitmap +static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, + uint8_t bits_are_set); + +/// \brief Count true values in a bitmap +static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to); + +/// \brief Initialize an ArrowBitmap +/// +/// Initialize the builder's buffer, empty its cache, and reset the size to zero +static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap); + +/// \brief Move an ArrowBitmap +/// 
+/// Transfers the underlying buffer data and lifecycle management to another +/// address and resets the bitmap. +static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst); + +/// \brief Ensure a bitmap builder has at least a given additional capacity +/// +/// Ensures that the buffer has space to append at least +/// additional_size_bits, overallocating when required. +static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, + int64_t additional_size_bits); + +/// \brief Grow or shrink a bitmap to a given capacity +/// +/// When shrinking the capacity of the bitmap, the bitmap is only reallocated +/// if shrink_to_fit is non-zero. Calling ArrowBitmapResize() does not +/// adjust the buffer's size member except when shrinking new_capacity_bits +/// to a value less than the current number of bits in the bitmap. +static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, + int64_t new_capacity_bits, + char shrink_to_fit); + +/// \brief Reserve space for and append zero or more of the same boolean value to a bitmap +static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length); + +/// \brief Append zero or more of the same boolean value to a bitmap +static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length); + +/// \brief Append boolean values encoded as int8_t to a bitmap +/// +/// The values must all be 0 or 1. +static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + const int8_t* values, int64_t n_values); + +/// \brief Append boolean values encoded as int32_t to a bitmap +/// +/// The values must all be 0 or 1. 
+static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, + const int32_t* values, int64_t n_values); + +/// \brief Reset a bitmap builder +/// +/// Releases any memory held by buffer, empties the cache, and resets the size to zero +static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap); + +/// @} + +/// \defgroup nanoarrow-array Creating arrays +/// +/// These functions allocate, copy, and destroy ArrowArray structures. +/// Once an ArrowArray has been initialized via ArrowArrayInitFromType() +/// or ArrowArrayInitFromSchema(), the caller is responsible for releasing +/// it using the embedded release callback. +/// +/// @{ + +/// \brief Initialize the fields of an array +/// +/// Initializes the fields and release callback of array. Caller +/// is responsible for calling the array->release callback if +/// NANOARROW_OK is returned. +ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, + enum ArrowType storage_type); + +/// \brief Initialize the contents of an ArrowArray from an ArrowSchema +/// +/// Caller is responsible for calling the array->release callback if +/// NANOARROW_OK is returned. +ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, + struct ArrowSchema* schema, + struct ArrowError* error); + +/// \brief Initialize the contents of an ArrowArray from an ArrowArrayView +/// +/// Caller is responsible for calling the array->release callback if +/// NANOARROW_OK is returned. +ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, + struct ArrowArrayView* array_view, + struct ArrowError* error); + +/// \brief Allocate the array->children array +/// +/// Includes the memory for each child struct ArrowArray, +/// whose members are marked as released and may be subsequently initialized +/// with ArrowArrayInitFromType() or moved from an existing ArrowArray. +/// schema must have been allocated using ArrowArrayInitFromType(). 
+ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children); + +/// \brief Allocate the array->dictionary member +/// +/// Includes the memory for the struct ArrowArray, whose contents +/// is marked as released and may be subsequently initialized +/// with ArrowArrayInitFromType() or moved from an existing ArrowArray. +/// array must have been allocated using ArrowArrayInitFromType() +ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array); + +/// \brief Set the validity bitmap of an ArrowArray +/// +/// array must have been allocated using ArrowArrayInitFromType() +void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap); + +/// \brief Set a buffer of an ArrowArray +/// +/// array must have been allocated using ArrowArrayInitFromType() +ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, + struct ArrowBuffer* buffer); + +/// \brief Get the validity bitmap of an ArrowArray +/// +/// array must have been allocated using ArrowArrayInitFromType() +static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array); + +/// \brief Get a buffer of an ArrowArray +/// +/// array must have been allocated using ArrowArrayInitFromType() +static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i); + +/// \brief Start element-wise appending to an ArrowArray +/// +/// Initializes any values needed to use ArrowArrayAppend*() functions. +/// All element-wise appenders append by value and return EINVAL if the exact value +/// cannot be represented by the underlying storage type. 
+/// array must have been allocated using ArrowArrayInitFromType() +static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array); + +/// \brief Reserve space for future appends +/// +/// For buffer sizes that can be calculated (i.e., not string data buffers or +/// child array sizes for non-fixed-size arrays), recursively reserve space for +/// additional elements. This is useful for reducing the number of reallocations +/// that occur using the item-wise appenders. +ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, + int64_t additional_size_elements); + +/// \brief Append a null value to an array +static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n); + +/// \brief Append an empty, non-null value to an array +static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n); + +/// \brief Append a signed integer value to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise (e.g., value +/// is outside the valid array range). +static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, int64_t value); + +/// \brief Append an unsigned integer value to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise (e.g., value +/// is outside the valid array range). +static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, + uint64_t value); + +/// \brief Append a double value to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise (e.g., value +/// is outside the valid array range or there is an attempt to append +/// a non-integer to an array with an integer storage type). 
+static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, + double value); + +/// \brief Append a string of bytes to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise (e.g., +/// the underlying array is not a binary, string, large binary, large string, +/// or fixed-size binary array, or value is the wrong size for a fixed-size +/// binary array). +static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, + struct ArrowBufferView value); + +/// \brief Append a string value to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise (e.g., +/// the underlying array is not a string or large string array). +static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, + struct ArrowStringView value); + +/// \brief Append a Interval to an array +/// +/// Returns NANOARROW_OK if value can be exactly represented by +/// the underlying storage type or EINVAL otherwise. +static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, + struct ArrowInterval* value); + +/// \brief Append a decimal value to an array +/// +/// Returns NANOARROW_OK if array is a decimal array with the appropriate +/// bitwidth or EINVAL otherwise. +static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, + struct ArrowDecimal* value); + +/// \brief Finish a nested array element +/// +/// Appends a non-null element to the array based on the first child's current +/// length. Returns NANOARROW_OK if the item was successfully added or EINVAL +/// if the underlying storage type is not a struct, list, large list, or fixed-size +/// list, or if there was an attempt to add a struct or fixed-size list element where the +/// length of the child array(s) did not match the expected length. 
+static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array); + +/// \brief Finish a union array element +/// +/// Appends an element to the union type ids buffer and increments array->length. +/// For sparse unions, up to one element is added to non type-id children. Returns +/// EINVAL if the underlying storage type is not a union, if type_id is not valid, +/// or if child sizes after appending are inconsistent. +static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, + int8_t type_id); + +/// \brief Shrink buffer capacity to the size required +/// +/// Also applies shrinking to any child arrays. array must have been allocated using +/// ArrowArrayInitFromType +static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array); + +/// \brief Finish building an ArrowArray +/// +/// Flushes any pointers from internal buffers that may have been reallocated +/// into array->buffers and checks the actual size of the buffers +/// against the expected size based on the final length. +/// array must have been allocated using ArrowArrayInitFromType() +ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, + struct ArrowError* error); + +/// \brief Finish building an ArrowArray with explicit validation +/// +/// Finish building with an explicit validation level. This could perform less validation +/// (i.e. NANOARROW_VALIDATION_LEVEL_NONE or NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU +/// buffer data access is not possible or more validation (i.e., +/// NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or +/// corruptable source. +ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, + enum ArrowValidationLevel validation_level, + struct ArrowError* error); + +/// @} + +/// \defgroup nanoarrow-array-view Reading arrays +/// +/// These functions read and validate the contents ArrowArray structures. 
+///
+/// @{
+
+/// \brief Initialize the contents of an ArrowArrayView
+void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view,
+                                enum ArrowType storage_type);
+
+/// \brief Move an ArrowArrayView
+///
+/// Transfers the ArrowArrayView data and lifecycle management to another
+/// address and resets the contents of src.
+static inline void ArrowArrayViewMove(struct ArrowArrayView* src,
+                                      struct ArrowArrayView* dst);
+
+/// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema
+ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
+                                            struct ArrowSchema* schema,
+                                            struct ArrowError* error);
+
+/// \brief Allocate the array_view->children array
+///
+/// Includes the memory for each child struct ArrowArrayView
+ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
+                                              int64_t n_children);
+
+/// \brief Allocate array_view->dictionary
+ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view);
+
+/// \brief Set data-independent buffer sizes from length
+void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length);
+
+/// \brief Set buffer sizes and data pointers from an ArrowArray
+ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
+                                      struct ArrowArray* array, struct ArrowError* error);
+
+/// \brief Set buffer sizes and data pointers from an ArrowArray except for those
+/// that require dereferencing buffer content.
+ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view,
+                                             struct ArrowArray* array,
+                                             struct ArrowError* error);
+
+/// \brief Performs checks on the content of an ArrowArrayView
+///
+/// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray,
+/// the buffer sizes and some content (first and last offset) have already
+/// been validated at the "default" level. If setting the buffer pointers
+/// and sizes otherwise, you may wish to perform checks at a different level.
See +/// documentation for ArrowValidationLevel for the details of checks performed +/// at each level. +ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, + enum ArrowValidationLevel validation_level, + struct ArrowError* error); + +/// \brief Reset the contents of an ArrowArrayView and frees resources +void ArrowArrayViewReset(struct ArrowArrayView* array_view); + +/// \brief Check for a null element in an ArrowArrayView +static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get the type id of a union array element +static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView* array_view, + int64_t i); + +/// \brief Get the child index of a union array element +static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_view, + int64_t i); + +/// \brief Get the index to use into the relevant union child array +static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* array_view, + int64_t i); + +/// \brief Get an element in an ArrowArrayView as an integer +/// +/// This function does not check for null values, that values are actually integers, or +/// that values are within a valid range for an int64. +static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view, + int64_t i); + +/// \brief Get an element in an ArrowArrayView as an unsigned integer +/// +/// This function does not check for null values, that values are actually integers, or +/// that values are within a valid range for a uint64. +static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view, + int64_t i); + +/// \brief Get an element in an ArrowArrayView as a double +/// +/// This function does not check for null values, or +/// that values are within a valid range for a double. 
+static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view, + int64_t i); + +/// \brief Get an element in an ArrowArrayView as an ArrowStringView +/// +/// This function does not check for null values. +static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( + struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get an element in an ArrowArrayView as an ArrowBufferView +/// +/// This function does not check for null values. +static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( + struct ArrowArrayView* array_view, int64_t i); + +/// \brief Get an element in an ArrowArrayView as an ArrowDecimal +/// +/// This function does not check for null values. The out parameter must +/// be initialized with ArrowDecimalInit() with the proper parameters for this +/// type before calling this for the first time. +static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView* array_view, + int64_t i, struct ArrowDecimal* out); + +/// @} + +/// \defgroup nanoarrow-basic-array-stream Basic ArrowArrayStream implementation +/// +/// An implementation of an ArrowArrayStream based on a collection of +/// zero or more previously-existing ArrowArray objects. Users should +/// initialize and/or validate the contents before transferring the +/// responsibility of the ArrowArrayStream elsewhere. +/// +/// @{ + +/// \brief Initialize an ArrowArrayStream backed by this implementation +/// +/// This function moves the ownership of schema to the array_stream. If +/// this function returns NANOARROW_OK, the caller is responsible for +/// releasing the ArrowArrayStream. +ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, + struct ArrowSchema* schema, int64_t n_arrays); + +/// \brief Set the ith ArrowArray in this ArrowArrayStream. +/// +/// array_stream must have been initialized with ArrowBasicArrayStreamInit(). +/// This function move the ownership of array to the array_stream. 
i must
+/// be greater than or equal to zero and less than the value of n_arrays passed in
+/// ArrowBasicArrayStreamInit(). Callers are not required to fill all
+/// n_arrays members (i.e., n_arrays is a maximum bound).
+void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i,
+                                   struct ArrowArray* array);
+
+/// \brief Validate the contents of this ArrowArrayStream
+///
+/// array_stream must have been initialized with ArrowBasicArrayStreamInit().
+/// This function uses ArrowArrayStreamInitFromSchema() and ArrowArrayStreamSetArray()
+/// to validate the contents of the arrays.
+ArrowErrorCode ArrowBasicArrayStreamValidate(struct ArrowArrayStream* array_stream,
+                                             struct ArrowError* error);
+
+/// @}
+
+// Inline function definitions
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_BUFFER_INLINE_H_INCLUDED
+#define NANOARROW_BUFFER_INLINE_H_INCLUDED
+
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline int64_t _ArrowGrowByFactor(int64_t current_capacity, int64_t new_capacity) {
+  int64_t doubled_capacity = current_capacity * 2;
+  if (doubled_capacity > new_capacity) {
+    return doubled_capacity;
+  } else {
+    return new_capacity;
+  }
+}
+
+static inline void ArrowBufferInit(struct ArrowBuffer* buffer) {
+  buffer->data = NULL;
+  buffer->size_bytes = 0;
+  buffer->capacity_bytes = 0;
+  buffer->allocator = ArrowBufferAllocatorDefault();
+}
+
+static inline ArrowErrorCode ArrowBufferSetAllocator(
+    struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator) {
+  if (buffer->data == NULL) {
+    buffer->allocator = allocator;
+    return NANOARROW_OK;
+  } else {
+    return EINVAL;
+  }
+}
+
+static inline void ArrowBufferReset(struct ArrowBuffer* buffer) {
+  if (buffer->data != NULL) {
+    buffer->allocator.free(&buffer->allocator, (uint8_t*)buffer->data,
+                           buffer->capacity_bytes);
+    buffer->data = NULL;
+  }
+
+  buffer->capacity_bytes = 0;
+  buffer->size_bytes = 0;
+}
+
+static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst) {
+  memcpy(dst, src, sizeof(struct ArrowBuffer));
+  src->data = NULL;
+  ArrowBufferReset(src);
+}
+
+static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
+                                               int64_t new_capacity_bytes,
+                                               char shrink_to_fit) {
+  if (new_capacity_bytes < 0) {
+    return EINVAL;
+  }
+
+  if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) {
+    buffer->data = buffer->allocator.reallocate(
+        &buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes);
+    if (buffer->data == NULL && new_capacity_bytes > 0) {
+      buffer->capacity_bytes = 0;
+      buffer->size_bytes = 0;
+      return ENOMEM;
+    }
+
+    buffer->capacity_bytes = new_capacity_bytes;
+  }
+
+  // Ensures that when shrinking that size <= capacity
+  if (new_capacity_bytes < 
buffer->size_bytes) { + buffer->size_bytes = new_capacity_bytes; + } + + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, + int64_t additional_size_bytes) { + int64_t min_capacity_bytes = buffer->size_bytes + additional_size_bytes; + if (min_capacity_bytes <= buffer->capacity_bytes) { + return NANOARROW_OK; + } + + return ArrowBufferResize( + buffer, _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes), 0); +} + +static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, + int64_t size_bytes) { + if (size_bytes > 0) { + memcpy(buffer->data + buffer->size_bytes, data, size_bytes); + buffer->size_bytes += size_bytes; + } +} + +static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, + const void* data, int64_t size_bytes) { + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); + + ArrowBufferAppendUnsafe(buffer, data, size_bytes); + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, + int8_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int8_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, + uint8_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint8_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, + int16_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int16_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, + uint16_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint16_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, + int32_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int32_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, + uint32_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint32_t)); +} + 
+static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer, + int64_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int64_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, + uint64_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint64_t)); +} + +static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, + double value) { + return ArrowBufferAppend(buffer, &value, sizeof(double)); +} + +static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, + float value) { + return ArrowBufferAppend(buffer, &value, sizeof(float)); +} + +static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, + struct ArrowStringView value) { + return ArrowBufferAppend(buffer, value.data, value.size_bytes); +} + +static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, + struct ArrowBufferView value) { + return ArrowBufferAppend(buffer, value.data.data, value.size_bytes); +} + +static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, + uint8_t value, int64_t size_bytes) { + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); + + memset(buffer->data + buffer->size_bytes, value, size_bytes); + buffer->size_bytes += size_bytes; + return NANOARROW_OK; +} + +static const uint8_t _ArrowkBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; +static const uint8_t _ArrowkFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127}; +static const uint8_t _ArrowkPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127}; +static const uint8_t _ArrowkTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128}; + +static const uint8_t _ArrowkBytePopcount[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, + 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, + 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 
3, 3, 4, 3, 4, 4, + 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, + 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, + 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, + 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, + 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + +static inline int64_t _ArrowRoundUpToMultipleOf8(int64_t value) { + return (value + 7) & ~((int64_t)7); +} + +static inline int64_t _ArrowRoundDownToMultipleOf8(int64_t value) { + return (value / 8) * 8; +} + +static inline int64_t _ArrowBytesForBits(int64_t bits) { + return (bits >> 3) + ((bits & 7) != 0); +} + +static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { + *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 | + values[5] << 5 | values[6] << 6 | values[7] << 7); +} + +static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) { + *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 | + values[5] << 5 | values[6] << 6 | values[7] << 7); +} + +static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) { + return (bits[i >> 3] >> (i & 0x07)) & 1; +} + +static inline void ArrowBitSet(uint8_t* bits, int64_t i) { + bits[i / 8] |= _ArrowkBitmask[i % 8]; +} + +static inline void ArrowBitClear(uint8_t* bits, int64_t i) { + bits[i / 8] &= _ArrowkFlippedBitmask[i % 8]; +} + +static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set) { + bits[i / 8] ^= + ((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i / 8])) & _ArrowkBitmask[i % 8]; +} + +static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, + uint8_t bits_are_set) { + const int64_t i_begin = start_offset; + const 
int64_t i_end = start_offset + length; + const uint8_t fill_byte = (uint8_t)(-bits_are_set); + + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_end = i_end / 8 + 1; + + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; + const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8]; + + if (bytes_end == bytes_begin + 1) { + // set bits within a single byte + const uint8_t only_byte_mask = + i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask); + bits[bytes_begin] &= only_byte_mask; + bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); + return; + } + + // set/clear trailing bits of first byte + bits[bytes_begin] &= first_byte_mask; + bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); + + if (bytes_end - bytes_begin > 2) { + // set/clear whole bytes + memset(bits + bytes_begin + 1, fill_byte, (size_t)(bytes_end - bytes_begin - 2)); + } + + if (i_end % 8 == 0) { + return; + } + + // set/clear leading bits of last byte + bits[bytes_end - 1] &= last_byte_mask; + bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask); +} + +static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset, + int64_t length) { + if (length == 0) { + return 0; + } + + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; + + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; + + if (bytes_begin == bytes_last_valid) { + // count bits within a single byte + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8]; + const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8]; + + const uint8_t only_byte_mask = + i_end % 8 == 0 ? 
last_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask); + + const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask; + return _ArrowkBytePopcount[byte_masked]; + } + + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; + const uint8_t last_byte_mask = i_end % 8 == 0 ? 0 : _ArrowkTrailingBitmask[i_end % 8]; + int64_t count = 0; + + // first byte + count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask]; + + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + count += _ArrowkBytePopcount[bits[i]]; + } + + // last byte + count += _ArrowkBytePopcount[bits[bytes_last_valid] & ~last_byte_mask]; + + return count; +} + +static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap) { + ArrowBufferInit(&bitmap->buffer); + bitmap->size_bits = 0; +} + +static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst) { + ArrowBufferMove(&src->buffer, &dst->buffer); + dst->size_bits = src->size_bits; + src->size_bits = 0; +} + +static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, + int64_t additional_size_bits) { + int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits; + if (min_capacity_bits <= (bitmap->buffer.capacity_bytes * 8)) { + return NANOARROW_OK; + } + + NANOARROW_RETURN_NOT_OK( + ArrowBufferReserve(&bitmap->buffer, _ArrowBytesForBits(additional_size_bits))); + + bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, + int64_t new_capacity_bits, + char shrink_to_fit) { + if (new_capacity_bits < 0) { + return EINVAL; + } + + int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits); + NANOARROW_RETURN_NOT_OK( + ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit)); + + if (new_capacity_bits < bitmap->size_bits) { + bitmap->size_bits = new_capacity_bits; + } + + return NANOARROW_OK; +} + +static inline 
ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length)); + + ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length); + return NANOARROW_OK; +} + +static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length) { + ArrowBitsSetTo(bitmap->buffer.data, bitmap->size_bits, length, bits_are_set); + bitmap->size_bits += length; + bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits); +} + +static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + const int8_t* values, int64_t n_values) { + if (n_values == 0) { + return; + } + + const int8_t* values_cursor = values; + int64_t n_remaining = n_values; + int64_t out_i_cursor = bitmap->size_bits; + uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; + + // First byte + if ((out_i_cursor % 8) != 0) { + int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; + for (int i = 0; i < n_partial_bits; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); + } + + out_cursor++; + values_cursor += n_partial_bits; + n_remaining -= n_partial_bits; + } + + // Middle bytes + int64_t n_full_bytes = n_remaining / 8; + for (int64_t i = 0; i < n_full_bytes; i++) { + _ArrowBitmapPackInt8(values_cursor, out_cursor); + values_cursor += 8; + out_cursor++; + } + + // Last byte + out_i_cursor += n_full_bytes * 8; + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte + *out_cursor = 0x00; + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); + } + out_cursor++; + } + + bitmap->size_bits += n_values; + bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; +} + +static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, + const int32_t* values, int64_t n_values) { + if (n_values == 0) { + return; 
+ } + + const int32_t* values_cursor = values; + int64_t n_remaining = n_values; + int64_t out_i_cursor = bitmap->size_bits; + uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; + + // First byte + if ((out_i_cursor % 8) != 0) { + int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; + for (int i = 0; i < n_partial_bits; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); + } + + out_cursor++; + values_cursor += n_partial_bits; + n_remaining -= n_partial_bits; + } + + // Middle bytes + int64_t n_full_bytes = n_remaining / 8; + for (int64_t i = 0; i < n_full_bytes; i++) { + _ArrowBitmapPackInt32(values_cursor, out_cursor); + values_cursor += 8; + out_cursor++; + } + + // Last byte + out_i_cursor += n_full_bytes * 8; + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte + *out_cursor = 0x00; + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); + } + out_cursor++; + } + + bitmap->size_bits += n_values; + bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; +} + +static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) { + ArrowBufferReset(&bitmap->buffer); + bitmap->size_bits = 0; +} + +#ifdef __cplusplus +} +#endif + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_ARRAY_INLINE_H_INCLUDED
+#define NANOARROW_ARRAY_INLINE_H_INCLUDED
+
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array) {
+  struct ArrowArrayPrivateData* private_data =
+      (struct ArrowArrayPrivateData*)array->private_data;
+  return &private_data->bitmap;
+}
+
+static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i) {
+  struct ArrowArrayPrivateData* private_data =
+      (struct ArrowArrayPrivateData*)array->private_data;
+  switch (i) {
+    case 0:
+      return &private_data->bitmap.buffer;
+    default:
+      return private_data->buffers + i - 1;
+  }
+}
+
+// We don't currently support the case of unions where type_id != child_index;
+// however, these functions are used to keep track of where that assumption
+// is made.
+static inline int8_t _ArrowArrayUnionChildIndex(struct ArrowArray* array, + int8_t type_id) { + return type_id; +} + +static inline int8_t _ArrowArrayUnionTypeId(struct ArrowArray* array, + int8_t child_index) { + return child_index; +} + +static inline int8_t _ArrowParseUnionTypeIds(const char* type_ids, int8_t* out) { + if (*type_ids == '\0') { + return 0; + } + + int32_t i = 0; + long type_id; + char* end_ptr; + do { + type_id = strtol(type_ids, &end_ptr, 10); + if (end_ptr == type_ids || type_id < 0 || type_id > 127) { + return -1; + } + + if (out != NULL) { + out[i] = (int8_t)type_id; + } + + i++; + + type_ids = end_ptr; + if (*type_ids == '\0') { + return i; + } else if (*type_ids != ',') { + return -1; + } else { + type_ids++; + } + } while (1); + + return -1; +} + +static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const int8_t* type_ids, + int64_t n_type_ids, + int64_t n_children) { + if (n_type_ids != n_children) { + return 0; + } + + for (int8_t i = 0; i < n_type_ids; i++) { + if (type_ids[i] != i) { + return 0; + } + } + + return 1; +} + +static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id_str, + int64_t n_children) { + int8_t type_ids[128]; + int8_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids); + return _ArrowParsedUnionTypeIdsWillEqualChildIndices(type_ids, n_type_ids, n_children); +} + +static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_UNINITIALIZED: + return EINVAL; + case NANOARROW_TYPE_SPARSE_UNION: + case NANOARROW_TYPE_DENSE_UNION: + // Note that this value could be -1 if the type_ids string was invalid + if (private_data->union_type_id_is_child_index != 1) { + return EINVAL; + } else { + break; + } + default: + break; + } + if (private_data->storage_type == NANOARROW_TYPE_UNINITIALIZED) { + 
return EINVAL; + } + + // Initialize any data offset buffer with a single zero + for (int i = 0; i < 3; i++) { + if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && + private_data->layout.element_size_bits[i] == 64) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0)); + } else if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && + private_data->layout.element_size_bits[i] == 32) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0)); + } + } + + // Start building any child arrays or dictionaries + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i])); + } + + if (array->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->dictionary)); + } + + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) { + for (int64_t i = 0; i < 3; i++) { + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); + NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1)); + } + + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i])); + } + + if (array->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->dictionary)); + } + + return NANOARROW_OK; +} + +static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* array, + int64_t buffer_i, uint8_t value, + int64_t n) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i); + int64_t bytes_required = + _ArrowRoundUpToMultipleOf8(private_data->layout.element_size_bits[buffer_i] * + (array->length + 1)) / + 8; + if (bytes_required > buffer->size_bytes) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes)); + } + + 
ArrowBitsSetTo(buffer->data, array->length, n, value); + return NANOARROW_OK; +} + +static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* array, + int64_t n, uint8_t is_valid) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + if (n == 0) { + return NANOARROW_OK; + } + + // Some type-specific handling + switch (private_data->storage_type) { + case NANOARROW_TYPE_NA: + // (An empty value for a null array *is* a null) + array->null_count += n; + array->length += n; + return NANOARROW_OK; + + case NANOARROW_TYPE_DENSE_UNION: { + // Add one null to the first child and append n references to that child + int8_t type_id = _ArrowArrayUnionTypeId(array, 0); + NANOARROW_RETURN_NOT_OK( + _ArrowArrayAppendEmptyInternal(array->children[0], 1, is_valid)); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); + for (int64_t i = 0; i < n; i++) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 1), (int32_t)array->children[0]->length - 1)); + } + // For the purposes of array->null_count, union elements are never considered "null" + // even if some children contain nulls. + array->length += n; + return NANOARROW_OK; + } + + case NANOARROW_TYPE_SPARSE_UNION: { + // Add n nulls to the first child and append n references to that child + int8_t type_id = _ArrowArrayUnionTypeId(array, 0); + NANOARROW_RETURN_NOT_OK( + _ArrowArrayAppendEmptyInternal(array->children[0], n, is_valid)); + for (int64_t i = 1; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); + } + + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); + // For the purposes of array->null_count, union elements are never considered "null" + // even if some children contain nulls. 
+ array->length += n; + return NANOARROW_OK; + } + + case NANOARROW_TYPE_FIXED_SIZE_LIST: + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty( + array->children[0], n * private_data->layout.child_size_elements)); + break; + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); + } + break; + + default: + break; + } + + // Append n is_valid bits to the validity bitmap. If we haven't allocated a bitmap yet + // and we need to append nulls, do it now. + if (!is_valid && private_data->bitmap.buffer.data == NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n)); + ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length); + ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); + } else if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n)); + ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); + } + + // Add appropriate buffer fill + struct ArrowBuffer* buffer; + int64_t size_bytes; + + for (int i = 0; i < 3; i++) { + buffer = ArrowArrayBuffer(array, i); + size_bytes = private_data->layout.element_size_bits[i] / 8; + + switch (private_data->layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_NONE: + case NANOARROW_BUFFER_TYPE_VALIDITY: + continue; + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + // Append the current value at the end of the offset buffer for each element + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); + + for (int64_t j = 0; j < n; j++) { + ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j), + size_bytes); + } + + // Skip the data buffer + i++; + continue; + case NANOARROW_BUFFER_TYPE_DATA: + // Zero out the next bit of memory + if (private_data->layout.element_size_bits[i] % 8 == 0) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); + } else { + 
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n)); + } + continue; + + case NANOARROW_BUFFER_TYPE_TYPE_ID: + case NANOARROW_BUFFER_TYPE_UNION_OFFSET: + // These cases return above + return EINVAL; + } + } + + array->length += n; + array->null_count += n * !is_valid; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) { + return _ArrowArrayAppendEmptyInternal(array, n, 0); +} + +static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n) { + return _ArrowArrayAppendEmptyInternal(array, n, 1); +} + +static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, + int64_t value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_INT64: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(int64_t))); + break; + case NANOARROW_TYPE_INT32: + _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, (int32_t)value)); + break; + case NANOARROW_TYPE_INT16: + _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(data_buffer, (int16_t)value)); + break; + case NANOARROW_TYPE_INT8: + _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(data_buffer, (int8_t)value)); + break; + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_UINT8: + _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); + return ArrowArrayAppendUInt(array, value); + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value)); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + case 
NANOARROW_TYPE_BOOL: + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, + uint64_t value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_UINT64: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(uint64_t))); + break; + case NANOARROW_TYPE_UINT32: + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(data_buffer, (uint32_t)value)); + break; + case NANOARROW_TYPE_UINT16: + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT16_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(data_buffer, (uint16_t)value)); + break; + case NANOARROW_TYPE_UINT8: + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT8_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(data_buffer, (uint8_t)value)); + break; + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_INT8: + _NANOARROW_CHECK_UPPER_LIMIT(value, INT64_MAX); + return ArrowArrayAppendInt(array, value); + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value)); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + case NANOARROW_TYPE_BOOL: + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + 
+ array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, + double value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(double))); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, + struct ArrowBufferView value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1); + struct ArrowBuffer* data_buffer = ArrowArrayBuffer( + array, 1 + (private_data->storage_type != NANOARROW_TYPE_FIXED_SIZE_BINARY)); + int32_t offset; + int64_t large_offset; + int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + offset = ((int32_t*)offset_buffer->data)[array->length]; + if ((offset + value.size_bytes) > INT32_MAX) { + return EINVAL; + } + + offset += (int32_t)value.size_bytes; + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + large_offset = ((int64_t*)offset_buffer->data)[array->length]; + large_offset += value.size_bytes; + 
NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(offset_buffer, &large_offset, sizeof(int64_t))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + if (value.size_bytes != fixed_size_bytes) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, + struct ArrowStringView value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBufferView buffer_view; + buffer_view.data.data = value.data; + buffer_view.size_bytes = value.size_bytes; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_LARGE_BINARY: + return ArrowArrayAppendBytes(array, buffer_view); + default: + return EINVAL; + } +} + +static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, + struct ArrowInterval* value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_INTERVAL_MONTHS: { + if (value->type != NANOARROW_TYPE_INTERVAL_MONTHS) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->months)); + break; + } + case NANOARROW_TYPE_INTERVAL_DAY_TIME: { + if (value->type != NANOARROW_TYPE_INTERVAL_DAY_TIME) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->days)); + 
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->ms)); + break; + } + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { + if (value->type != NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->months)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->days)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(data_buffer, value->ns)); + break; + } + default: + return EINVAL; + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, + struct ArrowDecimal* value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DECIMAL128: + if (value->n_words != 2) { + return EINVAL; + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value->words, 2 * sizeof(uint64_t))); + break; + } + case NANOARROW_TYPE_DECIMAL256: + if (value->n_words != 4) { + return EINVAL; + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value->words, 4 * sizeof(uint64_t))); + break; + } + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + int64_t child_length; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_MAP: + child_length = array->children[0]->length; + if (child_length > INT32_MAX) { + return EINVAL; + } + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), 
(int32_t)child_length)); + break; + case NANOARROW_TYPE_LARGE_LIST: + child_length = array->children[0]->length; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), child_length)); + break; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + child_length = array->children[0]->length; + if (child_length != + ((array->length + 1) * private_data->layout.child_size_elements)) { + return EINVAL; + } + break; + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < array->n_children; i++) { + child_length = array->children[i]->length; + if (child_length != (array->length + 1)) { + return EINVAL; + } + } + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, + int8_t type_id) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + int64_t child_index = _ArrowArrayUnionChildIndex(array, type_id); + if (child_index < 0 || child_index >= array->n_children) { + return EINVAL; + } + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + // Apppend the target child length to the union offsets buffer + _NANOARROW_CHECK_RANGE(array->children[child_index]->length, 0, INT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 1), (int32_t)array->children[child_index]->length - 1)); + break; + case NANOARROW_TYPE_SPARSE_UNION: + // Append one empty to any non-target column that isn't already the right length + // or abort if appending a null will result in a column with invalid length + for (int64_t i = 0; i < array->n_children; i++) { + if (i == child_index || array->children[i]->length == (array->length + 1)) { + continue; + } + + if (array->children[i]->length != array->length) { + return EINVAL; + } + + 
NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], 1)); + } + + break; + default: + return EINVAL; + } + + // Write to the type_ids buffer + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt8(ArrowArrayBuffer(array, 0), (int8_t)type_id)); + array->length++; + return NANOARROW_OK; +} + +static inline void ArrowArrayViewMove(struct ArrowArrayView* src, + struct ArrowArrayView* dst) { + memcpy(dst, src, sizeof(struct ArrowArrayView)); + ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED); +} + +static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i) { + const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; + i += array_view->offset; + switch (array_view->storage_type) { + case NANOARROW_TYPE_NA: + return 0x01; + case NANOARROW_TYPE_DENSE_UNION: + case NANOARROW_TYPE_SPARSE_UNION: + // Unions are "never null" in Arrow land + return 0x00; + default: + return validity_buffer != NULL && !ArrowBitGet(validity_buffer, i); + } +} + +static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView* array_view, + int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + case NANOARROW_TYPE_SPARSE_UNION: + return array_view->buffer_views[0].data.as_int8[i]; + default: + return -1; + } +} + +static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_view, + int64_t i) { + int8_t type_id = ArrowArrayViewUnionTypeId(array_view, i); + if (array_view->union_type_id_map == NULL) { + return type_id; + } else { + return array_view->union_type_id_map[type_id]; + } +} + +static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* array_view, + int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + return array_view->buffer_views[1].data.as_int32[i]; + case NANOARROW_TYPE_SPARSE_UNION: + return i; + default: + return -1; + } +} + +static inline int64_t ArrowArrayViewListChildOffset(struct ArrowArrayView* 
array_view, + int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_LIST: + return array_view->buffer_views[1].data.as_int32[i]; + case NANOARROW_TYPE_LARGE_LIST: + return array_view->buffer_views[1].data.as_int64[i]; + default: + return -1; + } +} + +static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view, + int64_t i) { + struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + i += array_view->offset; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return (int64_t)data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return (int64_t)data_view->data.as_float[i]; + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return INT64_MAX; + } +} + +static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view, + int64_t i) { + i += array_view->offset; + struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return 
data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return (uint64_t)data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return (uint64_t)data_view->data.as_float[i]; + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return UINT64_MAX; + } +} + +static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view, + int64_t i) { + i += array_view->offset; + struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return (double)data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return (double)data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return data_view->data.as_float[i]; + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return DBL_MAX; + } +} + +static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( + struct ArrowArrayView* array_view, int64_t i) { + i += array_view->offset; + struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const char* data_view = array_view->buffer_views[2].data.as_char; + + struct ArrowStringView view; + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + view.data = data_view + offsets_view->data.as_int32[i]; + view.size_bytes = + offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + break; + case 
NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + view.data = data_view + offsets_view->data.as_int64[i]; + view.size_bytes = + offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i]; + break; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + view.size_bytes = array_view->layout.element_size_bits[1] / 8; + view.data = array_view->buffer_views[1].data.as_char + (i * view.size_bytes); + break; + default: + view.data = NULL; + view.size_bytes = 0; + break; + } + + return view; +} + +static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( + struct ArrowArrayView* array_view, int64_t i) { + i += array_view->offset; + struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8; + + struct ArrowBufferView view; + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + view.size_bytes = + offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + view.data.as_uint8 = data_view + offsets_view->data.as_int32[i]; + break; + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + view.size_bytes = + offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i]; + view.data.as_uint8 = data_view + offsets_view->data.as_int64[i]; + break; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + view.size_bytes = array_view->layout.element_size_bits[1] / 8; + view.data.as_uint8 = + array_view->buffer_views[1].data.as_uint8 + (i * view.size_bytes); + break; + default: + view.data.data = NULL; + view.size_bytes = 0; + break; + } + + return view; +} + +static inline void ArrowArrayViewGetIntervalUnsafe(struct ArrowArrayView* array_view, + int64_t i, struct ArrowInterval* out) { + const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INTERVAL_MONTHS: { + const size_t size = sizeof(int32_t); + memcpy(&out->months, data_view + i * size, 
sizeof(int32_t)); + break; + } + case NANOARROW_TYPE_INTERVAL_DAY_TIME: { + const size_t size = sizeof(int32_t) + sizeof(int32_t); + memcpy(&out->days, data_view + i * size, sizeof(int32_t)); + memcpy(&out->ms, data_view + i * size + 4, sizeof(int32_t)); + break; + } + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { + const size_t size = sizeof(int32_t) + sizeof(int32_t) + sizeof(int64_t); + memcpy(&out->months, data_view + i * size, sizeof(int32_t)); + memcpy(&out->days, data_view + i * size + 4, sizeof(int32_t)); + memcpy(&out->ns, data_view + i * size + 8, sizeof(int64_t)); + break; + } + default: + break; + } +} + +static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView* array_view, + int64_t i, struct ArrowDecimal* out) { + i += array_view->offset; + const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; + switch (array_view->storage_type) { + case NANOARROW_TYPE_DECIMAL128: + ArrowDecimalSetBytes(out, data_view + (i * 16)); + break; + case NANOARROW_TYPE_DECIMAL256: + ArrowDecimalSetBytes(out, data_view + (i * 32)); + break; + default: + memset(out->words, 0, sizeof(out->words)); + break; + } +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index f302c649bc7bd..fabcd220b8b30 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -62,7 +62,7 @@ libs_sources = { # Dict of extension name -> dict of {sources, include_dirs, and deps} # numpy include dir is implicitly included 'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper, _khash_primitive_helper]}, - 'arrays': {'sources': ['arrays.pyx']}, + 'arrays': {'sources': ['arrays.pyx', 'src/vendored/nanoarrow.c']}, 'groupby': {'sources': ['groupby.pyx']}, 'hashing': {'sources': ['hashing.pyx']}, 'hashtable': {'sources': ['hashtable.pyx', _khash_primitive_helper, _hashtable_class_helper, _hashtable_func_helper]}, diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py 
index bec875f2bbfa1..8dc9e55e8a1de 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -15,6 +15,7 @@ lib, missing as libmissing, ) +from pandas._libs.arrays import BitMaskArray from pandas._libs.tslibs import ( get_unit_from_dtype, is_supported_unit, @@ -112,7 +113,7 @@ class BaseMaskedArray(OpsMixin, ExtensionArray): _internal_fill_value: Scalar # our underlying data and mask are each ndarrays _data: np.ndarray - _mask: npt.NDArray[np.bool_] + _mask: BitMaskArray # Fill values used for any/all _truthy_value = Scalar # bool(_truthy_value) = True @@ -122,7 +123,7 @@ class BaseMaskedArray(OpsMixin, ExtensionArray): def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self: result = BaseMaskedArray.__new__(cls) result._data = values - result._mask = mask + result._mask = BitMaskArray(mask) return result def __init__( @@ -142,7 +143,7 @@ def __init__( mask = mask.copy() self._data = values - self._mask = mask + self._mask = BitMaskArray(mask) @classmethod def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: @@ -181,6 +182,8 @@ def __getitem__(self, item: SequenceIndexer) -> Self: def __getitem__(self, item: PositionalIndexer) -> Self | Any: item = check_array_indexer(self, item) + # TODO: need to change this to special case multiple + # indexers versus just scalar newmask = self._mask[item] if is_bool(newmask): # This is a scalar indexing @@ -204,7 +207,7 @@ def pad_or_backfill( func = missing.get_fill_func(method, ndim=self.ndim) npvalues = self._data.T - new_mask = mask.T + new_mask = mask.to_numpy().T if copy: npvalues = npvalues.copy() new_mask = new_mask.copy() @@ -226,7 +229,7 @@ def fillna( ) -> Self: value, method = validate_fillna_kwargs(value, method) - mask = self._mask + mask = self._mask.to_numpy() value = missing.check_value_size(value, mask, len(self)) @@ -234,7 +237,7 @@ def fillna( if method is not None: func = missing.get_fill_func(method, ndim=self.ndim) npvalues = self._data.T 
- new_mask = mask.T + new_mask = mask.to_numpy().T if copy: npvalues = npvalues.copy() new_mask = new_mask.copy() @@ -308,7 +311,8 @@ def __contains__(self, key) -> bool: if isna(key) and key is not self.dtype.na_value: # GH#52840 if self._data.dtype.kind == "f" and lib.is_float(key): - return bool((np.isnan(self._data) & ~self._mask).any()) + # TODO: implement low level invert operator on BitMaskArray + return bool((np.isnan(self._data) & ~self._mask.to_numpy()).any()) return bool(super().__contains__(key)) @@ -319,7 +323,7 @@ def __iter__(self) -> Iterator: yield val else: na_value = self.dtype.na_value - for isna_, val in zip(self._mask, self._data): + for isna_, val in zip(self._mask.to_numpy(), self._data): if isna_: yield na_value else: @@ -341,28 +345,28 @@ def ndim(self) -> int: def swapaxes(self, axis1, axis2) -> Self: data = self._data.swapaxes(axis1, axis2) - mask = self._mask.swapaxes(axis1, axis2) + mask = self._mask.to_numpy().swapaxes(axis1, axis2) return self._simple_new(data, mask) def delete(self, loc, axis: AxisInt = 0) -> Self: data = np.delete(self._data, loc, axis=axis) - mask = np.delete(self._mask, loc, axis=axis) + mask = np.delete(self._mask.to_numpy(), loc, axis=axis) return self._simple_new(data, mask) def reshape(self, *args, **kwargs) -> Self: data = self._data.reshape(*args, **kwargs) - mask = self._mask.reshape(*args, **kwargs) + mask = self._mask.to_numpy().reshape(*args, **kwargs) return self._simple_new(data, mask) def ravel(self, *args, **kwargs) -> Self: # TODO: need to make sure we have the same order for data/mask data = self._data.ravel(*args, **kwargs) - mask = self._mask.ravel(*args, **kwargs) + mask = self._mask.to_numpy().ravel(*args, **kwargs) return type(self)(data, mask) @property def T(self) -> Self: - return self._simple_new(self._data.T, self._mask.T) + return self._simple_new(self._data.T, self._mask.to_numpy().T) def round(self, decimals: int = 0, *args, **kwargs): """ @@ -392,22 +396,22 @@ def round(self, 
decimals: int = 0, *args, **kwargs): values = np.round(self._data, decimals=decimals, **kwargs) # Usually we'll get same type as self, but ndarray[bool] casts to float - return self._maybe_mask_result(values, self._mask.copy()) + return self._maybe_mask_result(values, self._mask.to_numpy().copy()) # ------------------------------------------------------------------ # Unary Methods def __invert__(self) -> Self: - return self._simple_new(~self._data, self._mask.copy()) + return self._simple_new(~self._data, self._mask.to_numpy().copy()) def __neg__(self) -> Self: - return self._simple_new(-self._data, self._mask.copy()) + return self._simple_new(-self._data, self._mask.to_numpy().copy()) def __pos__(self) -> Self: return self.copy() def __abs__(self) -> Self: - return self._simple_new(abs(self._data), self._mask.copy()) + return self._simple_new(abs(self._data), self._mask.to_numpy().copy()) # ------------------------------------------------------------------ @@ -498,7 +502,7 @@ def to_numpy( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) data = self._data.astype(dtype) - data[self._mask] = na_value + data[self._mask.to_numpy()] = na_value else: with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) @@ -541,7 +545,11 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: data = self._data.astype(dtype.numpy_dtype, copy=copy) # mask is copied depending on whether the data was copied, and # not directly depending on the `copy` keyword - mask = self._mask if data is self._data else self._mask.copy() + mask = ( + self._mask.to_numpy() + if data is self._data + else self._mask.to_numpy().copy() + ) cls = dtype.construct_array_type() return cls(data, mask, copy=False) @@ -652,7 +660,7 @@ def reconstruct(x: np.ndarray): return tuple(reconstruct(x) for x in result) elif method == "reduce": # e.g. 
np.add.reduce; test_ufunc_reduce_raises - if self._mask.any(): + if self._mask.to_numpy().any(): return self._na_value return result else: @@ -664,7 +672,7 @@ def __arrow_array__(self, type=None): """ import pyarrow as pa - return pa.array(self._data, mask=self._mask, type=type) + return pa.array(self._data, mask=self._mask.to_numpy(), type=type) @property def _hasna(self) -> bool: @@ -673,20 +681,22 @@ def _hasna(self) -> bool: # source code using it.. # error: Incompatible return value type (got "bool_", expected "bool") - return self._mask.any() # type: ignore[return-value] + return self._mask.to_numpy().any() # type: ignore[return-value] def _propagate_mask( self, mask: npt.NDArray[np.bool_] | None, other ) -> npt.NDArray[np.bool_]: if mask is None: - mask = self._mask.copy() # TODO: need test for BooleanArray needing a copy + mask = ( + self._mask.to_numpy().copy() + ) # TODO: need test for BooleanArray needing a copy if other is libmissing.NA: # GH#45421 don't alter inplace mask = mask | True elif is_list_like(other) and len(other) == len(mask): mask = mask | isna(other) else: - mask = self._mask | mask + mask = self._mask.to_numpy() | mask # Incompatible return value type (got "Optional[ndarray[Any, dtype[bool_]]]", # expected "ndarray[Any, dtype[bool_]]") return mask # type: ignore[return-value] @@ -766,7 +776,7 @@ def _arith_method(self, other, op): if op_name == "pow": # 1 ** x is 1. - mask = np.where((self._data == 1) & ~self._mask, False, mask) + mask = np.where((self._data == 1) & ~self._mask.to_numpy(), False, mask) # x ** 0 is 1. if omask is not None: mask = np.where((other == 0) & ~omask, False, mask) @@ -780,7 +790,7 @@ def _arith_method(self, other, op): elif other is not libmissing.NA: mask = np.where(other == 1, False, mask) # x ** 0 is 1. 
- mask = np.where((self._data == 0) & ~self._mask, False, mask) + mask = np.where((self._data == 0) & ~self._mask.to_numpy(), False, mask) return self._maybe_mask_result(result, mask) @@ -876,7 +886,7 @@ def _maybe_mask_result( return result def isna(self) -> np.ndarray: - return self._mask.copy() + return self._mask.to_numpy().copy() @property def _na_value(self): @@ -916,7 +926,11 @@ def take( ) mask = take( - self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis + self._mask.to_numpy(), + indexer, + fill_value=True, + allow_fill=allow_fill, + axis=axis, ) # if we are filling @@ -947,14 +961,14 @@ def isin(self, values) -> BooleanArray: # type: ignore[override] # For now, NA does not propagate so set result according to presence of NA, # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion - result[self._mask] = values_have_NA + result[self._mask.to_numpy()] = values_have_NA mask = np.zeros(self._data.shape, dtype=bool) return BooleanArray(result, mask, copy=False) def copy(self) -> Self: data = self._data.copy() - mask = self._mask.copy() + mask = self._mask.to_numpy().copy() return self._simple_new(data, mask) def unique(self) -> Self: @@ -965,7 +979,7 @@ def unique(self) -> Self: ------- uniques : BaseMaskedArray """ - uniques, mask = algos.unique_with_mask(self._data, self._mask) + uniques, mask = algos.unique_with_mask(self._data, self._mask.to_numpy()) return self._simple_new(uniques, mask) @doc(ExtensionArray.searchsorted) @@ -991,7 +1005,7 @@ def factorize( use_na_sentinel: bool = True, ) -> tuple[np.ndarray, ExtensionArray]: arr = self._data - mask = self._mask + mask = self._mask.to_numpy() # Use a sentinel for na; recode and add NA to uniques if necessary below codes, uniques = factorize_array(arr, use_na_sentinel=True, mask=mask) @@ -1050,7 +1064,7 @@ def value_counts(self, dropna: bool = True) -> Series: from pandas.arrays import IntegerArray keys, value_counts = algos.value_counts_arraylike( - self._data, 
dropna=True, mask=self._mask + self._data, dropna=True, mask=self._mask.to_numpy() ) if dropna: @@ -1062,7 +1076,7 @@ def value_counts(self, dropna: bool = True) -> Series: # if we want nans, count the mask counts = np.empty(len(value_counts) + 1, dtype="int64") counts[:-1] = value_counts - counts[-1] = self._mask.sum() + counts[-1] = self._mask.to_numpy().sum() index = Index(keys, dtype=self.dtype).insert(len(keys), self.dtype.na_value) index = index.astype(self.dtype) @@ -1081,11 +1095,11 @@ def equals(self, other) -> bool: # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT # equal. - if not np.array_equal(self._mask, other._mask): + if not np.array_equal(self._mask.to_numpy(), other._mask): return False - left = self._data[~self._mask] - right = other._data[~other._mask] + left = self._data[~self._mask.to_numpy()] + right = other._data[~other._mask.to_numpy()] return array_equivalent(left, right, strict_nan=True, dtype_equal=True) def _quantile( @@ -1101,7 +1115,7 @@ def _quantile( """ res = quantile_with_mask( self._data, - mask=self._mask, + mask=self._mask.to_numpy(), # TODO(GH#40932): na_value_for_dtype(self.dtype.numpy_dtype) # instead of np.nan fill_value=np.nan, @@ -1140,7 +1154,7 @@ def _reduce( else: # median, skew, kurt, sem data = self._data - mask = self._mask + mask = self._mask.to_numpy() op = getattr(nanops, f"nan{name}") axis = kwargs.pop("axis", None) result = op(data, axis=axis, skipna=skipna, mask=mask, **kwargs) @@ -1162,9 +1176,9 @@ def _wrap_reduction_result(self, name: str, result, *, skipna, axis): if isinstance(result, np.ndarray): if skipna: # we only retain mask for all-NA rows/columns - mask = self._mask.all(axis=axis) + mask = self._mask.to_numpy().all(axis=axis) else: - mask = self._mask.any(axis=axis) + mask = self._mask.to_numpy().any(axis=axis) return self._maybe_mask_result(result, mask) return result @@ -1369,7 +1383,7 @@ def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): if skipna: 
return result else: - if result or len(self) == 0 or not self._mask.any(): + if result or len(self) == 0 or not self._mask.to_numpy().any(): return result else: return self.dtype.na_value @@ -1451,7 +1465,7 @@ def all(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): if skipna: return result else: - if not result or len(self) == 0 or not self._mask.any(): + if not result or len(self) == 0 or not self._mask.to_numpy().any(): return result else: return self.dtype.na_value From b69c00fc0956aeea94ff6e032a9ed3d578935af5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 11 Aug 2023 05:30:35 -0400 Subject: [PATCH 002/126] removed cpplint --- .pre-commit-config.yaml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 90627216a1354..000949c41f5a0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -70,19 +70,6 @@ repos: - id: fix-encoding-pragma args: [--remove] - id: trailing-whitespace -- repo: https://github.com/cpplint/cpplint - rev: 1.6.1 - hooks: - - id: cpplint - exclude: ^pandas/_libs/include/pandas/vendored/klib - args: [ - --quiet, - '--extensions=c,h', - '--headers=h', - --recursive, - --linelength=88, - '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' - ] - repo: https://github.com/pylint-dev/pylint rev: v3.0.0a6 hooks: From 64b0f01fb1b9042e0c3c11e75f7b6d34975df43d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 11 Aug 2023 05:35:40 -0400 Subject: [PATCH 003/126] checkpoint --- pandas/_libs/arrays.pyx | 11 +++++++---- pandas/core/arrays/masked.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 4b7c86a067fa5..1168948718665 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -215,10 +215,8 @@ cdef class BitMaskArray: self.array_len = len(np_array) nbytes = len(np_array) // 8 + 1 self.validity_buffer = malloc(nbytes) - 
# malloc def __dealloc__(self): - ... free(self.validity_buffer) def __setitem__(self, key, value): @@ -230,5 +228,10 @@ cdef class BitMaskArray: def __getitem__(self, key): bool(ArrowBitGet(self.validity_buffer, key)) - def to_numpy(self): - ... + def to_numpy(self) -> ndarray: + cdef ndarray[uint8_t] result + result = np.empty(self.array_len, dtype=bool) + for i in range(self.array_len): + result = self[i] + + return result diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 8dc9e55e8a1de..14853895905f6 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -184,7 +184,7 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: # TODO: need to change this to special case multiple # indexers versus just scalar - newmask = self._mask[item] + newmask = self._mask.to_numpy()[item] if is_bool(newmask): # This is a scalar indexing if newmask: From e5238d964a6e168ecd40c03c67f58828ae9dd2b7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 11 Aug 2023 15:35:18 -0400 Subject: [PATCH 004/126] Passing test suite --- pandas/_libs/arrays.pyx | 24 +++++++++++++++---- pandas/core/arrays/masked.py | 8 +++---- pandas/tests/arrays/masked/test_arithmetic.py | 8 +++---- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 1168948718665..c8440de16bc38 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -21,6 +21,8 @@ from libc.stdlib cimport ( malloc, ) +from pandas._libs.lib import is_list_like + cdef extern from "pandas/vendored/nanoarrow.h": int8_t ArrowBitGet(const uint8_t*, int64_t) @@ -215,23 +217,35 @@ cdef class BitMaskArray: self.array_len = len(np_array) nbytes = len(np_array) // 8 + 1 self.validity_buffer = malloc(nbytes) + for index, value in enumerate(np_array): + self[index] = value def __dealloc__(self): free(self.validity_buffer) def __setitem__(self, key, value): - if value: - ArrowBitSet(self.validity_buffer, key) + if 
is_list_like(key): + for k in key: + if value: + ArrowBitSet(self.validity_buffer, k) + else: + ArrowBitClear(self.validity_buffer, k) else: - ArrowBitClear(self.validity_buffer, key) + if value: + ArrowBitSet(self.validity_buffer, key) + else: + ArrowBitClear(self.validity_buffer, key) def __getitem__(self, key): - bool(ArrowBitGet(self.validity_buffer, key)) + return bool(ArrowBitGet(self.validity_buffer, key)) + + def __invert__(self): + return ~self.to_numpy() def to_numpy(self) -> ndarray: cdef ndarray[uint8_t] result result = np.empty(self.array_len, dtype=bool) for i in range(self.array_len): - result = self[i] + result[i] = self[i] return result diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 14853895905f6..fb47982d3807c 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -696,7 +696,7 @@ def _propagate_mask( elif is_list_like(other) and len(other) == len(mask): mask = mask | isna(other) else: - mask = self._mask.to_numpy() | mask + mask = self._mask.to_numpy() | mask.to_numpy() # Incompatible return value type (got "Optional[ndarray[Any, dtype[bool_]]]", # expected "ndarray[Any, dtype[bool_]]") return mask # type: ignore[return-value] @@ -869,7 +869,7 @@ def _maybe_mask_result( # e.g. 
test_numeric_arr_mul_tdscalar_numexpr_path from pandas.core.arrays import TimedeltaArray - result[mask] = result.dtype.type("NaT") + result[mask.to_numpy()] = result.dtype.type("NaT") if not isinstance(result, TimedeltaArray): return TimedeltaArray._simple_new(result, dtype=result.dtype) @@ -882,7 +882,7 @@ def _maybe_mask_result( return IntegerArray(result, mask, copy=False) else: - result[mask] = np.nan + result[mask.to_numpy()] = np.nan return result def isna(self) -> np.ndarray: @@ -903,7 +903,7 @@ def _concat_same_type( axis: AxisInt = 0, ) -> Self: data = np.concatenate([x._data for x in to_concat], axis=axis) - mask = np.concatenate([x._mask for x in to_concat], axis=axis) + mask = np.concatenate([x._mask.to_numpy() for x in to_concat], axis=axis) return cls(data, mask) def take( diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index f4b571ca627b3..21e292e5bbc29 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -76,22 +76,22 @@ def test_array_NA(data, all_arithmetic_operators): scalar = pd.NA scalar_array = pd.array([pd.NA] * len(data), dtype=data.dtype) - mask = data._mask.copy() + mask = data._mask.to_numpy().copy() if is_bool_not_implemented(data, all_arithmetic_operators): msg = "operator '.*' not implemented for bool dtypes" with pytest.raises(NotImplementedError, match=msg): op(data, scalar) # GH#45421 check op doesn't alter data._mask inplace - tm.assert_numpy_array_equal(mask, data._mask) + tm.assert_numpy_array_equal(mask, data._mask.to_numpy()) return result = op(data, scalar) # GH#45421 check op doesn't alter data._mask inplace - tm.assert_numpy_array_equal(mask, data._mask) + tm.assert_numpy_array_equal(mask, data._mask.to_numpy()) expected = op(data, scalar_array) - tm.assert_numpy_array_equal(mask, data._mask) + tm.assert_numpy_array_equal(mask, data._mask.to_numpy()) tm.assert_extension_array_equal(result, expected) From 
b63b6715a48951d96128fddd97f1a2f200eb6c8e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 11 Aug 2023 15:38:45 -0400 Subject: [PATCH 005/126] revert modifications to nanoarrow --- pandas/_libs/meson.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index fabcd220b8b30..29b1298050619 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -62,7 +62,7 @@ libs_sources = { # Dict of extension name -> dict of {sources, include_dirs, and deps} # numpy include dir is implicitly included 'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper, _khash_primitive_helper]}, - 'arrays': {'sources': ['arrays.pyx', 'src/vendored/nanoarrow.c']}, + 'arrays': {'sources': ['arrays.pyx', 'src/vendored/nanoarrow.c'], 'includes': ['include/pandas/vendored']}, 'groupby': {'sources': ['groupby.pyx']}, 'hashing': {'sources': ['hashing.pyx']}, 'hashtable': {'sources': ['hashtable.pyx', _khash_primitive_helper, _hashtable_class_helper, _hashtable_func_helper]}, @@ -106,7 +106,7 @@ foreach ext_name, ext_dict : libs_sources ext_name, ext_dict.get('sources'), cython_args: ['--include-dir', meson.current_build_dir(), '-X always_allow_keywords=true'], - include_directories: [inc_np, inc_pd], + include_directories: [inc_np, inc_pd] + ext_dict.get('includes', []), dependencies: ext_dict.get('deps', ''), subdir: 'pandas/_libs', install: true From fe31993dfef1ad4635f9bae307b0acdb9fd57167 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 11 Aug 2023 16:07:32 -0400 Subject: [PATCH 006/126] force vendor --- pandas/_libs/src/vendored/nanoarrow.c | 3107 +++++++++++++++++++++++++ 1 file changed, 3107 insertions(+) create mode 100644 pandas/_libs/src/vendored/nanoarrow.c diff --git a/pandas/_libs/src/vendored/nanoarrow.c b/pandas/_libs/src/vendored/nanoarrow.c new file mode 100644 index 0000000000000..7cc53b43550d7 --- /dev/null +++ b/pandas/_libs/src/vendored/nanoarrow.c @@ -0,0 +1,3107 @@ 
+// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include + +#include "pandas/vendored/nanoarrow.h" + +const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; } + +int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; } + +int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) 
{ + if (error == NULL) { + return NANOARROW_OK; + } + + memset(error->message, 0, sizeof(error->message)); + + va_list args; + va_start(args, fmt); + int chars_needed = vsnprintf(error->message, sizeof(error->message), fmt, args); + va_end(args); + + if (chars_needed < 0) { + return EINVAL; + } else if (((size_t)chars_needed) >= sizeof(error->message)) { + return ERANGE; + } else { + return NANOARROW_OK; + } +} + +const char* ArrowErrorMessage(struct ArrowError* error) { + if (error == NULL) { + return ""; + } else { + return error->message; + } +} + +void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { + layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY; + layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL; + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[1] = storage_type; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[2] = NANOARROW_TYPE_UNINITIALIZED; + + layout->element_size_bits[0] = 1; + layout->element_size_bits[1] = 0; + layout->element_size_bits[2] = 0; + + layout->child_size_elements = 0; + + switch (storage_type) { + case NANOARROW_TYPE_UNINITIALIZED: + case NANOARROW_TYPE_NA: + layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED; + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; + layout->element_size_bits[0] = 0; + break; + + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_MAP: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + break; + + case NANOARROW_TYPE_LARGE_LIST: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; + layout->element_size_bits[1] = 64; + break; + + case NANOARROW_TYPE_STRUCT: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + layout->buffer_type[1] = 
NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; + break; + + case NANOARROW_TYPE_BOOL: + layout->element_size_bits[1] = 1; + break; + + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT8: + layout->element_size_bits[1] = 8; + break; + + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_HALF_FLOAT: + layout->element_size_bits[1] = 16; + break; + + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_FLOAT: + layout->element_size_bits[1] = 32; + break; + case NANOARROW_TYPE_INTERVAL_MONTHS: + layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + break; + + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + layout->element_size_bits[1] = 64; + break; + + case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + layout->element_size_bits[1] = 128; + break; + + case NANOARROW_TYPE_DECIMAL256: + layout->element_size_bits[1] = 256; + break; + + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY; + break; + + case NANOARROW_TYPE_DENSE_UNION: + layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID; + layout->buffer_data_type[0] = NANOARROW_TYPE_INT8; + layout->element_size_bits[0] = 8; + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + break; + + case NANOARROW_TYPE_SPARSE_UNION: + layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID; + layout->buffer_data_type[0] = NANOARROW_TYPE_INT8; + layout->element_size_bits[0] = 8; + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; + layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; + break; + + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + 
layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[2] = storage_type; + break; + + case NANOARROW_TYPE_LARGE_STRING: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; + layout->element_size_bits[1] = 64; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[2] = NANOARROW_TYPE_STRING; + break; + case NANOARROW_TYPE_LARGE_BINARY: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; + layout->element_size_bits[1] = 64; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; + layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY; + break; + + default: + break; + } +} + +void* ArrowMalloc(int64_t size) { return malloc(size); } + +void* ArrowRealloc(void* ptr, int64_t size) { return realloc(ptr, size); } + +void ArrowFree(void* ptr) { free(ptr); } + +static uint8_t* ArrowBufferAllocatorMallocReallocate( + struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size, + int64_t new_size) { + return (uint8_t*)ArrowRealloc(ptr, new_size); +} + +static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator* allocator, + uint8_t* ptr, int64_t size) { + ArrowFree(ptr); +} + +static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = { + &ArrowBufferAllocatorMallocReallocate, &ArrowBufferAllocatorMallocFree, NULL}; + +struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void) { + return ArrowBufferAllocatorMalloc; +} + +static uint8_t* ArrowBufferAllocatorNeverReallocate( + struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size, + int64_t new_size) { + return NULL; +} + +struct ArrowBufferAllocator ArrowBufferDeallocator( + void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t size), + void* private_data) { + struct ArrowBufferAllocator 
allocator; + allocator.reallocate = &ArrowBufferAllocatorNeverReallocate; + allocator.free = custom_free; + allocator.private_data = private_data; + return allocator; +} +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include + +#include "pandas/vendored/nanoarrow.h" + +static void ArrowSchemaRelease(struct ArrowSchema* schema) { + if (schema->format != NULL) ArrowFree((void*)schema->format); + if (schema->name != NULL) ArrowFree((void*)schema->name); + if (schema->metadata != NULL) ArrowFree((void*)schema->metadata); + + // This object owns the memory for all the children, but those + // children may have been generated elsewhere and might have + // their own release() callback. + if (schema->children != NULL) { + for (int64_t i = 0; i < schema->n_children; i++) { + if (schema->children[i] != NULL) { + if (schema->children[i]->release != NULL) { + schema->children[i]->release(schema->children[i]); + } + + ArrowFree(schema->children[i]); + } + } + + ArrowFree(schema->children); + } + + // This object owns the memory for the dictionary but it + // may have been generated somewhere else and have its own + // release() callback. 
+ if (schema->dictionary != NULL) { + if (schema->dictionary->release != NULL) { + schema->dictionary->release(schema->dictionary); + } + + ArrowFree(schema->dictionary); + } + + // private data not currently used + if (schema->private_data != NULL) { + ArrowFree(schema->private_data); + } + + schema->release = NULL; +} + +static const char* ArrowSchemaFormatTemplate(enum ArrowType type) { + switch (type) { + case NANOARROW_TYPE_UNINITIALIZED: + return NULL; + case NANOARROW_TYPE_NA: + return "n"; + case NANOARROW_TYPE_BOOL: + return "b"; + + case NANOARROW_TYPE_UINT8: + return "C"; + case NANOARROW_TYPE_INT8: + return "c"; + case NANOARROW_TYPE_UINT16: + return "S"; + case NANOARROW_TYPE_INT16: + return "s"; + case NANOARROW_TYPE_UINT32: + return "I"; + case NANOARROW_TYPE_INT32: + return "i"; + case NANOARROW_TYPE_UINT64: + return "L"; + case NANOARROW_TYPE_INT64: + return "l"; + + case NANOARROW_TYPE_HALF_FLOAT: + return "e"; + case NANOARROW_TYPE_FLOAT: + return "f"; + case NANOARROW_TYPE_DOUBLE: + return "g"; + + case NANOARROW_TYPE_STRING: + return "u"; + case NANOARROW_TYPE_LARGE_STRING: + return "U"; + case NANOARROW_TYPE_BINARY: + return "z"; + case NANOARROW_TYPE_LARGE_BINARY: + return "Z"; + + case NANOARROW_TYPE_DATE32: + return "tdD"; + case NANOARROW_TYPE_DATE64: + return "tdm"; + case NANOARROW_TYPE_INTERVAL_MONTHS: + return "tiM"; + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + return "tiD"; + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + return "tin"; + + case NANOARROW_TYPE_LIST: + return "+l"; + case NANOARROW_TYPE_LARGE_LIST: + return "+L"; + case NANOARROW_TYPE_STRUCT: + return "+s"; + case NANOARROW_TYPE_MAP: + return "+m"; + + default: + return NULL; + } +} + +static int ArrowSchemaInitChildrenIfNeeded(struct ArrowSchema* schema, + enum ArrowType type) { + switch (type) { + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); + 
ArrowSchemaInit(schema->children[0]); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "item")); + break; + case NANOARROW_TYPE_MAP: + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); + NANOARROW_RETURN_NOT_OK( + ArrowSchemaInitFromType(schema->children[0], NANOARROW_TYPE_STRUCT)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "entries")); + schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema->children[0], 2)); + ArrowSchemaInit(schema->children[0]->children[0]); + ArrowSchemaInit(schema->children[0]->children[1]); + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetName(schema->children[0]->children[0], "key")); + schema->children[0]->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetName(schema->children[0]->children[1], "value")); + break; + default: + break; + } + + return NANOARROW_OK; +} + +void ArrowSchemaInit(struct ArrowSchema* schema) { + schema->format = NULL; + schema->name = NULL; + schema->metadata = NULL; + schema->flags = ARROW_FLAG_NULLABLE; + schema->n_children = 0; + schema->children = NULL; + schema->dictionary = NULL; + schema->private_data = NULL; + schema->release = &ArrowSchemaRelease; +} + +ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type) { + // We don't allocate the dictionary because it has to be nullptr + // for non-dictionary-encoded arrays. 
+ + // Set the format to a valid format string for type + const char* template_format = ArrowSchemaFormatTemplate(type); + + // If type isn't recognized and not explicitly unset + if (template_format == NULL && type != NANOARROW_TYPE_UNINITIALIZED) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, template_format)); + + // For types with an umabiguous child structure, allocate children + return ArrowSchemaInitChildrenIfNeeded(schema, type); +} + +ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRUCT)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children)); + for (int64_t i = 0; i < n_children; i++) { + ArrowSchemaInit(schema->children[i]); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type) { + ArrowSchemaInit(schema); + + int result = ArrowSchemaSetType(schema, type); + if (result != NANOARROW_OK) { + schema->release(schema); + return result; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, + enum ArrowType type, int32_t fixed_size) { + if (fixed_size <= 0) { + return EINVAL; + } + + char buffer[64]; + int n_chars; + switch (type) { + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + n_chars = snprintf(buffer, sizeof(buffer), "w:%d", (int)fixed_size); + break; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + n_chars = snprintf(buffer, sizeof(buffer), "+w:%d", (int)fixed_size); + break; + default: + return EINVAL; + } + + buffer[n_chars] = '\0'; + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, buffer)); + + if (type == NANOARROW_TYPE_FIXED_SIZE_LIST) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaInitChildrenIfNeeded(schema, type)); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, + int32_t decimal_precision, + int32_t 
decimal_scale) { + if (decimal_precision <= 0) { + return EINVAL; + } + + char buffer[64]; + int n_chars; + switch (type) { + case NANOARROW_TYPE_DECIMAL128: + n_chars = + snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale); + break; + case NANOARROW_TYPE_DECIMAL256: + n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,256", decimal_precision, + decimal_scale); + break; + default: + return EINVAL; + } + + buffer[n_chars] = '\0'; + return ArrowSchemaSetFormat(schema, buffer); +} + +static const char* ArrowTimeUnitFormatString(enum ArrowTimeUnit time_unit) { + switch (time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + return "s"; + case NANOARROW_TIME_UNIT_MILLI: + return "m"; + case NANOARROW_TIME_UNIT_MICRO: + return "u"; + case NANOARROW_TIME_UNIT_NANO: + return "n"; + default: + return NULL; + } +} + +ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, + enum ArrowTimeUnit time_unit, + const char* timezone) { + const char* time_unit_str = ArrowTimeUnitFormatString(time_unit); + if (time_unit_str == NULL) { + return EINVAL; + } + + char buffer[128]; + int n_chars; + switch (type) { + case NANOARROW_TYPE_TIME32: + case NANOARROW_TYPE_TIME64: + if (timezone != NULL) { + return EINVAL; + } + n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str); + break; + case NANOARROW_TYPE_TIMESTAMP: + if (timezone == NULL) { + timezone = ""; + } + n_chars = snprintf(buffer, sizeof(buffer), "ts%s:%s", time_unit_str, timezone); + break; + case NANOARROW_TYPE_DURATION: + if (timezone != NULL) { + return EINVAL; + } + n_chars = snprintf(buffer, sizeof(buffer), "tD%s", time_unit_str); + break; + default: + return EINVAL; + } + + if (((size_t)n_chars) >= sizeof(buffer)) { + return ERANGE; + } + + buffer[n_chars] = '\0'; + + return ArrowSchemaSetFormat(schema, buffer); +} + +ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, + int64_t n_children) { + if (n_children < 0 || 
n_children > 127) { + return EINVAL; + } + + // Max valid size would be +ud:0,1,...126 = 401 characters + null terminator + char format_out[512]; + int64_t format_out_size = 512; + memset(format_out, 0, format_out_size); + int n_chars; + char* format_cursor = format_out; + + switch (type) { + case NANOARROW_TYPE_SPARSE_UNION: + n_chars = snprintf(format_cursor, format_out_size, "+us:"); + format_cursor += n_chars; + format_out_size -= n_chars; + break; + case NANOARROW_TYPE_DENSE_UNION: + n_chars = snprintf(format_cursor, format_out_size, "+ud:"); + format_cursor += n_chars; + format_out_size -= n_chars; + break; + default: + return EINVAL; + } + + if (n_children > 0) { + n_chars = snprintf(format_cursor, format_out_size, "0"); + format_cursor += n_chars; + format_out_size -= n_chars; + + for (int64_t i = 1; i < n_children; i++) { + n_chars = snprintf(format_cursor, format_out_size, ",%d", (int)i); + format_cursor += n_chars; + format_out_size -= n_chars; + } + } + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, format_out)); + + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children)); + for (int64_t i = 0; i < n_children; i++) { + ArrowSchemaInit(schema->children[i]); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format) { + if (schema->format != NULL) { + ArrowFree((void*)schema->format); + } + + if (format != NULL) { + size_t format_size = strlen(format) + 1; + schema->format = (const char*)ArrowMalloc(format_size); + if (schema->format == NULL) { + return ENOMEM; + } + + memcpy((void*)schema->format, format, format_size); + } else { + schema->format = NULL; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name) { + if (schema->name != NULL) { + ArrowFree((void*)schema->name); + } + + if (name != NULL) { + size_t name_size = strlen(name) + 1; + schema->name = (const char*)ArrowMalloc(name_size); + if (schema->name 
== NULL) { + return ENOMEM; + } + + memcpy((void*)schema->name, name, name_size); + } else { + schema->name = NULL; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata) { + if (schema->metadata != NULL) { + ArrowFree((void*)schema->metadata); + } + + if (metadata != NULL) { + size_t metadata_size = ArrowMetadataSizeOf(metadata); + schema->metadata = (const char*)ArrowMalloc(metadata_size); + if (schema->metadata == NULL) { + return ENOMEM; + } + + memcpy((void*)schema->metadata, metadata, metadata_size); + } else { + schema->metadata = NULL; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, + int64_t n_children) { + if (schema->children != NULL) { + return EEXIST; + } + + if (n_children > 0) { + schema->children = + (struct ArrowSchema**)ArrowMalloc(n_children * sizeof(struct ArrowSchema*)); + + if (schema->children == NULL) { + return ENOMEM; + } + + schema->n_children = n_children; + + memset(schema->children, 0, n_children * sizeof(struct ArrowSchema*)); + + for (int64_t i = 0; i < n_children; i++) { + schema->children[i] = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema)); + + if (schema->children[i] == NULL) { + return ENOMEM; + } + + schema->children[i]->release = NULL; + } + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema) { + if (schema->dictionary != NULL) { + return EEXIST; + } + + schema->dictionary = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema)); + if (schema->dictionary == NULL) { + return ENOMEM; + } + + schema->dictionary->release = NULL; + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema, + struct ArrowSchema* schema_out) { + ArrowSchemaInit(schema_out); + + int result = ArrowSchemaSetFormat(schema_out, schema->format); + if (result != NANOARROW_OK) { + schema_out->release(schema_out); + return result; + 
} + + schema_out->flags = schema->flags; + + result = ArrowSchemaSetName(schema_out, schema->name); + if (result != NANOARROW_OK) { + schema_out->release(schema_out); + return result; + } + + result = ArrowSchemaSetMetadata(schema_out, schema->metadata); + if (result != NANOARROW_OK) { + schema_out->release(schema_out); + return result; + } + + result = ArrowSchemaAllocateChildren(schema_out, schema->n_children); + if (result != NANOARROW_OK) { + schema_out->release(schema_out); + return result; + } + + for (int64_t i = 0; i < schema->n_children; i++) { + result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]); + if (result != NANOARROW_OK) { + schema_out->release(schema_out); + return result; + } + } + + if (schema->dictionary != NULL) { + result = ArrowSchemaAllocateDictionary(schema_out); + if (result != NANOARROW_OK) { + schema_out->release(schema_out); + return result; + } + + result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary); + if (result != NANOARROW_OK) { + schema_out->release(schema_out); + return result; + } + } + + return NANOARROW_OK; +} + +static void ArrowSchemaViewSetPrimitive(struct ArrowSchemaView* schema_view, + enum ArrowType type) { + schema_view->type = type; + schema_view->storage_type = type; +} + +static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view, + const char* format, + const char** format_end_out, + struct ArrowError* error) { + *format_end_out = format; + + // needed for decimal parsing + const char* parse_start; + char* parse_end; + + switch (format[0]) { + case 'n': + schema_view->type = NANOARROW_TYPE_NA; + schema_view->storage_type = NANOARROW_TYPE_NA; + *format_end_out = format + 1; + return NANOARROW_OK; + case 'b': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BOOL); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'c': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT8); + *format_end_out = format + 1; + return NANOARROW_OK; 
+ case 'C': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT8); + *format_end_out = format + 1; + return NANOARROW_OK; + case 's': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT16); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'S': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT16); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'i': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'I': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT32); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'l': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'L': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT64); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'e': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_HALF_FLOAT); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'f': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_FLOAT); + *format_end_out = format + 1; + return NANOARROW_OK; + case 'g': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DOUBLE); + *format_end_out = format + 1; + return NANOARROW_OK; + + // decimal + case 'd': + if (format[1] != ':' || format[2] == '\0') { + ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'", + format + 3); + return EINVAL; + } + + parse_start = format + 2; + schema_view->decimal_precision = (int32_t)strtol(parse_start, &parse_end, 10); + if (parse_end == parse_start || parse_end[0] != ',') { + ArrowErrorSet(error, "Expected 'precision,scale[,bitwidth]' following 'd:'"); + return EINVAL; + } + + parse_start = parse_end + 1; + schema_view->decimal_scale = (int32_t)strtol(parse_start, &parse_end, 10); + if (parse_end == parse_start) { + ArrowErrorSet(error, "Expected 'scale[,bitwidth]' 
following 'd:precision,'"); + return EINVAL; + } else if (parse_end[0] != ',') { + schema_view->decimal_bitwidth = 128; + } else { + parse_start = parse_end + 1; + schema_view->decimal_bitwidth = (int32_t)strtol(parse_start, &parse_end, 10); + if (parse_start == parse_end) { + ArrowErrorSet(error, "Expected precision following 'd:precision,scale,'"); + return EINVAL; + } + } + + *format_end_out = parse_end; + + switch (schema_view->decimal_bitwidth) { + case 128: + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128); + return NANOARROW_OK; + case 256: + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL256); + return NANOARROW_OK; + default: + ArrowErrorSet(error, "Expected decimal bitwidth of 128 or 256 but found %d", + (int)schema_view->decimal_bitwidth); + return EINVAL; + } + + // validity + data + case 'w': + schema_view->type = NANOARROW_TYPE_FIXED_SIZE_BINARY; + schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_BINARY; + if (format[1] != ':' || format[2] == '\0') { + ArrowErrorSet(error, "Expected ':' following 'w'"); + return EINVAL; + } + + schema_view->fixed_size = (int32_t)strtol(format + 2, (char**)format_end_out, 10); + return NANOARROW_OK; + + // validity + offset + data + case 'z': + schema_view->type = NANOARROW_TYPE_BINARY; + schema_view->storage_type = NANOARROW_TYPE_BINARY; + *format_end_out = format + 1; + return NANOARROW_OK; + case 'u': + schema_view->type = NANOARROW_TYPE_STRING; + schema_view->storage_type = NANOARROW_TYPE_STRING; + *format_end_out = format + 1; + return NANOARROW_OK; + + // validity + large_offset + data + case 'Z': + schema_view->type = NANOARROW_TYPE_LARGE_BINARY; + schema_view->storage_type = NANOARROW_TYPE_LARGE_BINARY; + *format_end_out = format + 1; + return NANOARROW_OK; + case 'U': + schema_view->type = NANOARROW_TYPE_LARGE_STRING; + schema_view->storage_type = NANOARROW_TYPE_LARGE_STRING; + *format_end_out = format + 1; + return NANOARROW_OK; + + // nested types + case '+': + switch 
(format[1]) { + // list has validity + offset or offset + case 'l': + schema_view->storage_type = NANOARROW_TYPE_LIST; + schema_view->type = NANOARROW_TYPE_LIST; + *format_end_out = format + 2; + return NANOARROW_OK; + + // large list has validity + large_offset or large_offset + case 'L': + schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST; + schema_view->type = NANOARROW_TYPE_LARGE_LIST; + *format_end_out = format + 2; + return NANOARROW_OK; + + // just validity buffer + case 'w': + if (format[2] != ':' || format[3] == '\0') { + ArrowErrorSet(error, "Expected ':' following '+w'"); + return EINVAL; + } + + schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_LIST; + schema_view->type = NANOARROW_TYPE_FIXED_SIZE_LIST; + schema_view->fixed_size = + (int32_t)strtol(format + 3, (char**)format_end_out, 10); + return NANOARROW_OK; + case 's': + schema_view->storage_type = NANOARROW_TYPE_STRUCT; + schema_view->type = NANOARROW_TYPE_STRUCT; + *format_end_out = format + 2; + return NANOARROW_OK; + case 'm': + schema_view->storage_type = NANOARROW_TYPE_MAP; + schema_view->type = NANOARROW_TYPE_MAP; + *format_end_out = format + 2; + return NANOARROW_OK; + + // unions + case 'u': + switch (format[2]) { + case 'd': + schema_view->storage_type = NANOARROW_TYPE_DENSE_UNION; + schema_view->type = NANOARROW_TYPE_DENSE_UNION; + break; + case 's': + schema_view->storage_type = NANOARROW_TYPE_SPARSE_UNION; + schema_view->type = NANOARROW_TYPE_SPARSE_UNION; + break; + default: + ArrowErrorSet(error, + "Expected union format string +us: or " + "+ud: but found '%s'", + format); + return EINVAL; + } + + if (format[3] == ':') { + schema_view->union_type_ids = format + 4; + int64_t n_type_ids = + _ArrowParseUnionTypeIds(schema_view->union_type_ids, NULL); + if (n_type_ids != schema_view->schema->n_children) { + ArrowErrorSet( + error, + "Expected union type_ids parameter to be a comma-separated list of %ld " + "values between 0 and 127 but found '%s'", + 
(long)schema_view->schema->n_children, schema_view->union_type_ids); + return EINVAL; + } + *format_end_out = format + strlen(format); + return NANOARROW_OK; + } else { + ArrowErrorSet(error, + "Expected union format string +us: or +ud: " + "but found '%s'", + format); + return EINVAL; + } + + default: + ArrowErrorSet(error, "Expected nested type format string but found '%s'", + format); + return EINVAL; + } + + // date/time types + case 't': + switch (format[1]) { + // date + case 'd': + switch (format[2]) { + case 'D': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); + schema_view->type = NANOARROW_TYPE_DATE32; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'm': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DATE64; + *format_end_out = format + 3; + return NANOARROW_OK; + default: + ArrowErrorSet(error, "Expected 'D' or 'm' following 'td' but found '%s'", + format + 2); + return EINVAL; + } + + // time of day + case 't': + switch (format[2]) { + case 's': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); + schema_view->type = NANOARROW_TYPE_TIME32; + schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'm': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); + schema_view->type = NANOARROW_TYPE_TIME32; + schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'u': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIME64; + schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'n': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIME64; + schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; + *format_end_out = format + 3; + return NANOARROW_OK; + default: + 
ArrowErrorSet( + error, "Expected 's', 'm', 'u', or 'n' following 'tt' but found '%s'", + format + 2); + return EINVAL; + } + + // timestamp + case 's': + switch (format[2]) { + case 's': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIMESTAMP; + schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; + break; + case 'm': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIMESTAMP; + schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; + break; + case 'u': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIMESTAMP; + schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; + break; + case 'n': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_TIMESTAMP; + schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; + break; + default: + ArrowErrorSet( + error, "Expected 's', 'm', 'u', or 'n' following 'ts' but found '%s'", + format + 2); + return EINVAL; + } + + if (format[3] != ':') { + ArrowErrorSet(error, "Expected ':' following '%.3s' but found '%s'", format, + format + 3); + return EINVAL; + } + + schema_view->timezone = format + 4; + *format_end_out = format + strlen(format); + return NANOARROW_OK; + + // duration + case 'D': + switch (format[2]) { + case 's': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DURATION; + schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'm': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DURATION; + schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'u': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DURATION; + schema_view->time_unit = 
NANOARROW_TIME_UNIT_MICRO; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'n': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); + schema_view->type = NANOARROW_TYPE_DURATION; + schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; + *format_end_out = format + 3; + return NANOARROW_OK; + default: + ArrowErrorSet(error, + "Expected 's', 'm', u', or 'n' following 'tD' but found '%s'", + format + 2); + return EINVAL; + } + + // interval + case 'i': + switch (format[2]) { + case 'M': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_MONTHS); + *format_end_out = format + 3; + return NANOARROW_OK; + case 'D': + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_DAY_TIME); + *format_end_out = format + 3; + return NANOARROW_OK; + case 'n': + ArrowSchemaViewSetPrimitive(schema_view, + NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); + *format_end_out = format + 3; + return NANOARROW_OK; + default: + ArrowErrorSet(error, + "Expected 'M', 'D', or 'n' following 'ti' but found '%s'", + format + 2); + return EINVAL; + } + + default: + ArrowErrorSet( + error, "Expected 'd', 't', 's', 'D', or 'i' following 't' but found '%s'", + format + 1); + return EINVAL; + } + + default: + ArrowErrorSet(error, "Unknown format: '%s'", format); + return EINVAL; + } +} + +static ArrowErrorCode ArrowSchemaViewValidateNChildren( + struct ArrowSchemaView* schema_view, int64_t n_children, struct ArrowError* error) { + if (n_children != -1 && schema_view->schema->n_children != n_children) { + ArrowErrorSet(error, "Expected schema with %d children but found %d children", + (int)n_children, (int)schema_view->schema->n_children); + return EINVAL; + } + + // Don't do a full validation of children but do check that they won't + // segfault if inspected + struct ArrowSchema* child; + for (int64_t i = 0; i < schema_view->schema->n_children; i++) { + child = schema_view->schema->children[i]; + if (child == NULL) { + ArrowErrorSet(error, "Expected valid schema 
at schema->children[%d] but found NULL", + i); + return EINVAL; + } else if (child->release == NULL) { + ArrowErrorSet( + error, + "Expected valid schema at schema->children[%d] but found a released schema", i); + return EINVAL; + } + } + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowSchemaViewValidateUnion(struct ArrowSchemaView* schema_view, + struct ArrowError* error) { + return ArrowSchemaViewValidateNChildren(schema_view, -1, error); +} + +static ArrowErrorCode ArrowSchemaViewValidateMap(struct ArrowSchemaView* schema_view, + struct ArrowError* error) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewValidateNChildren(schema_view, 1, error)); + + if (schema_view->schema->children[0]->n_children != 2) { + ArrowErrorSet(error, "Expected child of map type to have 2 children but found %d", + (int)schema_view->schema->children[0]->n_children); + return EINVAL; + } + + if (strcmp(schema_view->schema->children[0]->format, "+s") != 0) { + ArrowErrorSet(error, "Expected format of child of map type to be '+s' but found '%s'", + schema_view->schema->children[0]->format); + return EINVAL; + } + + if (schema_view->schema->children[0]->flags & ARROW_FLAG_NULLABLE) { + ArrowErrorSet(error, + "Expected child of map type to be non-nullable but was nullable"); + return EINVAL; + } + + if (schema_view->schema->children[0]->children[0]->flags & ARROW_FLAG_NULLABLE) { + ArrowErrorSet(error, "Expected key of map type to be non-nullable but was nullable"); + return EINVAL; + } + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowSchemaViewValidateDictionary( + struct ArrowSchemaView* schema_view, struct ArrowError* error) { + // check for valid index type + switch (schema_view->storage_type) { + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_INT64: + break; + default: + ArrowErrorSet( + error, + 
"Expected dictionary schema index type to be an integral type but found '%s'", + schema_view->schema->format); + return EINVAL; + } + + struct ArrowSchemaView dictionary_schema_view; + return ArrowSchemaViewInit(&dictionary_schema_view, schema_view->schema->dictionary, + error); +} + +static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_view, + enum ArrowType type, + struct ArrowError* error) { + switch (type) { + case NANOARROW_TYPE_NA: + case NANOARROW_TYPE_BOOL: + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_HALF_FLOAT: + case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_DECIMAL256: + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_LARGE_BINARY: + case NANOARROW_TYPE_DATE32: + case NANOARROW_TYPE_DATE64: + case NANOARROW_TYPE_INTERVAL_MONTHS: + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + case NANOARROW_TYPE_TIMESTAMP: + case NANOARROW_TYPE_TIME32: + case NANOARROW_TYPE_TIME64: + case NANOARROW_TYPE_DURATION: + return ArrowSchemaViewValidateNChildren(schema_view, 0, error); + + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + if (schema_view->fixed_size <= 0) { + ArrowErrorSet(error, "Expected size > 0 for fixed size binary but found size %d", + schema_view->fixed_size); + return EINVAL; + } + return ArrowSchemaViewValidateNChildren(schema_view, 0, error); + + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + return ArrowSchemaViewValidateNChildren(schema_view, 1, error); + + case NANOARROW_TYPE_STRUCT: + return ArrowSchemaViewValidateNChildren(schema_view, -1, error); + + case NANOARROW_TYPE_SPARSE_UNION: + case 
NANOARROW_TYPE_DENSE_UNION: + return ArrowSchemaViewValidateUnion(schema_view, error); + + case NANOARROW_TYPE_MAP: + return ArrowSchemaViewValidateMap(schema_view, error); + + case NANOARROW_TYPE_DICTIONARY: + return ArrowSchemaViewValidateDictionary(schema_view, error); + + default: + ArrowErrorSet(error, "Expected a valid enum ArrowType value but found %d", + (int)schema_view->type); + return EINVAL; + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, + struct ArrowSchema* schema, struct ArrowError* error) { + if (schema == NULL) { + ArrowErrorSet(error, "Expected non-NULL schema"); + return EINVAL; + } + + if (schema->release == NULL) { + ArrowErrorSet(error, "Expected non-released schema"); + return EINVAL; + } + + schema_view->schema = schema; + + const char* format = schema->format; + if (format == NULL) { + ArrowErrorSet( + error, + "Error parsing schema->format: Expected a null-terminated string but found NULL"); + return EINVAL; + } + + size_t format_len = strlen(format); + if (format_len == 0) { + ArrowErrorSet(error, "Error parsing schema->format: Expected a string with size > 0"); + return EINVAL; + } + + const char* format_end_out; + ArrowErrorCode result = + ArrowSchemaViewParse(schema_view, format, &format_end_out, error); + + if (result != NANOARROW_OK) { + if (error != NULL) { + char child_error[1024]; + memcpy(child_error, ArrowErrorMessage(error), 1024); + ArrowErrorSet(error, "Error parsing schema->format: %s", child_error); + } + + return result; + } + + if ((format + format_len) != format_end_out) { + ArrowErrorSet(error, "Error parsing schema->format '%s': parsed %d/%d characters", + format, (int)(format_end_out - format), (int)(format_len)); + return EINVAL; + } + + if (schema->dictionary != NULL) { + schema_view->type = NANOARROW_TYPE_DICTIONARY; + } + + result = ArrowSchemaViewValidate(schema_view, schema_view->storage_type, error); + if (result != NANOARROW_OK) { + return result; + } 
+ + if (schema_view->storage_type != schema_view->type) { + result = ArrowSchemaViewValidate(schema_view, schema_view->type, error); + if (result != NANOARROW_OK) { + return result; + } + } + + ArrowLayoutInit(&schema_view->layout, schema_view->storage_type); + if (schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) { + schema_view->layout.element_size_bits[1] = schema_view->fixed_size * 8; + } else if (schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_LIST) { + schema_view->layout.child_size_elements = schema_view->fixed_size; + } + + schema_view->extension_name = ArrowCharView(NULL); + schema_view->extension_metadata = ArrowCharView(NULL); + ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:name"), + &schema_view->extension_name); + ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:metadata"), + &schema_view->extension_metadata); + + return NANOARROW_OK; +} + +static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_view, + char* out, int64_t n) { + const char* type_string = ArrowTypeString(schema_view->type); + switch (schema_view->type) { + case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_DECIMAL256: + return snprintf(out, n, "%s(%d, %d)", type_string, + (int)schema_view->decimal_precision, + (int)schema_view->decimal_scale); + case NANOARROW_TYPE_TIMESTAMP: + return snprintf(out, n, "%s('%s', '%s')", type_string, + ArrowTimeUnitString(schema_view->time_unit), schema_view->timezone); + case NANOARROW_TYPE_TIME32: + case NANOARROW_TYPE_TIME64: + case NANOARROW_TYPE_DURATION: + return snprintf(out, n, "%s('%s')", type_string, + ArrowTimeUnitString(schema_view->time_unit)); + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + return snprintf(out, n, "%s(%ld)", type_string, (long)schema_view->fixed_size); + case NANOARROW_TYPE_SPARSE_UNION: + case NANOARROW_TYPE_DENSE_UNION: + return snprintf(out, n, "%s([%s])", type_string, schema_view->union_type_ids); 
+ default: + return snprintf(out, n, "%s", type_string); + } +} + +// Helper for bookeeping to emulate sprintf()-like behaviour spread +// among multiple sprintf calls. +static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last, + int64_t* n_remaining, int64_t* n_chars) { + *n_chars += n_chars_last; + *n_remaining -= n_chars_last; + + // n_remaining is never less than 0 + if (*n_remaining < 0) { + *n_remaining = 0; + } + + // Can't do math on a NULL pointer + if (*out != NULL) { + *out += n_chars_last; + } +} + +int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n, + char recursive) { + if (schema == NULL) { + return snprintf(out, n, "[invalid: pointer is null]"); + } + + if (schema->release == NULL) { + return snprintf(out, n, "[invalid: schema is released]"); + } + + struct ArrowSchemaView schema_view; + struct ArrowError error; + + if (ArrowSchemaViewInit(&schema_view, schema, &error) != NANOARROW_OK) { + return snprintf(out, n, "[invalid: %s]", ArrowErrorMessage(&error)); + } + + // Extension type and dictionary should include both the top-level type + // and the storage type. 
+ int is_extension = schema_view.extension_name.size_bytes > 0; + int is_dictionary = schema->dictionary != NULL; + int64_t n_chars = 0; + int64_t n_chars_last = 0; + + // Uncommon but not technically impossible that both are true + if (is_extension && is_dictionary) { + n_chars_last = snprintf( + out, n, "%.*s{dictionary(%s)<", (int)schema_view.extension_name.size_bytes, + schema_view.extension_name.data, ArrowTypeString(schema_view.storage_type)); + } else if (is_extension) { + n_chars_last = snprintf(out, n, "%.*s{", (int)schema_view.extension_name.size_bytes, + schema_view.extension_name.data); + } else if (is_dictionary) { + n_chars_last = + snprintf(out, n, "dictionary(%s)<", ArrowTypeString(schema_view.storage_type)); + } + + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + + if (!is_dictionary) { + n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out, n); + } else { + n_chars_last = ArrowSchemaToString(schema->dictionary, out, n, recursive); + } + + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + + if (recursive && schema->format[0] == '+') { + n_chars_last = snprintf(out, n, "<"); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + + for (int64_t i = 0; i < schema->n_children; i++) { + if (i > 0) { + n_chars_last = snprintf(out, n, ", "); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + } + + // ArrowSchemaToStringInternal() will validate the child and print the error, + // but we need the name first + if (schema->children[i] != NULL && schema->children[i]->release != NULL && + schema->children[i]->name != NULL) { + n_chars_last = snprintf(out, n, "%s: ", schema->children[i]->name); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + } + + n_chars_last = ArrowSchemaToString(schema->children[i], out, n, recursive); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + } + + n_chars_last = snprintf(out, n, ">"); + ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + } + + if 
(is_extension && is_dictionary) { + n_chars += snprintf(out, n, ">}"); + } else if (is_extension) { + n_chars += snprintf(out, n, "}"); + } else if (is_dictionary) { + n_chars += snprintf(out, n, ">"); + } + + return n_chars; +} + +ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, + const char* metadata) { + reader->metadata = metadata; + + if (reader->metadata == NULL) { + reader->offset = 0; + reader->remaining_keys = 0; + } else { + memcpy(&reader->remaining_keys, reader->metadata, sizeof(int32_t)); + reader->offset = sizeof(int32_t); + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, + struct ArrowStringView* key_out, + struct ArrowStringView* value_out) { + if (reader->remaining_keys <= 0) { + return EINVAL; + } + + int64_t pos = 0; + + int32_t key_size; + memcpy(&key_size, reader->metadata + reader->offset + pos, sizeof(int32_t)); + pos += sizeof(int32_t); + + key_out->data = reader->metadata + reader->offset + pos; + key_out->size_bytes = key_size; + pos += key_size; + + int32_t value_size; + memcpy(&value_size, reader->metadata + reader->offset + pos, sizeof(int32_t)); + pos += sizeof(int32_t); + + value_out->data = reader->metadata + reader->offset + pos; + value_out->size_bytes = value_size; + pos += value_size; + + reader->offset += pos; + reader->remaining_keys--; + return NANOARROW_OK; +} + +int64_t ArrowMetadataSizeOf(const char* metadata) { + if (metadata == NULL) { + return 0; + } + + struct ArrowMetadataReader reader; + struct ArrowStringView key; + struct ArrowStringView value; + ArrowMetadataReaderInit(&reader, metadata); + + int64_t size = sizeof(int32_t); + while (ArrowMetadataReaderRead(&reader, &key, &value) == NANOARROW_OK) { + size += sizeof(int32_t) + key.size_bytes + sizeof(int32_t) + value.size_bytes; + } + + return size; +} + +static ArrowErrorCode ArrowMetadataGetValueInternal(const char* metadata, + struct ArrowStringView* key, + struct 
ArrowStringView* value_out) { + struct ArrowMetadataReader reader; + struct ArrowStringView existing_key; + struct ArrowStringView existing_value; + ArrowMetadataReaderInit(&reader, metadata); + + while (ArrowMetadataReaderRead(&reader, &existing_key, &existing_value) == + NANOARROW_OK) { + int key_equal = key->size_bytes == existing_key.size_bytes && + strncmp(key->data, existing_key.data, existing_key.size_bytes) == 0; + if (key_equal) { + value_out->data = existing_value.data; + value_out->size_bytes = existing_value.size_bytes; + break; + } + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key, + struct ArrowStringView* value_out) { + if (value_out == NULL) { + return EINVAL; + } + + return ArrowMetadataGetValueInternal(metadata, &key, value_out); +} + +char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key) { + struct ArrowStringView value = ArrowCharView(NULL); + ArrowMetadataGetValue(metadata, key, &value); + return value.data != NULL; +} + +ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, + const char* metadata) { + ArrowBufferInit(buffer); + return ArrowBufferAppend(buffer, metadata, ArrowMetadataSizeOf(metadata)); +} + +static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buffer, + struct ArrowStringView* key, + struct ArrowStringView* value) { + if (value == NULL) { + return NANOARROW_OK; + } + + if (buffer->capacity_bytes == 0) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(buffer, 0)); + } + + if (((size_t)buffer->capacity_bytes) < sizeof(int32_t)) { + return EINVAL; + } + + int32_t n_keys; + memcpy(&n_keys, buffer->data, sizeof(int32_t)); + + int32_t key_size = (int32_t)key->size_bytes; + int32_t value_size = (int32_t)value->size_bytes; + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve( + buffer, sizeof(int32_t) + key_size + sizeof(int32_t) + value_size)); + + ArrowBufferAppendUnsafe(buffer, &key_size, sizeof(int32_t)); + 
ArrowBufferAppendUnsafe(buffer, key->data, key_size); + ArrowBufferAppendUnsafe(buffer, &value_size, sizeof(int32_t)); + ArrowBufferAppendUnsafe(buffer, value->data, value_size); + + n_keys++; + memcpy(buffer->data, &n_keys, sizeof(int32_t)); + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowMetadataBuilderSetInternal(struct ArrowBuffer* buffer, + struct ArrowStringView* key, + struct ArrowStringView* value) { + // Inspect the current value to see if we can avoid copying the buffer + struct ArrowStringView current_value = ArrowCharView(NULL); + NANOARROW_RETURN_NOT_OK( + ArrowMetadataGetValueInternal((const char*)buffer->data, key, ¤t_value)); + + // The key should be removed but no key exists + if (value == NULL && current_value.data == NULL) { + return NANOARROW_OK; + } + + // The key/value can be appended because no key exists + if (value != NULL && current_value.data == NULL) { + return ArrowMetadataBuilderAppendInternal(buffer, key, value); + } + + struct ArrowMetadataReader reader; + struct ArrowStringView existing_key; + struct ArrowStringView existing_value; + NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, (const char*)buffer->data)); + + struct ArrowBuffer new_buffer; + NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&new_buffer, NULL)); + + while (reader.remaining_keys > 0) { + int result = ArrowMetadataReaderRead(&reader, &existing_key, &existing_value); + if (result != NANOARROW_OK) { + ArrowBufferReset(&new_buffer); + return result; + } + + if (key->size_bytes == existing_key.size_bytes && + strncmp((const char*)key->data, (const char*)existing_key.data, + existing_key.size_bytes) == 0) { + result = ArrowMetadataBuilderAppendInternal(&new_buffer, key, value); + value = NULL; + } else { + result = + ArrowMetadataBuilderAppendInternal(&new_buffer, &existing_key, &existing_value); + } + + if (result != NANOARROW_OK) { + ArrowBufferReset(&new_buffer); + return result; + } + } + + ArrowBufferReset(buffer); + ArrowBufferMove(&new_buffer, 
buffer); + return NANOARROW_OK; +} + +ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value) { + return ArrowMetadataBuilderAppendInternal(buffer, &key, &value); +} + +ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value) { + return ArrowMetadataBuilderSetInternal(buffer, &key, &value); +} + +ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, + struct ArrowStringView key) { + return ArrowMetadataBuilderSetInternal(buffer, &key, NULL); +} +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include + +#include "pandas/vendored/nanoarrow.h" + +static void ArrowArrayRelease(struct ArrowArray* array) { + // Release buffers held by this array + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + if (private_data != NULL) { + ArrowBitmapReset(&private_data->bitmap); + ArrowBufferReset(&private_data->buffers[0]); + ArrowBufferReset(&private_data->buffers[1]); + ArrowFree(private_data); + } + + // This object owns the memory for all the children, but those + // children may have been generated elsewhere and might have + // their own release() callback. + if (array->children != NULL) { + for (int64_t i = 0; i < array->n_children; i++) { + if (array->children[i] != NULL) { + if (array->children[i]->release != NULL) { + array->children[i]->release(array->children[i]); + } + + ArrowFree(array->children[i]); + } + } + + ArrowFree(array->children); + } + + // This object owns the memory for the dictionary but it + // may have been generated somewhere else and have its own + // release() callback. 
+ if (array->dictionary != NULL) { + if (array->dictionary->release != NULL) { + array->dictionary->release(array->dictionary); + } + + ArrowFree(array->dictionary); + } + + // Mark released + array->release = NULL; +} + +static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, + enum ArrowType storage_type) { + switch (storage_type) { + case NANOARROW_TYPE_UNINITIALIZED: + case NANOARROW_TYPE_NA: + array->n_buffers = 0; + break; + + case NANOARROW_TYPE_FIXED_SIZE_LIST: + case NANOARROW_TYPE_STRUCT: + case NANOARROW_TYPE_SPARSE_UNION: + array->n_buffers = 1; + break; + + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_MAP: + case NANOARROW_TYPE_BOOL: + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_HALF_FLOAT: + case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_DECIMAL256: + case NANOARROW_TYPE_INTERVAL_MONTHS: + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + case NANOARROW_TYPE_DENSE_UNION: + array->n_buffers = 2; + break; + + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_LARGE_BINARY: + array->n_buffers = 3; + break; + + default: + return EINVAL; + + return NANOARROW_OK; + } + + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + private_data->storage_type = storage_type; + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, + enum ArrowType storage_type) { + array->length = 0; + array->null_count = 0; + array->offset = 0; + array->n_buffers = 0; + array->n_children = 0; + array->buffers = NULL; + array->children 
= NULL; + array->dictionary = NULL; + array->release = &ArrowArrayRelease; + array->private_data = NULL; + + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)ArrowMalloc(sizeof(struct ArrowArrayPrivateData)); + if (private_data == NULL) { + array->release = NULL; + return ENOMEM; + } + + ArrowBitmapInit(&private_data->bitmap); + ArrowBufferInit(&private_data->buffers[0]); + ArrowBufferInit(&private_data->buffers[1]); + private_data->buffer_data[0] = NULL; + private_data->buffer_data[1] = NULL; + private_data->buffer_data[2] = NULL; + + array->private_data = private_data; + array->buffers = (const void**)(&private_data->buffer_data); + + int result = ArrowArraySetStorageType(array, storage_type); + if (result != NANOARROW_OK) { + array->release(array); + return result; + } + + ArrowLayoutInit(&private_data->layout, storage_type); + // We can only know this not to be true when initializing based on a schema + // so assume this to be true. + private_data->union_type_id_is_child_index = 1; + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, + struct ArrowArrayView* array_view, + struct ArrowError* error) { + NANOARROW_RETURN_NOT_OK_WITH_ERROR( + ArrowArrayInitFromType(array, array_view->storage_type), error); + int result; + + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + private_data->layout = array_view->layout; + + if (array_view->n_children > 0) { + result = ArrowArrayAllocateChildren(array, array_view->n_children); + if (result != NANOARROW_OK) { + array->release(array); + return result; + } + + for (int64_t i = 0; i < array_view->n_children; i++) { + result = + ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error); + if (result != NANOARROW_OK) { + array->release(array); + return result; + } + } + } + + if (array_view->dictionary != NULL) { + result = ArrowArrayAllocateDictionary(array); + if (result != 
NANOARROW_OK) { + array->release(array); + return result; + } + + result = + ArrowArrayInitFromArrayView(array->dictionary, array_view->dictionary, error); + if (result != NANOARROW_OK) { + array->release(array); + return result; + } + } + + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, + struct ArrowSchema* schema, + struct ArrowError* error) { + struct ArrowArrayView array_view; + NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema, error)); + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromArrayView(array, &array_view, error)); + if (array_view.storage_type == NANOARROW_TYPE_DENSE_UNION || + array_view.storage_type == NANOARROW_TYPE_SPARSE_UNION) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + // We can still build arrays if this isn't true; however, the append + // functions won't work. Instead, we store this value and error only + // when StartAppending is called. + private_data->union_type_id_is_child_index = + _ArrowUnionTypeIdsWillEqualChildIndices(schema->format + 4, schema->n_children); + } + + ArrowArrayViewReset(&array_view); + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children) { + if (array->children != NULL) { + return EINVAL; + } + + if (n_children == 0) { + return NANOARROW_OK; + } + + array->children = + (struct ArrowArray**)ArrowMalloc(n_children * sizeof(struct ArrowArray*)); + if (array->children == NULL) { + return ENOMEM; + } + + memset(array->children, 0, n_children * sizeof(struct ArrowArray*)); + + for (int64_t i = 0; i < n_children; i++) { + array->children[i] = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray)); + if (array->children[i] == NULL) { + return ENOMEM; + } + array->children[i]->release = NULL; + } + + array->n_children = n_children; + return NANOARROW_OK; +} + +ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array) { + if 
(array->dictionary != NULL) { + return EINVAL; + } + + array->dictionary = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray)); + if (array->dictionary == NULL) { + return ENOMEM; + } + + array->dictionary->release = NULL; + return NANOARROW_OK; +} + +void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + ArrowBufferMove(&bitmap->buffer, &private_data->bitmap.buffer); + private_data->bitmap.size_bits = bitmap->size_bits; + bitmap->size_bits = 0; + private_data->buffer_data[0] = private_data->bitmap.buffer.data; + array->null_count = -1; +} + +ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, + struct ArrowBuffer* buffer) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + switch (i) { + case 0: + ArrowBufferMove(buffer, &private_data->bitmap.buffer); + private_data->buffer_data[i] = private_data->bitmap.buffer.data; + break; + case 1: + case 2: + ArrowBufferMove(buffer, &private_data->buffers[i - 1]); + private_data->buffer_data[i] = private_data->buffers[i - 1].data; + break; + default: + return EINVAL; + } + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_view, + struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + ArrowArrayViewInitFromType(array_view, private_data->storage_type); + array_view->layout = private_data->layout; + array_view->array = array; + array_view->length = array->length; + array_view->offset = array->offset; + array_view->null_count = array->null_count; + + array_view->buffer_views[0].data.as_uint8 = private_data->bitmap.buffer.data; + array_view->buffer_views[0].size_bytes = private_data->bitmap.buffer.size_bytes; + array_view->buffer_views[1].data.as_uint8 = 
private_data->buffers[0].data; + array_view->buffer_views[1].size_bytes = private_data->buffers[0].size_bytes; + array_view->buffer_views[2].data.as_uint8 = private_data->buffers[1].data; + array_view->buffer_views[2].size_bytes = private_data->buffers[1].size_bytes; + + int result = ArrowArrayViewAllocateChildren(array_view, array->n_children); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + + for (int64_t i = 0; i < array->n_children; i++) { + result = ArrowArrayViewInitFromArray(array_view->children[i], array->children[i]); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + } + + if (array->dictionary != NULL) { + result = ArrowArrayViewAllocateDictionary(array_view); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + + result = ArrowArrayViewInitFromArray(array_view->dictionary, array->dictionary); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(array_view); + return result; + } + } + + return NANOARROW_OK; +} + +static ArrowErrorCode ArrowArrayReserveInternal(struct ArrowArray* array, + struct ArrowArrayView* array_view) { + // Loop through buffers and reserve the extra space that we know about + for (int64_t i = 0; i < array->n_buffers; i++) { + // Don't reserve on a validity buffer that hasn't been allocated yet + if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY && + ArrowArrayBuffer(array, i)->data == NULL) { + continue; + } + + int64_t additional_size_bytes = + array_view->buffer_views[i].size_bytes - ArrowArrayBuffer(array, i)->size_bytes; + + if (additional_size_bytes > 0) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferReserve(ArrowArrayBuffer(array, i), additional_size_bytes)); + } + } + + // Recursively reserve children + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK( + ArrowArrayReserveInternal(array->children[i], array_view->children[i])); + } + + return NANOARROW_OK; +} + 
// Reserves space for additional_size_elements more elements in every buffer
// of array (recursively), using a temporary ArrowArrayView to compute the
// theoretical buffer sizes at the new length.
ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array,
                                 int64_t additional_size_elements) {
  struct ArrowArrayView array_view;
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array));

  // Calculate theoretical buffer sizes (recursively)
  ArrowArrayViewSetLength(&array_view, array->length + additional_size_elements);

  // Walk the structure (recursively)
  int result = ArrowArrayReserveInternal(array, &array_view);
  ArrowArrayViewReset(&array_view);
  if (result != NANOARROW_OK) {
    return result;
  }

  return NANOARROW_OK;
}

// Ensures buffers are in an exportable state (recursively).
static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) {
  struct ArrowArrayPrivateData* private_data =
      (struct ArrowArrayPrivateData*)array->private_data;

  // The only buffer finalizing this currently does is make sure the data
  // buffer for (Large)String|Binary is never NULL
  switch (private_data->storage_type) {
    case NANOARROW_TYPE_BINARY:
    case NANOARROW_TYPE_STRING:
    case NANOARROW_TYPE_LARGE_BINARY:
    case NANOARROW_TYPE_LARGE_STRING:
      if (ArrowArrayBuffer(array, 2)->data == NULL) {
        ArrowBufferAppendUInt8(ArrowArrayBuffer(array, 2), 0);
      }
      break;
    default:
      break;
  }

  for (int64_t i = 0; i < array->n_children; i++) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->children[i]));
  }

  if (array->dictionary != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->dictionary));
  }

  return NANOARROW_OK;
}

// Copies the current buffer data pointers into the exported buffer_data
// array (recursively); needed because reallocation may have moved them.
static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
  struct ArrowArrayPrivateData* private_data =
      (struct ArrowArrayPrivateData*)array->private_data;

  for (int64_t i = 0; i < 3; i++) {
    private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
  }

  for (int64_t i = 0; i < array->n_children; i++) {
    ArrowArrayFlushInternalPointers(array->children[i]);
  }

  if (array->dictionary != NULL) {
    ArrowArrayFlushInternalPointers(array->dictionary);
  }
}

// Finalizes an array under construction: fixes up buffers, flushes the
// exported pointers, and validates at the requested level.
ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
                                        enum ArrowValidationLevel validation_level,
                                        struct ArrowError* error) {
  // Even if the data buffer is size zero, the pointer value needed to be non-null
  // in some implementations (at least one version of Arrow C++ at the time this
  // was added). Only do this fix if we can assume CPU data access.
  if (validation_level >= NANOARROW_VALIDATION_LEVEL_DEFAULT) {
    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFinalizeBuffers(array), error);
  }

  // Make sure the value we get with array->buffers[i] is set to the actual
  // pointer (which may have changed from the original due to reallocation)
  ArrowArrayFlushInternalPointers(array);

  if (validation_level == NANOARROW_VALIDATION_LEVEL_NONE) {
    return NANOARROW_OK;
  }

  // For validation, initialize an ArrowArrayView with our known buffer sizes
  struct ArrowArrayView array_view;
  NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayViewInitFromArray(&array_view, array),
                                     error);
  int result = ArrowArrayViewValidate(&array_view, validation_level, error);
  ArrowArrayViewReset(&array_view);
  return result;
}

// Convenience wrapper: finish building with default validation.
ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array,
                                               struct ArrowError* error) {
  return ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_DEFAULT, error);
}

// Zero-initializes an ArrowArrayView for storage_type and its buffer layout.
void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view,
                                enum ArrowType storage_type) {
  memset(array_view, 0, sizeof(struct ArrowArrayView));
  array_view->storage_type = storage_type;
  ArrowLayoutInit(&array_view->layout, storage_type);
}

// Allocates n_children uninitialized child views. On mid-allocation ENOMEM,
// already-allocated children are owned by array_view (freed by Reset).
ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
                                              int64_t n_children) {
  if (array_view->children != NULL) {
    return EINVAL;
  }

  array_view->children =
      (struct ArrowArrayView**)ArrowMalloc(n_children * sizeof(struct ArrowArrayView*));
  if (array_view->children == NULL) {
    return ENOMEM;
  }

  // NULL all slots first so a partial failure can be cleaned up safely
  for (int64_t i = 0; i < n_children; i++) {
    array_view->children[i] = NULL;
  }

  array_view->n_children = n_children;

  for (int64_t i = 0; i < n_children; i++) {
    array_view->children[i] =
        (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
    if (array_view->children[i] == NULL) {
      return ENOMEM;
    }
    ArrowArrayViewInitFromType(array_view->children[i], NANOARROW_TYPE_UNINITIALIZED);
  }

  return NANOARROW_OK;
}

// Allocates an uninitialized dictionary view member on array_view.
ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view) {
  if (array_view->dictionary != NULL) {
    return EINVAL;
  }

  array_view->dictionary =
      (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
  if (array_view->dictionary == NULL) {
    return ENOMEM;
  }

  ArrowArrayViewInitFromType(array_view->dictionary, NANOARROW_TYPE_UNINITIALIZED);
  return NANOARROW_OK;
}

// Builds an ArrowArrayView (recursively, including dictionary) from a
// schema. For unions, also builds the 256-entry type-id <-> child-index map.
ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
                                            struct ArrowSchema* schema,
                                            struct ArrowError* error) {
  struct ArrowSchemaView schema_view;
  int result = ArrowSchemaViewInit(&schema_view, schema, error);
  if (result != NANOARROW_OK) {
    return result;
  }

  ArrowArrayViewInitFromType(array_view, schema_view.storage_type);
  array_view->layout = schema_view.layout;

  result = ArrowArrayViewAllocateChildren(array_view, schema->n_children);
  if (result != NANOARROW_OK) {
    ArrowErrorSet(error, "ArrowArrayViewAllocateChildren() failed");
    ArrowArrayViewReset(array_view);
    return result;
  }

  for (int64_t i = 0; i < schema->n_children; i++) {
    result =
        ArrowArrayViewInitFromSchema(array_view->children[i], schema->children[i], error);
    if (result != NANOARROW_OK) {
      ArrowArrayViewReset(array_view);
      return result;
    }
  }

  if (schema->dictionary != NULL) {
    result = ArrowArrayViewAllocateDictionary(array_view);
    if (result != NANOARROW_OK) {
      ArrowArrayViewReset(array_view);
      return result;
    }

    result =
        ArrowArrayViewInitFromSchema(array_view->dictionary, schema->dictionary, error);
    if (result != NANOARROW_OK) {
      ArrowArrayViewReset(array_view);
      return result;
    }
  }

  if (array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION ||
      array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) {
    array_view->union_type_id_map = (int8_t*)ArrowMalloc(256 * sizeof(int8_t));
    if (array_view->union_type_id_map == NULL) {
      return ENOMEM;
    }

    // Entries [0, 128) map type id -> child index; [128, 256) map child
    // index -> type id; -1 marks "not present".
    memset(array_view->union_type_id_map, -1, 256);
    int8_t n_type_ids = _ArrowParseUnionTypeIds(schema_view.union_type_ids,
                                                array_view->union_type_id_map + 128);
    for (int8_t child_index = 0; child_index < n_type_ids; child_index++) {
      int8_t type_id = array_view->union_type_id_map[128 + child_index];
      array_view->union_type_id_map[type_id] = child_index;
    }
  }

  return NANOARROW_OK;
}

// Frees all owned members (children, dictionary, union map) and returns the
// view to the uninitialized state.
void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
  if (array_view->children != NULL) {
    for (int64_t i = 0; i < array_view->n_children; i++) {
      if (array_view->children[i] != NULL) {
        ArrowArrayViewReset(array_view->children[i]);
        ArrowFree(array_view->children[i]);
      }
    }

    ArrowFree(array_view->children);
  }

  if (array_view->dictionary != NULL) {
    ArrowArrayViewReset(array_view->dictionary);
    ArrowFree(array_view->dictionary);
  }

  if (array_view->union_type_id_map != NULL) {
    ArrowFree(array_view->union_type_id_map);
  }

  ArrowArrayViewInitFromType(array_view, NANOARROW_TYPE_UNINITIALIZED);
}

// Sets the view's buffer sizes to the theoretical sizes for `length`
// elements (recursively for struct/sparse-union/fixed-size-list children).
void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) {
  for (int i = 0; i < 3; i++) {
    int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;

    switch (array_view->layout.buffer_type[i]) {
      case NANOARROW_BUFFER_TYPE_VALIDITY:
        array_view->buffer_views[i].size_bytes = _ArrowBytesForBits(length);
        continue;
      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
        // Probably don't want/need to rely on the producer to have allocated an
        // offsets buffer of length 1 for a zero-size array
        array_view->buffer_views[i].size_bytes =
            (length != 0) * element_size_bytes * (length + 1);
        continue;
      case NANOARROW_BUFFER_TYPE_DATA:
        array_view->buffer_views[i].size_bytes =
            _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * length) /
            8;
        continue;
      case NANOARROW_BUFFER_TYPE_TYPE_ID:
      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
        array_view->buffer_views[i].size_bytes = element_size_bytes * length;
        continue;
      case NANOARROW_BUFFER_TYPE_NONE:
        array_view->buffer_views[i].size_bytes = 0;
        continue;
    }
  }

  switch (array_view->storage_type) {
    case NANOARROW_TYPE_STRUCT:
    case NANOARROW_TYPE_SPARSE_UNION:
      for (int64_t i = 0; i < array_view->n_children; i++) {
        ArrowArrayViewSetLength(array_view->children[i], length);
      }
      break;
    case NANOARROW_TYPE_FIXED_SIZE_LIST:
      if (array_view->n_children >= 1) {
        ArrowArrayViewSetLength(array_view->children[0],
                                length * array_view->layout.child_size_elements);
      }
      // intentional fall-through: nothing further to do
    default:
      break;
  }
}

// This version recursively extracts information from the array and stores it
// in the array view, performing any checks that require the original array.
// Copies lengths/offsets/buffer pointers from array into array_view
// (recursively), checking structural invariants that need the original
// array (buffer count, child count, dictionary presence). Buffer sizes of
// non-NULL buffers are set to -1 ("unknown") for later validation to fill in.
static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
                                          struct ArrowArray* array,
                                          struct ArrowError* error) {
  // Check length and offset
  if (array->offset < 0) {
    ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of %ld",
                  (long)array->offset);
    return EINVAL;
  }

  if (array->length < 0) {
    ArrowErrorSet(error, "Expected array length >= 0 but found array length of %ld",
                  (long)array->length);
    return EINVAL;
  }

  array_view->array = array;
  array_view->offset = array->offset;
  array_view->length = array->length;
  array_view->null_count = array->null_count;

  int64_t buffers_required = 0;
  for (int i = 0; i < 3; i++) {
    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
      break;
    }

    buffers_required++;

    // Set buffer pointer
    array_view->buffer_views[i].data.data = array->buffers[i];

    // If non-null, set buffer size to unknown.
    if (array->buffers[i] == NULL) {
      array_view->buffer_views[i].size_bytes = 0;
    } else {
      array_view->buffer_views[i].size_bytes = -1;
    }
  }

  // Check the number of buffers
  if (buffers_required != array->n_buffers) {
    ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)",
                  (int)buffers_required, (int)array->n_buffers);
    return EINVAL;
  }

  // Check number of children
  if (array_view->n_children != array->n_children) {
    ArrowErrorSet(error, "Expected %ld children but found %ld children",
                  (long)array_view->n_children, (long)array->n_children);
    return EINVAL;
  }

  // Recurse for children
  for (int64_t i = 0; i < array_view->n_children; i++) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view->children[i],
                                                           array->children[i], error));
  }

  // Check dictionary
  if (array->dictionary == NULL && array_view->dictionary != NULL) {
    ArrowErrorSet(error, "Expected dictionary but found NULL");
    return EINVAL;
  }

  if (array->dictionary != NULL && array_view->dictionary == NULL) {
    ArrowErrorSet(error, "Expected NULL dictionary but found dictionary member");
    return EINVAL;
  }

  if (array->dictionary != NULL) {
    NANOARROW_RETURN_NOT_OK(
        ArrowArrayViewSetArrayInternal(array_view->dictionary, array->dictionary, error));
  }

  return NANOARROW_OK;
}

// Validation that never dereferences buffer contents: checks (or assigns,
// when marked -1/"unknown") minimum buffer sizes, and validates child counts
// and child lengths where the type determines them.
static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
                                         struct ArrowError* error) {
  // Calculate buffer sizes that do not require buffer access. If marked as
  // unknown, assign the buffer size; otherwise, validate it.
  int64_t offset_plus_length = array_view->offset + array_view->length;

  // Only loop over the first two buffers because the size of the third buffer
  // is always data dependent for all current Arrow types.
  for (int i = 0; i < 2; i++) {
    int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
    // Initialize with a value that will cause an error if accidentally used uninitialized
    int64_t min_buffer_size_bytes = array_view->buffer_views[i].size_bytes + 1;

    switch (array_view->layout.buffer_type[i]) {
      case NANOARROW_BUFFER_TYPE_VALIDITY:
        if (array_view->null_count == 0 && array_view->buffer_views[i].size_bytes == 0) {
          continue;
        }

        min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length);
        break;
      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
        // Probably don't want/need to rely on the producer to have allocated an
        // offsets buffer of length 1 for a zero-size array
        min_buffer_size_bytes =
            (offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1);
        break;
      case NANOARROW_BUFFER_TYPE_DATA:
        min_buffer_size_bytes =
            _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] *
                                       offset_plus_length) /
            8;
        break;
      case NANOARROW_BUFFER_TYPE_TYPE_ID:
      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
        min_buffer_size_bytes = element_size_bytes * offset_plus_length;
        break;
      case NANOARROW_BUFFER_TYPE_NONE:
        continue;
    }

    // Assign or validate buffer size
    if (array_view->buffer_views[i].size_bytes == -1) {
      array_view->buffer_views[i].size_bytes = min_buffer_size_bytes;
    } else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes) {
      ArrowErrorSet(error,
                    "Expected %s array buffer %d to have size >= %ld bytes but found "
                    "buffer with %ld bytes",
                    ArrowTypeString(array_view->storage_type), (int)i,
                    (long)min_buffer_size_bytes,
                    (long)array_view->buffer_views[i].size_bytes);
      return EINVAL;
    }
  }

  // For list, fixed-size list and map views, we can validate the number of children
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_LIST:
    case NANOARROW_TYPE_LARGE_LIST:
    case NANOARROW_TYPE_FIXED_SIZE_LIST:
    case NANOARROW_TYPE_MAP:
      if (array_view->n_children != 1) {
        ArrowErrorSet(error, "Expected 1 child of %s array but found %ld child arrays",
                      ArrowTypeString(array_view->storage_type),
                      (long)array_view->n_children);
        return EINVAL;
      }
    default:
      break;
  }

  // For struct, the sparse union, and the fixed-size list views, we can validate child
  // lengths.
  int64_t child_min_length;
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_SPARSE_UNION:
    case NANOARROW_TYPE_STRUCT:
      child_min_length = (array_view->offset + array_view->length);
      for (int64_t i = 0; i < array_view->n_children; i++) {
        if (array_view->children[i]->length < child_min_length) {
          ArrowErrorSet(
              error,
              "Expected struct child %d to have length >= %ld but found child with "
              "length %ld",
              (int)(i + 1), (long)(child_min_length),
              (long)array_view->children[i]->length);
          return EINVAL;
        }
      }
      break;

    case NANOARROW_TYPE_FIXED_SIZE_LIST:
      child_min_length = (array_view->offset + array_view->length) *
                         array_view->layout.child_size_elements;
      if (array_view->children[0]->length < child_min_length) {
        ArrowErrorSet(error,
                      "Expected child of fixed_size_list array to have length >= %ld but "
                      "found array with length %ld",
                      (long)child_min_length, (long)array_view->children[0]->length);
        return EINVAL;
      }
      break;
    default:
      break;
  }

  // Recurse for children
  for (int64_t i = 0; i < array_view->n_children; i++) {
    NANOARROW_RETURN_NOT_OK(
        ArrowArrayViewValidateMinimal(array_view->children[i], error));
  }

  // Recurse for dictionary
  if (array_view->dictionary != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view->dictionary, error));
  }

  return NANOARROW_OK;
}

// Validation that additionally reads the offsets buffer: checks the first
// offset is >= 0 and that the data buffer / child length covers the last
// offset, assigning unknown (-1) data-buffer sizes along the way.
static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
                                         struct ArrowError* error) {
  // Perform minimal validation. This will validate or assign
  // buffer sizes as long as buffer access is not required.
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error));

  // Calculate buffer sizes or child lengths that require accessing the offsets
  // buffer. Where appropriate, validate that the first offset is >= 0.
  // If a buffer size is marked as unknown, assign it; otherwise, validate it.
  int64_t offset_plus_length = array_view->offset + array_view->length;

  int64_t first_offset;
  int64_t last_offset;
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_STRING:
    case NANOARROW_TYPE_BINARY:
      if (array_view->buffer_views[1].size_bytes != 0) {
        first_offset = array_view->buffer_views[1].data.as_int32[0];
        if (first_offset < 0) {
          ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
                        (long)first_offset);
          return EINVAL;
        }

        last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length];

        // If the data buffer size is unknown, assign it; otherwise, check it
        if (array_view->buffer_views[2].size_bytes == -1) {
          array_view->buffer_views[2].size_bytes = last_offset;
        } else if (array_view->buffer_views[2].size_bytes < last_offset) {
          ArrowErrorSet(error,
                        "Expected %s array buffer 2 to have size >= %ld bytes but found "
                        "buffer with %ld bytes",
                        ArrowTypeString(array_view->storage_type), (long)last_offset,
                        (long)array_view->buffer_views[2].size_bytes);
          return EINVAL;
        }
      }
      break;

    case NANOARROW_TYPE_LARGE_STRING:
    case NANOARROW_TYPE_LARGE_BINARY:
      if (array_view->buffer_views[1].size_bytes != 0) {
        first_offset = array_view->buffer_views[1].data.as_int64[0];
        if (first_offset < 0) {
          ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
                        (long)first_offset);
          return EINVAL;
        }

        last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length];

        // If the data buffer size is unknown, assign it; otherwise, check it
        if (array_view->buffer_views[2].size_bytes == -1) {
          array_view->buffer_views[2].size_bytes = last_offset;
        } else if (array_view->buffer_views[2].size_bytes < last_offset) {
          ArrowErrorSet(error,
                        "Expected %s array buffer 2 to have size >= %ld bytes but found "
                        "buffer with %ld bytes",
                        ArrowTypeString(array_view->storage_type), (long)last_offset,
                        (long)array_view->buffer_views[2].size_bytes);
          return EINVAL;
        }
      }
      break;

    case NANOARROW_TYPE_STRUCT:
      for (int64_t i = 0; i < array_view->n_children; i++) {
        if (array_view->children[i]->length < offset_plus_length) {
          ArrowErrorSet(
              error,
              "Expected struct child %d to have length >= %ld but found child with "
              "length %ld",
              (int)(i + 1), (long)offset_plus_length,
              (long)array_view->children[i]->length);
          return EINVAL;
        }
      }
      break;

    case NANOARROW_TYPE_LIST:
    case NANOARROW_TYPE_MAP:
      if (array_view->buffer_views[1].size_bytes != 0) {
        first_offset = array_view->buffer_views[1].data.as_int32[0];
        if (first_offset < 0) {
          ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
                        (long)first_offset);
          return EINVAL;
        }

        last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length];
        if (array_view->children[0]->length < last_offset) {
          ArrowErrorSet(
              error,
              "Expected child of %s array to have length >= %ld but found array with "
              "length %ld",
              ArrowTypeString(array_view->storage_type), (long)last_offset,
              (long)array_view->children[0]->length);
          return EINVAL;
        }
      }
      break;

    case NANOARROW_TYPE_LARGE_LIST:
      if (array_view->buffer_views[1].size_bytes != 0) {
        first_offset = array_view->buffer_views[1].data.as_int64[0];
        if (first_offset < 0) {
          ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
                        (long)first_offset);
          return EINVAL;
        }

        last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length];
        if (array_view->children[0]->length < last_offset) {
          ArrowErrorSet(
              error,
              "Expected child of large list array to have length >= %ld but found array "
              "with length %ld",
              (long)last_offset, (long)array_view->children[0]->length);
          return EINVAL;
        }
      }
      break;
    default:
      break;
  }

  // Recurse for children
  for (int64_t i = 0; i < array_view->n_children; i++) {
    NANOARROW_RETURN_NOT_OK(
        ArrowArrayViewValidateDefault(array_view->children[i], error));
  }

  // Recurse for dictionary
  if (array_view->dictionary != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view->dictionary, error));
  }

  return NANOARROW_OK;
}

// Points array_view at array and runs default-level validation (which also
// fills in the buffer sizes marked unknown).
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
                                      struct ArrowArray* array,
                                      struct ArrowError* error) {
  // Extract information from the array into the array view
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error));

  // Run default validation. Because we've marked all non-NULL buffers as having unknown
  // size, validation will also update the buffer sizes as it goes.
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error));

  return NANOARROW_OK;
}

// Like ArrowArrayViewSetArray() but runs only minimal (no-buffer-access)
// validation.
ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view,
                                             struct ArrowArray* array,
                                             struct ArrowError* error) {
  // Extract information from the array into the array view
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error));

  // Run minimal validation. Because we've marked all non-NULL buffers as having unknown
  // size, validation will also update the buffer sizes as it goes.
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error));

  return NANOARROW_OK;
}

// Asserts that a buffer of int32 offsets is non-decreasing.
static int ArrowAssertIncreasingInt32(struct ArrowBufferView view,
                                      struct ArrowError* error) {
  if (view.size_bytes <= (int64_t)sizeof(int32_t)) {
    return NANOARROW_OK;
  }

  for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int32_t); i++) {
    int32_t diff = view.data.as_int32[i] - view.data.as_int32[i - 1];
    if (diff < 0) {
      ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element size %ld",
                    (long)i, (long)diff);
      return EINVAL;
    }
  }

  return NANOARROW_OK;
}

// Asserts that a buffer of int64 offsets is non-decreasing.
static int ArrowAssertIncreasingInt64(struct ArrowBufferView view,
                                      struct ArrowError* error) {
  if (view.size_bytes <= (int64_t)sizeof(int64_t)) {
    return NANOARROW_OK;
  }

  for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int64_t); i++) {
    int64_t diff = view.data.as_int64[i] - view.data.as_int64[i - 1];
    if (diff < 0) {
      ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element size %ld",
                    (long)i, (long)diff);
      return EINVAL;
    }
  }

  return NANOARROW_OK;
}

// Asserts every int8 element lies within [min_value, max_value].
static int ArrowAssertRangeInt8(struct ArrowBufferView view, int8_t min_value,
                                int8_t max_value, struct ArrowError* error) {
  for (int64_t i = 0; i < view.size_bytes; i++) {
    if (view.data.as_int8[i] < min_value || view.data.as_int8[i] > max_value) {
      ArrowErrorSet(error,
                    "[%ld] Expected buffer value between %d and %d but found value %d",
                    (long)i, (int)min_value, (int)max_value, (int)view.data.as_int8[i]);
      return EINVAL;
    }
  }

  return NANOARROW_OK;
}

// Asserts every int8 element is one of the n_values entries in values.
static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* values,
                             int64_t n_values, struct ArrowError* error) {
  for (int64_t i = 0; i < view.size_bytes; i++) {
    int item_found = 0;
    for (int64_t j = 0; j < n_values; j++) {
      if (view.data.as_int8[i] == values[j]) {
        item_found = 1;
        break;
      }
    }

    if (!item_found) {
      ArrowErrorSet(error, "[%ld] Unexpected buffer value %d", (long)i,
                    (int)view.data.as_int8[i]);
      return EINVAL;
    }
  }

  return NANOARROW_OK;
}

// Full validation: walks buffer contents to check offset monotonicity and
// union type-id/offset validity (recursively). Dictionary content validation
// is not implemented and returns ENOTSUP.
static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
                                      struct ArrowError* error) {
  for (int i = 0; i < 3; i++) {
    switch (array_view->layout.buffer_type[i]) {
      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
        if (array_view->layout.element_size_bits[i] == 32) {
          NANOARROW_RETURN_NOT_OK(
              ArrowAssertIncreasingInt32(array_view->buffer_views[i], error));
        } else {
          NANOARROW_RETURN_NOT_OK(
              ArrowAssertIncreasingInt64(array_view->buffer_views[i], error));
        }
        break;
      default:
        break;
    }
  }

  if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION ||
      array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION) {
    if (array_view->union_type_id_map == NULL) {
      // If the union_type_id map is NULL (e.g., when using ArrowArrayInitFromType() +
      // ArrowArrayAllocateChildren() + ArrowArrayFinishBuilding()), we don't have enough
      // information to validate this buffer.
      ArrowErrorSet(error,
                    "Insufficient information provided for validation of union array");
      return EINVAL;
    } else if (_ArrowParsedUnionTypeIdsWillEqualChildIndices(
                   array_view->union_type_id_map, array_view->n_children,
                   array_view->n_children)) {
      NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(
          array_view->buffer_views[0], 0, (int8_t)(array_view->n_children - 1), error));
    } else {
      NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(array_view->buffer_views[0],
                                                array_view->union_type_id_map + 128,
                                                array_view->n_children, error));
    }
  }

  if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION &&
      array_view->union_type_id_map != NULL) {
    // Check that offsets refer to child elements that actually exist
    for (int64_t i = 0; i < array_view->length; i++) {
      int8_t child_id = ArrowArrayViewUnionChildIndex(array_view, i);
      int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i);
      int64_t child_length = array_view->children[child_id]->length;
      if (offset < 0 || offset > child_length) {
        ArrowErrorSet(
            error,
            "[%ld] Expected union offset for child id %d to be between 0 and %ld but "
            "found offset value %ld",
            (long)i, (int)child_id, (long)child_length, offset);
        return EINVAL;
      }
    }
  }

  // Recurse for children
  for (int64_t i = 0; i < array_view->n_children; i++) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error));
  }

  // Dictionary validation not implemented
  if (array_view->dictionary != NULL) {
    ArrowErrorSet(error, "Validation for dictionary-encoded arrays is not implemented");
    return ENOTSUP;
  }

  return NANOARROW_OK;
}

// Dispatches to the validator matching validation_level; FULL also implies
// DEFAULT. Returns EINVAL for an unrecognized level.
ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view,
                                      enum ArrowValidationLevel validation_level,
                                      struct ArrowError* error) {
  switch (validation_level) {
    case NANOARROW_VALIDATION_LEVEL_NONE:
      return NANOARROW_OK;
    case NANOARROW_VALIDATION_LEVEL_MINIMAL:
      return ArrowArrayViewValidateMinimal(array_view, error);
    case NANOARROW_VALIDATION_LEVEL_DEFAULT:
      return ArrowArrayViewValidateDefault(array_view, error);
    case NANOARROW_VALIDATION_LEVEL_FULL:
      NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error));
      return ArrowArrayViewValidateFull(array_view, error);
  }

  ArrowErrorSet(error, "validation_level not recognized");
  return EINVAL;
}
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+ +#include + +#include "nanoarrow.h" + +struct BasicArrayStreamPrivate { + struct ArrowSchema schema; + int64_t n_arrays; + struct ArrowArray* arrays; + int64_t arrays_i; +}; + +static int ArrowBasicArrayStreamGetSchema(struct ArrowArrayStream* array_stream, + struct ArrowSchema* schema) { + if (array_stream == NULL || array_stream->release == NULL) { + return EINVAL; + } + + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)array_stream->private_data; + return ArrowSchemaDeepCopy(&private_data->schema, schema); +} + +static int ArrowBasicArrayStreamGetNext(struct ArrowArrayStream* array_stream, + struct ArrowArray* array) { + if (array_stream == NULL || array_stream->release == NULL) { + return EINVAL; + } + + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)array_stream->private_data; + + if (private_data->arrays_i == private_data->n_arrays) { + array->release = NULL; + return NANOARROW_OK; + } + + ArrowArrayMove(&private_data->arrays[private_data->arrays_i++], array); + return NANOARROW_OK; +} + +static const char* ArrowBasicArrayStreamGetLastError( + struct ArrowArrayStream* array_stream) { + return NULL; +} + +static void ArrowBasicArrayStreamRelease(struct ArrowArrayStream* array_stream) { + if (array_stream == NULL || array_stream->release == NULL) { + return; + } + + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)array_stream->private_data; + + if (private_data->schema.release != NULL) { + private_data->schema.release(&private_data->schema); + } + + for (int64_t i = 0; i < private_data->n_arrays; i++) { + if (private_data->arrays[i].release != NULL) { + private_data->arrays[i].release(&private_data->arrays[i]); + } + } + + if (private_data->arrays != NULL) { + ArrowFree(private_data->arrays); + } + + ArrowFree(private_data); + array_stream->release = NULL; +} + +ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, + struct 
ArrowSchema* schema, int64_t n_arrays) { + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)ArrowMalloc( + sizeof(struct BasicArrayStreamPrivate)); + if (private_data == NULL) { + return ENOMEM; + } + + ArrowSchemaMove(schema, &private_data->schema); + + private_data->n_arrays = n_arrays; + private_data->arrays = NULL; + private_data->arrays_i = 0; + + if (n_arrays > 0) { + private_data->arrays = + (struct ArrowArray*)ArrowMalloc(n_arrays * sizeof(struct ArrowArray)); + if (private_data->arrays == NULL) { + ArrowBasicArrayStreamRelease(array_stream); + return ENOMEM; + } + } + + for (int64_t i = 0; i < private_data->n_arrays; i++) { + private_data->arrays[i].release = NULL; + } + + array_stream->get_schema = &ArrowBasicArrayStreamGetSchema; + array_stream->get_next = &ArrowBasicArrayStreamGetNext; + array_stream->get_last_error = ArrowBasicArrayStreamGetLastError; + array_stream->release = ArrowBasicArrayStreamRelease; + array_stream->private_data = private_data; + return NANOARROW_OK; +} + +void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i, + struct ArrowArray* array) { + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)array_stream->private_data; + ArrowArrayMove(array, &private_data->arrays[i]); +} + +ArrowErrorCode ArrowBasicArrayStreamValidate(struct ArrowArrayStream* array_stream, + struct ArrowError* error) { + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)array_stream->private_data; + + struct ArrowArrayView array_view; + NANOARROW_RETURN_NOT_OK( + ArrowArrayViewInitFromSchema(&array_view, &private_data->schema, error)); + + for (int64_t i = 0; i < private_data->n_arrays; i++) { + if (private_data->arrays[i].release != NULL) { + int result = ArrowArrayViewSetArray(&array_view, &private_data->arrays[i], error); + if (result != NANOARROW_OK) { + ArrowArrayViewReset(&array_view); + return result; + } + } + } + + 
ArrowArrayViewReset(&array_view); + return NANOARROW_OK; +} From a39581bc65259fcd8b2c6c5158869562c90f1047 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 11 Aug 2023 17:59:14 -0400 Subject: [PATCH 007/126] more to_numpy adds --- pandas/tests/arrays/boolean/test_comparison.py | 2 +- pandas/tests/arrays/boolean/test_construction.py | 4 ++-- pandas/tests/arrays/boolean/test_function.py | 14 +++++++------- pandas/tests/arrays/boolean/test_logical.py | 4 ++-- pandas/tests/arrays/categorical/test_astype.py | 1 + pandas/tests/arrays/floating/test_arithmetic.py | 2 +- pandas/tests/arrays/floating/test_construction.py | 6 +++--- pandas/tests/arrays/integer/test_arithmetic.py | 2 +- pandas/tests/arrays/integer/test_construction.py | 4 ++-- pandas/tests/arrays/integer/test_dtypes.py | 2 +- pandas/tests/arrays/integer/test_function.py | 2 +- pandas/tests/arrays/masked_shared.py | 8 ++++---- pandas/tests/indexes/test_old_base.py | 6 ++++-- 13 files changed, 30 insertions(+), 27 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_comparison.py b/pandas/tests/arrays/boolean/test_comparison.py index 2eeb9da574b1e..b6f04deca1a7b 100644 --- a/pandas/tests/arrays/boolean/test_comparison.py +++ b/pandas/tests/arrays/boolean/test_comparison.py @@ -46,7 +46,7 @@ def test_array(self, comparison_op): result = op(a, b) values = op(a._data, b._data) - mask = a._mask | b._mask + mask = a._mask.to_numpy() | b._mask.to_numpy() expected = BooleanArray(values, mask) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index d26eea19c06e9..cb24a9dd778a0 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -40,11 +40,11 @@ def test_boolean_array_constructor_copy(): result = BooleanArray(values, mask) assert result._data is values - assert result._mask is mask + # assert result._mask is mask result = 
BooleanArray(values, mask, copy=True) assert result._data is not values - assert result._mask is not mask + # assert result._mask is not mask def test_to_boolean_array(): diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 2b3f3d3d16ac6..bf51035678db7 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -13,36 +13,36 @@ def test_ufuncs_binary(ufunc): a = pd.array([True, False, None], dtype="boolean") result = ufunc(a, a) expected = pd.array(ufunc(a._data, a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) s = pd.Series(a) result = ufunc(s, a) expected = pd.Series(ufunc(a._data, a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_series_equal(result, expected) # Boolean with numpy array arr = np.array([True, True, False]) result = ufunc(a, arr) expected = pd.array(ufunc(a._data, arr), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) result = ufunc(arr, a) expected = pd.array(ufunc(arr, a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) # BooleanArray with scalar result = ufunc(a, True) expected = pd.array(ufunc(a._data, True), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) result = ufunc(True, a) expected = pd.array(ufunc(True, a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) # not handled types @@ -56,7 +56,7 @@ def test_ufuncs_unary(ufunc): a = pd.array([True, False, None], dtype="boolean") result = ufunc(a) expected = 
pd.array(ufunc(a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) ser = pd.Series(a) diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py index 66c117ea3fc66..4cdaf3a90b21d 100644 --- a/pandas/tests/arrays/boolean/test_logical.py +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -238,8 +238,8 @@ def test_no_masked_assumptions(self, other, all_logical_operators): tm.assert_extension_array_equal(result, expected) if isinstance(other, BooleanArray): - other._data[other._mask] = True - a._data[a._mask] = False + other._data[other._mask.to_numpy()] = True + a._data[a._mask.to_numpy()] = False result = getattr(a, all_logical_operators)(other) expected = getattr(b, all_logical_operators)(other) diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index d2f9f6dffab49..a7d5ecda3c644 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -146,6 +146,7 @@ def test_astype_object_timestamp_categories(self): expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object") tm.assert_numpy_array_equal(result, expected) + @pytest.skip("not applicable with bitmask") def test_astype_category_readonly_mask_values(self): # GH#53658 arr = array([0, 1, 2], dtype="Int64") diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index 056c22d8c1131..8ee291d6fd6f5 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -67,7 +67,7 @@ def test_pow_scalar(dtype): # TODO np.nan should be converted to pd.NA / missing before operation? 
expected = FloatingArray( np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype), - mask=a._mask, + mask=a._mask.to_numpy(), ) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 4007ee6b415c9..699153b2c0639 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -23,7 +23,7 @@ def test_floating_array_constructor(): expected = pd.array([1, 2, 3, np.nan], dtype="Float64") tm.assert_extension_array_equal(result, expected) tm.assert_numpy_array_equal(result._data, values) - tm.assert_numpy_array_equal(result._mask, mask) + tm.assert_numpy_array_equal(result._mask.to_numpy(), mask) msg = r".* should be .* numpy array. Use the 'pd.array' function instead" with pytest.raises(TypeError, match=msg): @@ -62,11 +62,11 @@ def test_floating_array_constructor_copy(): result = FloatingArray(values, mask) assert result._data is values - assert result._mask is mask + # assert result._mask is mask result = FloatingArray(values, mask, copy=True) assert result._data is not values - assert result._mask is not mask + # assert result._mask is not mask def test_to_array(): diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index ce6c245cd0f37..286d884994c44 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -248,7 +248,7 @@ def test_arith_coerce_scalar(data, all_arithmetic_operators): # rmod results in NaN that wasn't NA in original nullable Series -> unmask it if all_arithmetic_operators == "__rmod__": mask = (s == 0).fillna(False).to_numpy(bool) - expected.array._mask[mask] = False + expected.array._mask[mask.to_numpy()] = False tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_construction.py 
b/pandas/tests/arrays/integer/test_construction.py index 9ecfc51cb2208..f6ef5db17044b 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -100,11 +100,11 @@ def test_integer_array_constructor_copy(): result = IntegerArray(values, mask) assert result._data is values - assert result._mask is mask + # assert result._mask is mask result = IntegerArray(values, mask, copy=True) assert result._data is not values - assert result._mask is not mask + # assert result._mask is not mask @pytest.mark.parametrize( diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index f50b4cfd0b520..312fa90844847 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -163,7 +163,7 @@ def test_astype_copy(): result = arr.astype("Int64", copy=False) assert result is arr assert np.shares_memory(result._data, arr._data) - assert np.shares_memory(result._mask, arr._mask) + # assert np.shares_memory(result._mask, arr._mask) result[0] = 10 assert arr[0] == 10 result[0] = pd.NA diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index d48b636a98feb..40c9dcc697f46 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -26,7 +26,7 @@ def test_ufuncs_single_float(ufunc): a = pd.array([1, 2, -3, np.nan]) with np.errstate(invalid="ignore"): result = ufunc(a) - expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask) + expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask.to_numpy()) tm.assert_extension_array_equal(result, expected) s = pd.Series(a) diff --git a/pandas/tests/arrays/masked_shared.py b/pandas/tests/arrays/masked_shared.py index 3e74402263cf9..7adaf5f0a5859 100644 --- a/pandas/tests/arrays/masked_shared.py +++ b/pandas/tests/arrays/masked_shared.py @@ -16,7 +16,7 @@ def _compare_other(self, data, op, 
other): expected = pd.Series(op(data._data, other), dtype="boolean") # fill the nan locations - expected[data._mask] = pd.NA + expected[data._mask.to_numpy()] = pd.NA tm.assert_series_equal(result, expected) @@ -28,7 +28,7 @@ def _compare_other(self, data, op, other): expected = op(pd.Series(data._data), other).astype("boolean") # fill the nan locations - expected[data._mask] = pd.NA + expected[data._mask.to_numpy()] = pd.NA tm.assert_series_equal(result, expected) @@ -43,7 +43,7 @@ def test_scalar(self, other, comparison_op, dtype): expected = pd.array([None, None, None], dtype="boolean") else: values = op(left._data, other) - expected = pd.arrays.BooleanArray(values, left._mask, copy=True) + expected = pd.arrays.BooleanArray(values, left._mask.to_numpy(), copy=True) tm.assert_extension_array_equal(result, expected) # ensure we haven't mutated anything inplace @@ -74,7 +74,7 @@ def test_array(self, comparison_op, dtype): result = op(left, right) values = op(left._data, right._data) - mask = left._mask | right._mask + mask = left._mask.to_numpy() | right._mask.to_numpy() expected = pd.arrays.BooleanArray(values, mask) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 3b627f2fae845..eb7b2f585b426 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -277,9 +277,11 @@ def test_ensure_copied_data(self, index): tm.assert_numpy_array_equal( index._values._data, result._values._data, check_same="same" ) - assert np.shares_memory(index._values._mask, result._values._mask) + # assert np.shares_memory(index._values._mask, result._values._mask) tm.assert_numpy_array_equal( - index._values._mask, result._values._mask, check_same="same" + index._values._mask.to_numpy(), + result._values._mask.to_numpy(), + check_same="same", ) elif index.dtype == "string[python]": assert np.shares_memory(index._values._ndarray, result._values._ndarray) 
From cb1b2740c79f81bf0c77f9ba282ce780275367ca Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 12 Aug 2023 08:06:04 -0400 Subject: [PATCH 008/126] Revert "more to_numpy adds" This reverts commit a39581bc65259fcd8b2c6c5158869562c90f1047. --- pandas/tests/arrays/boolean/test_comparison.py | 2 +- pandas/tests/arrays/boolean/test_construction.py | 4 ++-- pandas/tests/arrays/boolean/test_function.py | 14 +++++++------- pandas/tests/arrays/boolean/test_logical.py | 4 ++-- pandas/tests/arrays/categorical/test_astype.py | 1 - pandas/tests/arrays/floating/test_arithmetic.py | 2 +- pandas/tests/arrays/floating/test_construction.py | 6 +++--- pandas/tests/arrays/integer/test_arithmetic.py | 2 +- pandas/tests/arrays/integer/test_construction.py | 4 ++-- pandas/tests/arrays/integer/test_dtypes.py | 2 +- pandas/tests/arrays/integer/test_function.py | 2 +- pandas/tests/arrays/masked_shared.py | 8 ++++---- pandas/tests/indexes/test_old_base.py | 6 ++---- 13 files changed, 27 insertions(+), 30 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_comparison.py b/pandas/tests/arrays/boolean/test_comparison.py index b6f04deca1a7b..2eeb9da574b1e 100644 --- a/pandas/tests/arrays/boolean/test_comparison.py +++ b/pandas/tests/arrays/boolean/test_comparison.py @@ -46,7 +46,7 @@ def test_array(self, comparison_op): result = op(a, b) values = op(a._data, b._data) - mask = a._mask.to_numpy() | b._mask.to_numpy() + mask = a._mask | b._mask expected = BooleanArray(values, mask) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index cb24a9dd778a0..d26eea19c06e9 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -40,11 +40,11 @@ def test_boolean_array_constructor_copy(): result = BooleanArray(values, mask) assert result._data is values - # assert result._mask is mask + assert result._mask is mask result = 
BooleanArray(values, mask, copy=True) assert result._data is not values - # assert result._mask is not mask + assert result._mask is not mask def test_to_boolean_array(): diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index bf51035678db7..2b3f3d3d16ac6 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -13,36 +13,36 @@ def test_ufuncs_binary(ufunc): a = pd.array([True, False, None], dtype="boolean") result = ufunc(a, a) expected = pd.array(ufunc(a._data, a._data), dtype="boolean") - expected[a._mask.to_numpy()] = np.nan + expected[a._mask] = np.nan tm.assert_extension_array_equal(result, expected) s = pd.Series(a) result = ufunc(s, a) expected = pd.Series(ufunc(a._data, a._data), dtype="boolean") - expected[a._mask.to_numpy()] = np.nan + expected[a._mask] = np.nan tm.assert_series_equal(result, expected) # Boolean with numpy array arr = np.array([True, True, False]) result = ufunc(a, arr) expected = pd.array(ufunc(a._data, arr), dtype="boolean") - expected[a._mask.to_numpy()] = np.nan + expected[a._mask] = np.nan tm.assert_extension_array_equal(result, expected) result = ufunc(arr, a) expected = pd.array(ufunc(arr, a._data), dtype="boolean") - expected[a._mask.to_numpy()] = np.nan + expected[a._mask] = np.nan tm.assert_extension_array_equal(result, expected) # BooleanArray with scalar result = ufunc(a, True) expected = pd.array(ufunc(a._data, True), dtype="boolean") - expected[a._mask.to_numpy()] = np.nan + expected[a._mask] = np.nan tm.assert_extension_array_equal(result, expected) result = ufunc(True, a) expected = pd.array(ufunc(True, a._data), dtype="boolean") - expected[a._mask.to_numpy()] = np.nan + expected[a._mask] = np.nan tm.assert_extension_array_equal(result, expected) # not handled types @@ -56,7 +56,7 @@ def test_ufuncs_unary(ufunc): a = pd.array([True, False, None], dtype="boolean") result = ufunc(a) expected = 
pd.array(ufunc(a._data), dtype="boolean") - expected[a._mask.to_numpy()] = np.nan + expected[a._mask] = np.nan tm.assert_extension_array_equal(result, expected) ser = pd.Series(a) diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py index 4cdaf3a90b21d..66c117ea3fc66 100644 --- a/pandas/tests/arrays/boolean/test_logical.py +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -238,8 +238,8 @@ def test_no_masked_assumptions(self, other, all_logical_operators): tm.assert_extension_array_equal(result, expected) if isinstance(other, BooleanArray): - other._data[other._mask.to_numpy()] = True - a._data[a._mask.to_numpy()] = False + other._data[other._mask] = True + a._data[a._mask] = False result = getattr(a, all_logical_operators)(other) expected = getattr(b, all_logical_operators)(other) diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index a7d5ecda3c644..d2f9f6dffab49 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -146,7 +146,6 @@ def test_astype_object_timestamp_categories(self): expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object") tm.assert_numpy_array_equal(result, expected) - @pytest.skip("not applicable with bitmask") def test_astype_category_readonly_mask_values(self): # GH#53658 arr = array([0, 1, 2], dtype="Int64") diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index 8ee291d6fd6f5..056c22d8c1131 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -67,7 +67,7 @@ def test_pow_scalar(dtype): # TODO np.nan should be converted to pd.NA / missing before operation? 
expected = FloatingArray( np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype), - mask=a._mask.to_numpy(), + mask=a._mask, ) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 699153b2c0639..4007ee6b415c9 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -23,7 +23,7 @@ def test_floating_array_constructor(): expected = pd.array([1, 2, 3, np.nan], dtype="Float64") tm.assert_extension_array_equal(result, expected) tm.assert_numpy_array_equal(result._data, values) - tm.assert_numpy_array_equal(result._mask.to_numpy(), mask) + tm.assert_numpy_array_equal(result._mask, mask) msg = r".* should be .* numpy array. Use the 'pd.array' function instead" with pytest.raises(TypeError, match=msg): @@ -62,11 +62,11 @@ def test_floating_array_constructor_copy(): result = FloatingArray(values, mask) assert result._data is values - # assert result._mask is mask + assert result._mask is mask result = FloatingArray(values, mask, copy=True) assert result._data is not values - # assert result._mask is not mask + assert result._mask is not mask def test_to_array(): diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index 286d884994c44..ce6c245cd0f37 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -248,7 +248,7 @@ def test_arith_coerce_scalar(data, all_arithmetic_operators): # rmod results in NaN that wasn't NA in original nullable Series -> unmask it if all_arithmetic_operators == "__rmod__": mask = (s == 0).fillna(False).to_numpy(bool) - expected.array._mask[mask.to_numpy()] = False + expected.array._mask[mask] = False tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_construction.py 
b/pandas/tests/arrays/integer/test_construction.py index f6ef5db17044b..9ecfc51cb2208 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -100,11 +100,11 @@ def test_integer_array_constructor_copy(): result = IntegerArray(values, mask) assert result._data is values - # assert result._mask is mask + assert result._mask is mask result = IntegerArray(values, mask, copy=True) assert result._data is not values - # assert result._mask is not mask + assert result._mask is not mask @pytest.mark.parametrize( diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 312fa90844847..f50b4cfd0b520 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -163,7 +163,7 @@ def test_astype_copy(): result = arr.astype("Int64", copy=False) assert result is arr assert np.shares_memory(result._data, arr._data) - # assert np.shares_memory(result._mask, arr._mask) + assert np.shares_memory(result._mask, arr._mask) result[0] = 10 assert arr[0] == 10 result[0] = pd.NA diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 40c9dcc697f46..d48b636a98feb 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -26,7 +26,7 @@ def test_ufuncs_single_float(ufunc): a = pd.array([1, 2, -3, np.nan]) with np.errstate(invalid="ignore"): result = ufunc(a) - expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask.to_numpy()) + expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask) tm.assert_extension_array_equal(result, expected) s = pd.Series(a) diff --git a/pandas/tests/arrays/masked_shared.py b/pandas/tests/arrays/masked_shared.py index 7adaf5f0a5859..3e74402263cf9 100644 --- a/pandas/tests/arrays/masked_shared.py +++ b/pandas/tests/arrays/masked_shared.py @@ -16,7 +16,7 @@ def _compare_other(self, data, op, 
other): expected = pd.Series(op(data._data, other), dtype="boolean") # fill the nan locations - expected[data._mask.to_numpy()] = pd.NA + expected[data._mask] = pd.NA tm.assert_series_equal(result, expected) @@ -28,7 +28,7 @@ def _compare_other(self, data, op, other): expected = op(pd.Series(data._data), other).astype("boolean") # fill the nan locations - expected[data._mask.to_numpy()] = pd.NA + expected[data._mask] = pd.NA tm.assert_series_equal(result, expected) @@ -43,7 +43,7 @@ def test_scalar(self, other, comparison_op, dtype): expected = pd.array([None, None, None], dtype="boolean") else: values = op(left._data, other) - expected = pd.arrays.BooleanArray(values, left._mask.to_numpy(), copy=True) + expected = pd.arrays.BooleanArray(values, left._mask, copy=True) tm.assert_extension_array_equal(result, expected) # ensure we haven't mutated anything inplace @@ -74,7 +74,7 @@ def test_array(self, comparison_op, dtype): result = op(left, right) values = op(left._data, right._data) - mask = left._mask.to_numpy() | right._mask.to_numpy() + mask = left._mask | right._mask expected = pd.arrays.BooleanArray(values, mask) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index eb7b2f585b426..3b627f2fae845 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -277,11 +277,9 @@ def test_ensure_copied_data(self, index): tm.assert_numpy_array_equal( index._values._data, result._values._data, check_same="same" ) - # assert np.shares_memory(index._values._mask, result._values._mask) + assert np.shares_memory(index._values._mask, result._values._mask) tm.assert_numpy_array_equal( - index._values._mask.to_numpy(), - result._values._mask.to_numpy(), - check_same="same", + index._values._mask, result._values._mask, check_same="same" ) elif index.dtype == "string[python]": assert np.shares_memory(index._values._ndarray, result._values._ndarray) 
From dabe1b6efb5ce11e0a0cf65fae88e7259ee914ca Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 12 Aug 2023 08:08:24 -0400 Subject: [PATCH 009/126] implement __or__ --- pandas/_libs/arrays.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index c8440de16bc38..242e2d0ea8667 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -242,6 +242,9 @@ cdef class BitMaskArray: def __invert__(self): return ~self.to_numpy() + def __or__(self, other): + return self.to_numpy().__or__(other) + def to_numpy(self) -> ndarray: cdef ndarray[uint8_t] result result = np.empty(self.array_len, dtype=bool) From 28f7ab18661a63d052c95a2c0d310c4f5671725d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 12 Aug 2023 13:33:08 -0400 Subject: [PATCH 010/126] checkpoint --- pandas/_libs/arrays.pyx | 110 ++++++++++++++++++++++---- pandas/_libs/index.pyx | 2 +- pandas/core/algorithms.py | 2 +- pandas/core/arrays/boolean.py | 2 +- pandas/core/arrays/masked.py | 34 ++++---- pandas/core/arrays/numeric.py | 2 +- pandas/core/reshape/merge.py | 4 +- pandas/tests/indexes/test_old_base.py | 8 +- 8 files changed, 123 insertions(+), 41 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 242e2d0ea8667..27f36c80802a8 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -21,7 +21,10 @@ from libc.stdlib cimport ( malloc, ) -from pandas._libs.lib import is_list_like +from pandas._libs.lib import ( + is_list_like, + is_scalar, +) cdef extern from "pandas/vendored/nanoarrow.h": @@ -209,34 +212,105 @@ cdef class NDArrayBacked: return to_concat[0]._from_backing_data(new_arr) +def _unpickle_bitmaskarray(array): + bma = BitMaskArray(array) + return bma + + cdef class BitMaskArray: - cdef array_len cdef uint8_t* validity_buffer - - def __cinit__(self, np_array): - self.array_len = len(np_array) - nbytes = len(np_array) // 8 + 1 - self.validity_buffer = malloc(nbytes) - for index, value in 
enumerate(np_array): - self[index] = value + cdef public: + int array_len + int nbytes + + def __cinit__(self, data): + if isinstance(data, np.ndarray): + self.array_len = len(data) + self.nbytes = len(data) // 8 + 1 + self.validity_buffer = malloc(self.nbytes) + for index, value in enumerate(data): + self[index] = value + elif isinstance(data, type(self)): + self.array_len = data.array_len + self.nbytes = data.nbytes + self.validity_buffer = malloc(self.nbytes) + + # TODO: tried making validity_buffer public with memcpy but got + # Cannot convert Python object to 'const void *' error + for i in range(self.nbytes): + if data[i]: + ArrowBitSet(self.validity_buffer, i) + else: + ArrowBitClear(self.validity_buffer, i) + else: + raise TypeError("Unsupported argument to BitMaskArray constructor") def __dealloc__(self): free(self.validity_buffer) def __setitem__(self, key, value): if is_list_like(key): - for k in key: + if is_scalar(value): + for index, k in enumerate(key): + if not k: + continue + if value: + ArrowBitSet(self.validity_buffer, index) + else: + ArrowBitClear(self.validity_buffer, index) + else: + if len(key) != len(value): + raise ValueError("Must provide an equal number of elements to mask") + for index, (k, v) in enumerate(zip(key, value)): + if not k: + continue + if v: + ArrowBitSet(self.validity_buffer, index) + else: + ArrowBitClear(self.validity_buffer, index) + elif isinstance(key, slice): + pos = key.start if key.start else 0 + end = key.stop + step = key.step if key.step else 1 + + if not end: + return + + if step > 0: + while pos < end: + if value: + ArrowBitSet(self.validity_buffer, pos) + else: + ArrowBitClear(self.validity_buffer, pos) + + pos += step + elif step < 0: + while pos > end: + if value: + ArrowBitSet(self.validity_buffer, pos) + else: + ArrowBitClear(self.validity_buffer, pos) + + pos += step + else: + if is_scalar(value): if value: - ArrowBitSet(self.validity_buffer, k) + ArrowBitSet(self.validity_buffer, key) else: - 
ArrowBitClear(self.validity_buffer, k) - else: - if value: - ArrowBitSet(self.validity_buffer, key) + ArrowBitClear(self.validity_buffer, key) else: - ArrowBitClear(self.validity_buffer, key) + for val in value: + if val: + ArrowBitSet(self.validity_buffer, key) + else: + ArrowBitClear(self.validity_buffer, key) def __getitem__(self, key): + if is_list_like(key): + return np.array([bool(ArrowBitGet(self.validity_buffer, k)) for k in key]) + elif isinstance(key, slice): + return self.to_numpy()[key] + return bool(ArrowBitGet(self.validity_buffer, key)) def __invert__(self): @@ -245,6 +319,10 @@ cdef class BitMaskArray: def __or__(self, other): return self.to_numpy().__or__(other) + def __reduce__(self): + object_state = (self.to_numpy(),) + return (_unpickle_bitmaskarray, object_state) + def to_numpy(self) -> ndarray: cdef ndarray[uint8_t] result result = np.empty(self.array_len, dtype=bool) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index e974b5d0eec46..24c81b01e897b 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1182,7 +1182,7 @@ cdef class MaskedIndexEngine(IndexEngine): def _get_mask(self, object values) -> np.ndarray: if hasattr(values, "_mask"): - return values._mask + return values._mask.to_numpy() # We are an ArrowExtensionArray return values.isna() diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 14dee202a9d8d..3861a18316563 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -998,7 +998,7 @@ def duplicated( """ if hasattr(values, "dtype") and isinstance(values.dtype, BaseMaskedDtype): values = cast("BaseMaskedArray", values) - return htable.duplicated(values._data, keep=keep, mask=values._mask) + return htable.duplicated(values._data, keep=keep, mask=values._mask.to_numpy()) values = _ensure_data(values) return htable.duplicated(values, keep=keep) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 43344f04085ae..f63f642b37450 100644 --- 
a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -168,7 +168,7 @@ def coerce_to_array( if isinstance(values, BooleanArray): if mask is not None: raise ValueError("cannot pass mask for BooleanArray input") - values, mask = values._data, values._mask + values, mask = values._data, values._mask.to_numpy() if copy: values = values.copy() mask = mask.copy() diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index fb47982d3807c..5e5813d405f98 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -237,7 +237,7 @@ def fillna( if method is not None: func = missing.get_fill_func(method, ndim=self.ndim) npvalues = self._data.T - new_mask = mask.to_numpy().T + new_mask = mask.T if copy: npvalues = npvalues.copy() new_mask = new_mask.copy() @@ -623,7 +623,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): inputs2 = [] for x in inputs: if isinstance(x, BaseMaskedArray): - mask |= x._mask + mask |= x._mask.to_numpy() inputs2.append(x._data) else: inputs2.append(x) @@ -1095,7 +1095,7 @@ def equals(self, other) -> bool: # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT # equal. 
- if not np.array_equal(self._mask.to_numpy(), other._mask): + if not np.array_equal(self._mask.to_numpy(), other._mask.to_numpy()): return False left = self._data[~self._mask.to_numpy()] @@ -1221,7 +1221,7 @@ def sum( result = masked_reductions.sum( self._data, - self._mask, + self._mask.to_numpy(), skipna=skipna, min_count=min_count, axis=axis, @@ -1242,7 +1242,7 @@ def prod( result = masked_reductions.prod( self._data, - self._mask, + self._mask.to_numpy(), skipna=skipna, min_count=min_count, axis=axis, @@ -1255,7 +1255,7 @@ def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): nv.validate_mean((), kwargs) result = masked_reductions.mean( self._data, - self._mask, + self._mask.to_numpy(), skipna=skipna, axis=axis, ) @@ -1267,7 +1267,7 @@ def var( nv.validate_stat_ddof_func((), kwargs, fname="var") result = masked_reductions.var( self._data, - self._mask, + self._mask.to_numpy(), skipna=skipna, axis=axis, ddof=ddof, @@ -1280,7 +1280,7 @@ def std( nv.validate_stat_ddof_func((), kwargs, fname="std") result = masked_reductions.std( self._data, - self._mask, + self._mask.to_numpy(), skipna=skipna, axis=axis, ddof=ddof, @@ -1291,7 +1291,7 @@ def min(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): nv.validate_min((), kwargs) result = masked_reductions.min( self._data, - self._mask, + self._mask.to_numpy(), skipna=skipna, axis=axis, ) @@ -1301,7 +1301,7 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): nv.validate_max((), kwargs) result = masked_reductions.max( self._data, - self._mask, + self._mask.to_numpy(), skipna=skipna, axis=axis, ) @@ -1378,7 +1378,9 @@ def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): # _NestedSequence[_SupportsArray[dtype[Any]]], # bool, int, float, complex, str, bytes, # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" - np.putmask(values, self._mask, self._falsey_value) # type: ignore[arg-type] + np.putmask( + values, 
self._mask.to_numpy(), self._falsey_value + ) # type: ignore[arg-type] result = values.any() if skipna: return result @@ -1459,7 +1461,9 @@ def all(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): # _NestedSequence[_SupportsArray[dtype[Any]]], # bool, int, float, complex, str, bytes, # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" - np.putmask(values, self._mask, self._truthy_value) # type: ignore[arg-type] + np.putmask( + values, self._mask.to_numpy(), self._truthy_value + ) # type: ignore[arg-type] result = values.all(axis=axis) if skipna: @@ -1474,7 +1478,7 @@ def _accumulate( self, name: str, *, skipna: bool = True, **kwargs ) -> BaseMaskedArray: data = self._data - mask = self._mask + mask = self._mask.to_numpy() op = getattr(masked_accumulations, name) data, mask = op(data, mask, skipna=skipna, **kwargs) @@ -1500,7 +1504,7 @@ def _groupby_op( op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na) # libgroupby functions are responsible for NOT altering mask - mask = self._mask + mask = self._mask.to_numpy() if op.kind != "aggregate": result_mask = mask.copy() else: @@ -1537,7 +1541,7 @@ def transpose_homogeneous_masked_arrays( values = [arr._data.reshape(1, -1) for arr in masked_arrays] transposed_values = np.concatenate(values, axis=0) - masks = [arr._mask.reshape(1, -1) for arr in masked_arrays] + masks = [arr._mask.to_numpy().reshape(1, -1) for arr in masked_arrays] transposed_masks = np.concatenate(masks, axis=0) dtype = masked_arrays[0].dtype diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 0e86c1efba17a..304e4f4097a69 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -146,7 +146,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype cls = dtype_cls.construct_array_type() if isinstance(values, cls): - values, mask = values._data, values._mask + values, mask = values._data, values._mask.to_numpy() if dtype is 
not None: values = values.astype(dtype.numpy_dtype, copy=False) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6987a0ac7bf6b..102000ebae57e 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2449,8 +2449,8 @@ def _factorize_keys( if isinstance(lk, BaseMaskedArray): assert isinstance(rk, BaseMaskedArray) - llab = rizer.factorize(lk._data, mask=lk._mask) - rlab = rizer.factorize(rk._data, mask=rk._mask) + llab = rizer.factorize(lk._data, mask=lk._mask.to_numpy()) + rlab = rizer.factorize(rk._data, mask=rk._mask.to_numpy()) elif isinstance(lk, ArrowExtensionArray): assert isinstance(rk, ArrowExtensionArray) # we can only get here with numeric dtypes diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 3b627f2fae845..3250e16be64e9 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -277,10 +277,10 @@ def test_ensure_copied_data(self, index): tm.assert_numpy_array_equal( index._values._data, result._values._data, check_same="same" ) - assert np.shares_memory(index._values._mask, result._values._mask) - tm.assert_numpy_array_equal( - index._values._mask, result._values._mask, check_same="same" - ) + # assert np.shares_memory(index._values._mask, result._values._mask) + # tm.assert_numpy_array_equal( + # index._values._mask, result._values._mask, check_same="same" + # ) elif index.dtype == "string[python]": assert np.shares_memory(index._values._ndarray, result._values._ndarray) tm.assert_numpy_array_equal( From 43f3cbc4aa9a3dd625851b8f1058e7f2d477fe39 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 12 Aug 2023 15:40:34 -0400 Subject: [PATCH 011/126] more cleanups --- pandas/_libs/arrays.pyx | 80 ++++--------------- pandas/core/arrays/boolean.py | 16 ++-- pandas/core/arrays/masked.py | 12 +-- pandas/core/arrays/string_.py | 2 +- pandas/core/arrays/string_arrow.py | 2 +- .../tests/arrays/boolean/test_construction.py | 12 
+-- pandas/tests/arrays/boolean/test_function.py | 16 ++-- pandas/tests/arrays/boolean/test_logical.py | 4 +- .../tests/arrays/categorical/test_astype.py | 1 + .../tests/arrays/floating/test_arithmetic.py | 5 +- pandas/tests/arrays/floating/test_astype.py | 2 +- .../tests/arrays/floating/test_comparison.py | 2 +- .../arrays/floating/test_construction.py | 6 +- .../tests/arrays/integer/test_construction.py | 4 +- pandas/tests/arrays/integer/test_dtypes.py | 2 +- pandas/tests/arrays/integer/test_function.py | 2 +- pandas/tests/arrays/masked_shared.py | 6 +- 17 files changed, 67 insertions(+), 107 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 27f36c80802a8..d6afa8b98e2bd 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -21,11 +21,6 @@ from libc.stdlib cimport ( malloc, ) -from pandas._libs.lib import ( - is_list_like, - is_scalar, -) - cdef extern from "pandas/vendored/nanoarrow.h": int8_t ArrowBitGet(const uint8_t*, int64_t) @@ -249,75 +244,34 @@ cdef class BitMaskArray: free(self.validity_buffer) def __setitem__(self, key, value): - if is_list_like(key): - if is_scalar(value): - for index, k in enumerate(key): - if not k: - continue - if value: - ArrowBitSet(self.validity_buffer, index) - else: - ArrowBitClear(self.validity_buffer, index) + if isinstance(key, int): + if value: + ArrowBitSet(self.validity_buffer, key) else: - if len(key) != len(value): - raise ValueError("Must provide an equal number of elements to mask") - for index, (k, v) in enumerate(zip(key, value)): - if not k: - continue - if v: - ArrowBitSet(self.validity_buffer, index) - else: - ArrowBitClear(self.validity_buffer, index) - elif isinstance(key, slice): - pos = key.start if key.start else 0 - end = key.stop - step = key.step if key.step else 1 - - if not end: - return - - if step > 0: - while pos < end: - if value: - ArrowBitSet(self.validity_buffer, pos) - else: - ArrowBitClear(self.validity_buffer, pos) - - pos += step - elif step < 0: 
- while pos > end: - if value: - ArrowBitSet(self.validity_buffer, pos) - else: - ArrowBitClear(self.validity_buffer, pos) - - pos += step + ArrowBitClear(self.validity_buffer, key) else: - if is_scalar(value): - if value: - ArrowBitSet(self.validity_buffer, key) + arr = self.to_numpy() + arr[key] = value + for index, val in enumerate(arr): + if val: + ArrowBitSet(self.validity_buffer, index) else: - ArrowBitClear(self.validity_buffer, key) - else: - for val in value: - if val: - ArrowBitSet(self.validity_buffer, key) - else: - ArrowBitClear(self.validity_buffer, key) + ArrowBitClear(self.validity_buffer, index) def __getitem__(self, key): - if is_list_like(key): - return np.array([bool(ArrowBitGet(self.validity_buffer, k)) for k in key]) - elif isinstance(key, slice): + if isinstance(key, int): + return bool(ArrowBitGet(self.validity_buffer, key)) + else: return self.to_numpy()[key] - return bool(ArrowBitGet(self.validity_buffer, key)) - def __invert__(self): return ~self.to_numpy() def __or__(self, other): - return self.to_numpy().__or__(other) + if isinstance(other, type(self)): + return self.to_numpy() | other.to_numpy() + else: + return self.to_numpy() | other def __reduce__(self): object_state = (self.to_numpy(),) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index f63f642b37450..03f2a2cd0e07e 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -360,7 +360,7 @@ def _logical_method(self, other, op): mask = None if isinstance(other, BooleanArray): - other, mask = other._data, other._mask + other, mask = other._data, other._mask.to_numpy() elif is_list_like(other): other = np.asarray(other, dtype="bool") if other.ndim > 1: @@ -379,12 +379,16 @@ def _logical_method(self, other, op): raise ValueError("Lengths must match") if op.__name__ in {"or_", "ror_"}: - result, mask = ops.kleene_or(self._data, other, self._mask, mask) + result, mask = ops.kleene_or(self._data, other, self._mask.to_numpy(), mask) elif 
op.__name__ in {"and_", "rand_"}: - result, mask = ops.kleene_and(self._data, other, self._mask, mask) + result, mask = ops.kleene_and( + self._data, other, self._mask.to_numpy(), mask + ) else: # i.e. xor, rxor - result, mask = ops.kleene_xor(self._data, other, self._mask, mask) + result, mask = ops.kleene_xor( + self._data, other, self._mask.to_numpy(), mask + ) # i.e. BooleanArray return self._maybe_mask_result(result, mask) @@ -393,7 +397,7 @@ def _accumulate( self, name: str, *, skipna: bool = True, **kwargs ) -> BaseMaskedArray: data = self._data - mask = self._mask + mask = self._mask.to_numpy() if name in ("cummin", "cummax"): op = getattr(masked_accumulations, name) data, mask = op(data, mask, skipna=skipna, **kwargs) @@ -401,6 +405,6 @@ def _accumulate( else: from pandas.core.arrays import IntegerArray - return IntegerArray(data.astype(int), mask)._accumulate( + return IntegerArray(data.astype(int), mask.to_numpy())._accumulate( name, skipna=skipna, **kwargs ) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 5e5813d405f98..ed63b06760e69 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -201,13 +201,13 @@ def pad_or_backfill( limit_area: Literal["inside", "outside"] | None = None, copy: bool = True, ) -> Self: - mask = self._mask + mask = self._mask.to_numpy() if mask.any(): func = missing.get_fill_func(method, ndim=self.ndim) npvalues = self._data.T - new_mask = mask.to_numpy().T + new_mask = mask.T if copy: npvalues = npvalues.copy() new_mask = new_mask.copy() @@ -696,7 +696,7 @@ def _propagate_mask( elif is_list_like(other) and len(other) == len(mask): mask = mask | isna(other) else: - mask = self._mask.to_numpy() | mask.to_numpy() + mask = self._mask | mask # Incompatible return value type (got "Optional[ndarray[Any, dtype[bool_]]]", # expected "ndarray[Any, dtype[bool_]]") return mask # type: ignore[return-value] @@ -802,7 +802,7 @@ def _cmp_method(self, other, op) -> BooleanArray: mask = 
None if isinstance(other, BaseMaskedArray): - other, mask = other._data, other._mask + other, mask = other._data, other._mask.to_numpy() elif is_list_like(other): other = np.asarray(other) @@ -869,7 +869,7 @@ def _maybe_mask_result( # e.g. test_numeric_arr_mul_tdscalar_numexpr_path from pandas.core.arrays import TimedeltaArray - result[mask.to_numpy()] = result.dtype.type("NaT") + result[mask] = result.dtype.type("NaT") if not isinstance(result, TimedeltaArray): return TimedeltaArray._simple_new(result, dtype=result.dtype) @@ -882,7 +882,7 @@ def _maybe_mask_result( return IntegerArray(result, mask, copy=False) else: - result[mask.to_numpy()] = np.nan + result[mask] = np.nan return result def isna(self) -> np.ndarray: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 25f1c2ec6ce4f..b55e4fa0b3a72 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -349,7 +349,7 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal if isinstance(scalars, BaseMaskedArray): # avoid costly conversion to object dtype - na_values = scalars._mask + na_values = scalars._mask.to_numpy() result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) result[na_values] = libmissing.NA diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 4a70fcf6b5a93..27d38bf4b6152 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -149,7 +149,7 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False) if isinstance(scalars, BaseMaskedArray): # avoid costly conversion to object dtype in ensure_string_array and # numerical issues with Float32Dtype - na_values = scalars._mask + na_values = scalars._mask.to_numpy() result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) return cls(pa.array(result, mask=na_values, type=pa.string())) diff --git 
a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index d26eea19c06e9..89f8e52fe21c8 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -40,11 +40,11 @@ def test_boolean_array_constructor_copy(): result = BooleanArray(values, mask) assert result._data is values - assert result._mask is mask + # assert result._mask is mask result = BooleanArray(values, mask, copy=True) assert result._data is not values - assert result._mask is not mask + # assert result._mask is not mask def test_to_boolean_array(): @@ -159,12 +159,12 @@ def test_coerce_to_array(): expected = BooleanArray(values, mask) tm.assert_extension_array_equal(result, expected) assert result._data is values - assert result._mask is mask + # assert result._mask is mask result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True)) expected = BooleanArray(values, mask) tm.assert_extension_array_equal(result, expected) assert result._data is not values - assert result._mask is not mask + # assert result._mask is not mask # mixed missing from values and mask values = [True, False, None, False] @@ -202,12 +202,12 @@ def test_coerce_to_array_from_boolean_array(): tm.assert_extension_array_equal(result, arr) # no copy assert result._data is arr._data - assert result._mask is arr._mask + # assert result._mask is arr._mask result = BooleanArray(*coerce_to_array(arr), copy=True) tm.assert_extension_array_equal(result, arr) assert result._data is not arr._data - assert result._mask is not arr._mask + # assert result._mask is not arr._mask with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"): coerce_to_array(arr, mask=mask) diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 2b3f3d3d16ac6..b58ec19dff329 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py 
@@ -13,36 +13,36 @@ def test_ufuncs_binary(ufunc): a = pd.array([True, False, None], dtype="boolean") result = ufunc(a, a) expected = pd.array(ufunc(a._data, a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) s = pd.Series(a) result = ufunc(s, a) expected = pd.Series(ufunc(a._data, a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_series_equal(result, expected) # Boolean with numpy array arr = np.array([True, True, False]) result = ufunc(a, arr) expected = pd.array(ufunc(a._data, arr), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) result = ufunc(arr, a) expected = pd.array(ufunc(arr, a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) # BooleanArray with scalar result = ufunc(a, True) expected = pd.array(ufunc(a._data, True), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) result = ufunc(True, a) expected = pd.array(ufunc(True, a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) # not handled types @@ -56,13 +56,13 @@ def test_ufuncs_unary(ufunc): a = pd.array([True, False, None], dtype="boolean") result = ufunc(a) expected = pd.array(ufunc(a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_extension_array_equal(result, expected) ser = pd.Series(a) result = ufunc(ser) expected = pd.Series(ufunc(a._data), dtype="boolean") - expected[a._mask] = np.nan + expected[a._mask.to_numpy()] = np.nan tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/boolean/test_logical.py 
b/pandas/tests/arrays/boolean/test_logical.py index 66c117ea3fc66..4cdaf3a90b21d 100644 --- a/pandas/tests/arrays/boolean/test_logical.py +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -238,8 +238,8 @@ def test_no_masked_assumptions(self, other, all_logical_operators): tm.assert_extension_array_equal(result, expected) if isinstance(other, BooleanArray): - other._data[other._mask] = True - a._data[a._mask] = False + other._data[other._mask.to_numpy()] = True + a._data[a._mask.to_numpy()] = False result = getattr(a, all_logical_operators)(other) expected = getattr(b, all_logical_operators)(other) diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index d2f9f6dffab49..ace785e6ae5c8 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -146,6 +146,7 @@ def test_astype_object_timestamp_categories(self): expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object") tm.assert_numpy_array_equal(result, expected) + @pytest.mark.skip(reason="Not applicable with bitmask backed arrays") def test_astype_category_readonly_mask_values(self): # GH#53658 arr = array([0, 1, 2], dtype="Int64") diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index 056c22d8c1131..f7fd08361f5e1 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -67,7 +67,7 @@ def test_pow_scalar(dtype): # TODO np.nan should be converted to pd.NA / missing before operation? 
expected = FloatingArray( np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype), - mask=a._mask, + mask=a._mask.to_numpy(), ) tm.assert_extension_array_equal(result, expected) @@ -88,7 +88,8 @@ def test_pow_scalar(dtype): result = np.nan**a expected = FloatingArray( - np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), mask=a._mask + np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), + mask=a._mask.to_numpy(), ) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_astype.py b/pandas/tests/arrays/floating/test_astype.py index ade3dbd2c99da..cc193cc644ec4 100644 --- a/pandas/tests/arrays/floating/test_astype.py +++ b/pandas/tests/arrays/floating/test_astype.py @@ -88,7 +88,7 @@ def test_astype_copy(): result = arr.astype("Float64", copy=False) assert result is arr assert np.shares_memory(result._data, arr._data) - assert np.shares_memory(result._mask, arr._mask) + # assert np.shares_memory(result._mask, arr._mask) result[0] = 10 assert arr[0] == 10 result[0] = pd.NA diff --git a/pandas/tests/arrays/floating/test_comparison.py b/pandas/tests/arrays/floating/test_comparison.py index a429649f1ce1d..19eb02374d476 100644 --- a/pandas/tests/arrays/floating/test_comparison.py +++ b/pandas/tests/arrays/floating/test_comparison.py @@ -61,5 +61,5 @@ def test_equals_nan_vs_na(): # with mask[1] = True, the only difference is data[1], which should # not matter for equals - mask[1] = True + left._mask[1] = True assert left.equals(right) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 4007ee6b415c9..699153b2c0639 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -23,7 +23,7 @@ def test_floating_array_constructor(): expected = pd.array([1, 2, 3, np.nan], dtype="Float64") tm.assert_extension_array_equal(result, expected) 
tm.assert_numpy_array_equal(result._data, values) - tm.assert_numpy_array_equal(result._mask, mask) + tm.assert_numpy_array_equal(result._mask.to_numpy(), mask) msg = r".* should be .* numpy array. Use the 'pd.array' function instead" with pytest.raises(TypeError, match=msg): @@ -62,11 +62,11 @@ def test_floating_array_constructor_copy(): result = FloatingArray(values, mask) assert result._data is values - assert result._mask is mask + # assert result._mask is mask result = FloatingArray(values, mask, copy=True) assert result._data is not values - assert result._mask is not mask + # assert result._mask is not mask def test_to_array(): diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index 9ecfc51cb2208..f6ef5db17044b 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -100,11 +100,11 @@ def test_integer_array_constructor_copy(): result = IntegerArray(values, mask) assert result._data is values - assert result._mask is mask + # assert result._mask is mask result = IntegerArray(values, mask, copy=True) assert result._data is not values - assert result._mask is not mask + # assert result._mask is not mask @pytest.mark.parametrize( diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index f50b4cfd0b520..312fa90844847 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -163,7 +163,7 @@ def test_astype_copy(): result = arr.astype("Int64", copy=False) assert result is arr assert np.shares_memory(result._data, arr._data) - assert np.shares_memory(result._mask, arr._mask) + # assert np.shares_memory(result._mask, arr._mask) result[0] = 10 assert arr[0] == 10 result[0] = pd.NA diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index d48b636a98feb..40c9dcc697f46 100644 --- 
a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -26,7 +26,7 @@ def test_ufuncs_single_float(ufunc): a = pd.array([1, 2, -3, np.nan]) with np.errstate(invalid="ignore"): result = ufunc(a) - expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask) + expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask.to_numpy()) tm.assert_extension_array_equal(result, expected) s = pd.Series(a) diff --git a/pandas/tests/arrays/masked_shared.py b/pandas/tests/arrays/masked_shared.py index 3e74402263cf9..22caeb94a13a1 100644 --- a/pandas/tests/arrays/masked_shared.py +++ b/pandas/tests/arrays/masked_shared.py @@ -16,7 +16,7 @@ def _compare_other(self, data, op, other): expected = pd.Series(op(data._data, other), dtype="boolean") # fill the nan locations - expected[data._mask] = pd.NA + expected[data._mask.to_numpy()] = pd.NA tm.assert_series_equal(result, expected) @@ -28,7 +28,7 @@ def _compare_other(self, data, op, other): expected = op(pd.Series(data._data), other).astype("boolean") # fill the nan locations - expected[data._mask] = pd.NA + expected[data._mask.to_numpy()] = pd.NA tm.assert_series_equal(result, expected) @@ -43,7 +43,7 @@ def test_scalar(self, other, comparison_op, dtype): expected = pd.array([None, None, None], dtype="boolean") else: values = op(left._data, other) - expected = pd.arrays.BooleanArray(values, left._mask, copy=True) + expected = pd.arrays.BooleanArray(values, left._mask.to_numpy(), copy=True) tm.assert_extension_array_equal(result, expected) # ensure we haven't mutated anything inplace From 902cef986c666895b9df003d94f84430f8a0b233 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 12 Aug 2023 15:45:00 -0400 Subject: [PATCH 012/126] groupby support --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index dbb2d0e25de2e..bfb47db7fc43a 100644 --- 
a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4354,7 +4354,7 @@ def post_processor( def blk_func(values: ArrayLike) -> ArrayLike: orig_vals = values if isinstance(values, BaseMaskedArray): - mask = values._mask + mask = values._mask.to_numpy() result_mask = np.zeros((ngroups, nqs), dtype=np.bool_) else: mask = isna(values) From 4d4ebfed2bb1c42746bd8a4bc2235cb1ce964a26 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 03:25:49 -0400 Subject: [PATCH 013/126] prep for 2d --- pandas/core/arrays/boolean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 03f2a2cd0e07e..63c12efa4bc5c 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -405,6 +405,6 @@ def _accumulate( else: from pandas.core.arrays import IntegerArray - return IntegerArray(data.astype(int), mask.to_numpy())._accumulate( + return IntegerArray(data.astype(int), mask)._accumulate( name, skipna=skipna, **kwargs ) From 2898bb1a98118bd71fda12a1ad69cf8e3056ef7d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 03:39:37 -0400 Subject: [PATCH 014/126] support 2D --- pandas/_libs/arrays.pyx | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index d6afa8b98e2bd..5cb21178e9f57 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -215,18 +215,21 @@ def _unpickle_bitmaskarray(array): cdef class BitMaskArray: cdef uint8_t* validity_buffer cdef public: - int array_len + int array_size int nbytes + object array_shape def __cinit__(self, data): if isinstance(data, np.ndarray): - self.array_len = len(data) - self.nbytes = len(data) // 8 + 1 + self.array_size = data.size + self.array_shape = data.shape + self.nbytes = self.array_size // 8 + 1 self.validity_buffer = malloc(self.nbytes) - for index, value in enumerate(data): + for index, value in 
enumerate(data.flatten()): self[index] = value elif isinstance(data, type(self)): - self.array_len = data.array_len + self.array_size = data.array_size + self.array_shape = data.shape self.nbytes = data.nbytes self.validity_buffer = malloc(self.nbytes) @@ -252,7 +255,7 @@ cdef class BitMaskArray: else: arr = self.to_numpy() arr[key] = value - for index, val in enumerate(arr): + for index, val in enumerate(arr.flatten()): if val: ArrowBitSet(self.validity_buffer, index) else: @@ -279,8 +282,8 @@ cdef class BitMaskArray: def to_numpy(self) -> ndarray: cdef ndarray[uint8_t] result - result = np.empty(self.array_len, dtype=bool) - for i in range(self.array_len): + result = np.empty(self.array_size, dtype=bool) + for i in range(self.array_size): result[i] = self[i] - return result + return result.reshape(self.array_shape) From 108a86cbd8796010ce9cf6a8669944f145243a66 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 03:57:07 -0400 Subject: [PATCH 015/126] fix numeric --- pandas/core/tools/numeric.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index a50dbeb110bff..e3e84227b096f 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -203,7 +203,7 @@ def to_numeric( # save mask to reconstruct the full array after casting mask: npt.NDArray[np.bool_] | None = None if isinstance(values, BaseMaskedArray): - mask = values._mask + mask = values._mask.to_numpy() values = values._data[~mask] values_dtype = getattr(values, "dtype", None) @@ -278,8 +278,7 @@ def to_numeric( if mask is None or (new_mask is not None and new_mask.shape == mask.shape): # GH 52588 mask = new_mask - else: - mask = mask.copy() + assert isinstance(mask, np.ndarray) data = np.zeros(mask.shape, dtype=values.dtype) data[~mask] = values From 8decf2a6bc49c379e114e8367a60228953ade6cb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 04:00:40 -0400 Subject: [PATCH 016/126] temp 
pass for CI --- pandas/tests/frame/methods/test_quantile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 61b253b24a7ec..90f6ea65b823a 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -863,6 +863,7 @@ def test_quantile_ea(self, request, obj, index): tm.assert_equal(result, expected) + @pytest.mark.skip(reason="need to fix negative indexer with bitmask") def test_quantile_ea_with_na(self, obj, index): obj.iloc[0] = index._na_value obj.iloc[-1] = index._na_value From 3da7aa2523c0290c157613ad4330f3a6a34c1e8d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 05:04:44 -0400 Subject: [PATCH 017/126] fixed negative indexing --- pandas/_libs/arrays.pyx | 4 ++-- pandas/tests/frame/methods/test_quantile.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 5cb21178e9f57..033ca2a6e4f87 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -247,7 +247,7 @@ cdef class BitMaskArray: free(self.validity_buffer) def __setitem__(self, key, value): - if isinstance(key, int): + if isinstance(key, int) and key >= 0: if value: ArrowBitSet(self.validity_buffer, key) else: @@ -262,7 +262,7 @@ cdef class BitMaskArray: ArrowBitClear(self.validity_buffer, index) def __getitem__(self, key): - if isinstance(key, int): + if isinstance(key, int) and key >= 0: return bool(ArrowBitGet(self.validity_buffer, key)) else: return self.to_numpy()[key] diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 90f6ea65b823a..61b253b24a7ec 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -863,7 +863,6 @@ def test_quantile_ea(self, request, obj, index): tm.assert_equal(result, expected) - @pytest.mark.skip(reason="need to fix negative 
indexer with bitmask") def test_quantile_ea_with_na(self, obj, index): obj.iloc[0] = index._na_value obj.iloc[-1] = index._na_value From 11467c722e47fdf9bf56295f0095c01812968f50 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 08:27:53 -0400 Subject: [PATCH 018/126] fixed copying --- pandas/_libs/arrays.pyx | 37 +++++++++++++++++--------------- pandas/core/arrays/masked.py | 8 ++++++- pandas/core/arrays/timedeltas.py | 2 +- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 033ca2a6e4f87..4699e44d569ee 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -213,13 +213,15 @@ def _unpickle_bitmaskarray(array): cdef class BitMaskArray: - cdef uint8_t* validity_buffer cdef public: int array_size int nbytes object array_shape + object parent + uint8_t* validity_buffer def __cinit__(self, data): + self.parent = None if isinstance(data, np.ndarray): self.array_size = data.size self.array_shape = data.shape @@ -228,26 +230,19 @@ cdef class BitMaskArray: for index, value in enumerate(data.flatten()): self[index] = value elif isinstance(data, type(self)): - self.array_size = data.array_size - self.array_shape = data.shape - self.nbytes = data.nbytes - self.validity_buffer = malloc(self.nbytes) - - # TODO: tried making validity_buffer public with memcpy but got - # Cannot convert Python object to 'const void *' error - for i in range(self.nbytes): - if data[i]: - ArrowBitSet(self.validity_buffer, i) - else: - ArrowBitClear(self.validity_buffer, i) + self.parent = data + # other attributes are undefined when a parent exists else: raise TypeError("Unsupported argument to BitMaskArray constructor") def __dealloc__(self): - free(self.validity_buffer) + if not self.parent: + free(self.validity_buffer) def __setitem__(self, key, value): - if isinstance(key, int) and key >= 0: + if self.parent is not None: + self.parent.__setitem__(key, value) + elif isinstance(key, int) and key >= 0: 
if value: ArrowBitSet(self.validity_buffer, key) else: @@ -262,16 +257,22 @@ cdef class BitMaskArray: ArrowBitClear(self.validity_buffer, index) def __getitem__(self, key): - if isinstance(key, int) and key >= 0: + if self.parent is not None: + return self.parent.__getitem__(key) + elif isinstance(key, int) and key >= 0: return bool(ArrowBitGet(self.validity_buffer, key)) else: return self.to_numpy()[key] def __invert__(self): + if self.parent is not None: + return ~self.parent return ~self.to_numpy() def __or__(self, other): - if isinstance(other, type(self)): + if self.parent is not None: + return self.parent.__or__(other) + elif isinstance(other, type(self)): return self.to_numpy() | other.to_numpy() else: return self.to_numpy() | other @@ -281,6 +282,8 @@ cdef class BitMaskArray: return (_unpickle_bitmaskarray, object_state) def to_numpy(self) -> ndarray: + if self.parent is not None: + return self.parent.to_numpy() cdef ndarray[uint8_t] result result = np.empty(self.array_size, dtype=bool) for i in range(self.array_size): diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index ed63b06760e69..bb1be8cc2a4d0 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -120,7 +120,9 @@ class BaseMaskedArray(OpsMixin, ExtensionArray): _falsey_value = Scalar # bool(_falsey_value) = False @classmethod - def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self: + def _simple_new( + cls, values: np.ndarray, mask: npt.NDArray[np.bool_] | BitMaskArray + ) -> Self: result = BaseMaskedArray.__new__(cls) result._data = values result._mask = BitMaskArray(mask) @@ -191,6 +193,10 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self.dtype.na_value return self._data[item] + # sending self._mask avoids copy of buffer + if np.array_equal(newmask, self._mask.to_numpy()): + return self._simple_new(self._data[item], self._mask) + return self._simple_new(self._data[item], newmask) def 
pad_or_backfill( diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a81609e1bb618..135850ee37991 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -1071,7 +1071,7 @@ def sequence_to_td64ns( # cast the unit, multiply base/frac separately # to avoid precision issues from float -> int if isinstance(data.dtype, ExtensionDtype): - mask = data._mask + mask = data._mask.to_numpy() data = data._data else: mask = np.isnan(data) From d91fb8e0b47bb71579e0b7de0c1c1c41258b1ff8 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 09:38:44 -0400 Subject: [PATCH 019/126] Working --- pandas/core/arrays/boolean.py | 9 +++-- pandas/core/arrays/masked.py | 35 +++++++++++-------- pandas/core/arrays/numeric.py | 6 +++- .../tests/arrays/boolean/test_construction.py | 8 ++--- pandas/tests/arrays/floating/test_astype.py | 2 +- .../arrays/floating/test_construction.py | 7 ++-- .../tests/arrays/integer/test_construction.py | 7 ++-- pandas/tests/arrays/integer/test_dtypes.py | 2 +- pandas/tests/indexes/test_old_base.py | 10 +++--- 9 files changed, 54 insertions(+), 32 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 63c12efa4bc5c..62ae43f529204 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -27,6 +27,7 @@ if TYPE_CHECKING: import pyarrow + from pandas._libs.arrays import BitMaskArray from pandas._typing import ( Dtype, DtypeObj, @@ -168,7 +169,7 @@ def coerce_to_array( if isinstance(values, BooleanArray): if mask is not None: raise ValueError("cannot pass mask for BooleanArray input") - values, mask = values._data, values._mask.to_numpy() + values, mask = values._data, values._mask if copy: values = values.copy() mask = mask.copy() @@ -298,13 +299,15 @@ class BooleanArray(BaseMaskedArray): _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} @classmethod - def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> 
Self: + def _simple_new( + cls, values: np.ndarray, mask: npt.NDArray[np.bool_] | BitMaskArray + ) -> Self: result = super()._simple_new(values, mask) result._dtype = BooleanDtype() return result def __init__( - self, values: np.ndarray, mask: np.ndarray, copy: bool = False + self, values: np.ndarray, mask: np.ndarray | BitMaskArray, copy: bool = False ) -> None: if not (isinstance(values, np.ndarray) and values.dtype == np.bool_): raise TypeError( diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index bb1be8cc2a4d0..e14538a210c07 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -129,20 +129,31 @@ def _simple_new( return result def __init__( - self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False + self, + values: np.ndarray, + mask: npt.NDArray[np.bool_] | BitMaskArray, + copy: bool = False, ) -> None: # values is supposed to already be validated in the subclass - if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_): + if not ( + isinstance(mask, BitMaskArray) + or (isinstance(mask, np.ndarray) and mask.dtype == np.bool_) + ): raise TypeError( - "mask should be boolean numpy array. Use " - "the 'pd.array' function instead" + "mask should be boolean numpy array or BitMaskArray. 
" + "Use the 'pd.array' function instead" ) - if values.shape != mask.shape: - raise ValueError("values.shape must match mask.shape") + if isinstance(mask, np.ndarray): + if values.shape != mask.shape: + raise ValueError("values.shape must match mask.shape") - if copy: - values = values.copy() - mask = mask.copy() + if copy: + values = values.copy() + mask = mask.copy() + else: + if copy: + values = values.copy() + mask = mask.to_numpy() self._data = values self._mask = BitMaskArray(mask) @@ -551,11 +562,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: data = self._data.astype(dtype.numpy_dtype, copy=copy) # mask is copied depending on whether the data was copied, and # not directly depending on the `copy` keyword - mask = ( - self._mask.to_numpy() - if data is self._data - else self._mask.to_numpy().copy() - ) + mask = self._mask if data is self._data else self._mask.to_numpy().copy() cls = dtype.construct_array_type() return cls(data, mask, copy=False) diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 304e4f4097a69..76903074c763d 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -32,6 +32,7 @@ import pyarrow + from pandas._libs.arrays import BitMaskArray from pandas._typing import ( Dtype, DtypeObj, @@ -230,7 +231,10 @@ class NumericArray(BaseMaskedArray): _dtype_cls: type[NumericDtype] def __init__( - self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False + self, + values: np.ndarray, + mask: npt.NDArray[np.bool_] | BitMaskArray, + copy: bool = False, ) -> None: checker = self._dtype_cls._checker if not (isinstance(values, np.ndarray) and checker(values.dtype)): diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index 89f8e52fe21c8..37745f589e26d 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -44,7 +44,7 @@ def 
test_boolean_array_constructor_copy(): result = BooleanArray(values, mask, copy=True) assert result._data is not values - # assert result._mask is not mask + assert result._mask is not mask def test_to_boolean_array(): @@ -164,7 +164,7 @@ def test_coerce_to_array(): expected = BooleanArray(values, mask) tm.assert_extension_array_equal(result, expected) assert result._data is not values - # assert result._mask is not mask + assert result._mask is not mask # mixed missing from values and mask values = [True, False, None, False] @@ -202,12 +202,12 @@ def test_coerce_to_array_from_boolean_array(): tm.assert_extension_array_equal(result, arr) # no copy assert result._data is arr._data - # assert result._mask is arr._mask + assert result._mask.parent is arr._mask result = BooleanArray(*coerce_to_array(arr), copy=True) tm.assert_extension_array_equal(result, arr) assert result._data is not arr._data - # assert result._mask is not arr._mask + assert result._mask.parent is not arr._mask with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"): coerce_to_array(arr, mask=mask) diff --git a/pandas/tests/arrays/floating/test_astype.py b/pandas/tests/arrays/floating/test_astype.py index cc193cc644ec4..b38a944238b38 100644 --- a/pandas/tests/arrays/floating/test_astype.py +++ b/pandas/tests/arrays/floating/test_astype.py @@ -88,7 +88,7 @@ def test_astype_copy(): result = arr.astype("Float64", copy=False) assert result is arr assert np.shares_memory(result._data, arr._data) - # assert np.shares_memory(result._mask, arr._mask) + assert result._mask is arr._mask result[0] = 10 assert arr[0] == 10 result[0] = pd.NA diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 699153b2c0639..3e9b669913749 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -25,7 +25,10 @@ def test_floating_array_constructor(): 
tm.assert_numpy_array_equal(result._data, values) tm.assert_numpy_array_equal(result._mask.to_numpy(), mask) - msg = r".* should be .* numpy array. Use the 'pd.array' function instead" + msg = ( + r".* should be .* numpy array( or BitMaskArray)?. " + r"Use the 'pd.array' function instead" + ) with pytest.raises(TypeError, match=msg): FloatingArray(values.tolist(), mask) @@ -66,7 +69,7 @@ def test_floating_array_constructor_copy(): result = FloatingArray(values, mask, copy=True) assert result._data is not values - # assert result._mask is not mask + assert result._mask is not mask def test_to_array(): diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index f6ef5db17044b..6cc240cd52aca 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -80,7 +80,10 @@ def test_integer_array_constructor(): expected = pd.array([1, 2, 3, np.nan], dtype="Int64") tm.assert_extension_array_equal(result, expected) - msg = r".* should be .* numpy array. Use the 'pd.array' function instead" + msg = ( + r".* should be .* numpy array( or BitMaskArray)?. 
" + r"Use the 'pd.array' function instead" + ) with pytest.raises(TypeError, match=msg): IntegerArray(values.tolist(), mask) @@ -104,7 +107,7 @@ def test_integer_array_constructor_copy(): result = IntegerArray(values, mask, copy=True) assert result._data is not values - # assert result._mask is not mask + assert result._mask is not mask @pytest.mark.parametrize( diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 312fa90844847..70c2eeeb852c6 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -163,7 +163,7 @@ def test_astype_copy(): result = arr.astype("Int64", copy=False) assert result is arr assert np.shares_memory(result._data, arr._data) - # assert np.shares_memory(result._mask, arr._mask) + assert result._mask is arr._mask result[0] = 10 assert arr[0] == 10 result[0] = pd.NA diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 3250e16be64e9..6df45941df8cc 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -277,10 +277,12 @@ def test_ensure_copied_data(self, index): tm.assert_numpy_array_equal( index._values._data, result._values._data, check_same="same" ) - # assert np.shares_memory(index._values._mask, result._values._mask) - # tm.assert_numpy_array_equal( - # index._values._mask, result._values._mask, check_same="same" - # ) + assert index._values._mask is result._values._mask + tm.assert_numpy_array_equal( + index._values._mask.to_numpy(), + result._values._mask.to_numpy(), + check_same="copy", + ) elif index.dtype == "string[python]": assert np.shares_memory(index._values._ndarray, result._values._ndarray) tm.assert_numpy_array_equal( From 757605c8c9ae534b9afd278faf54da5342319f85 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 10:04:12 -0400 Subject: [PATCH 020/126] cleanups --- pandas/core/arrays/masked.py | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index e14538a210c07..93ff9216c57b8 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -329,7 +329,7 @@ def __contains__(self, key) -> bool: # GH#52840 if self._data.dtype.kind == "f" and lib.is_float(key): # TODO: implement low level invert operator on BitMaskArray - return bool((np.isnan(self._data) & ~self._mask.to_numpy()).any()) + return bool((np.isnan(self._data) & ~self._mask).any()) return bool(super().__contains__(key)) @@ -789,7 +789,7 @@ def _arith_method(self, other, op): if op_name == "pow": # 1 ** x is 1. - mask = np.where((self._data == 1) & ~self._mask.to_numpy(), False, mask) + mask = np.where((self._data == 1) & ~self._mask, False, mask) # x ** 0 is 1. if omask is not None: mask = np.where((other == 0) & ~omask, False, mask) @@ -803,7 +803,7 @@ def _arith_method(self, other, op): elif other is not libmissing.NA: mask = np.where(other == 1, False, mask) # x ** 0 is 1. 
- mask = np.where((self._data == 0) & ~self._mask.to_numpy(), False, mask) + mask = np.where((self._data == 0) & ~self._mask, False, mask) return self._maybe_mask_result(result, mask) @@ -1111,8 +1111,8 @@ def equals(self, other) -> bool: if not np.array_equal(self._mask.to_numpy(), other._mask.to_numpy()): return False - left = self._data[~self._mask.to_numpy()] - right = other._data[~other._mask.to_numpy()] + left = self._data[~self._mask] + right = other._data[~other._mask] return array_equivalent(left, right, strict_nan=True, dtype_equal=True) def _quantile( From 6fbbad897bb147641445c52c1e0596ebecb58c51 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 10:18:37 -0400 Subject: [PATCH 021/126] fix --- pandas/core/arrays/masked.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 93ff9216c57b8..04952f8d8ff3c 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -322,6 +322,9 @@ def __setitem__(self, key, value) -> None: value, mask = self._coerce_to_array(value, dtype=self.dtype) self._data[key] = value + if isinstance(mask, BitMaskArray): + mask = mask.to_numpy() + self._mask[key] = mask def __contains__(self, key) -> bool: From b9723ab44c1806eb61d0845ff992b32f7d06f751 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 13 Aug 2023 23:11:13 -0400 Subject: [PATCH 022/126] cleanups and some performance boosts --- .pre-commit-config.yaml | 3 +++ pandas/_libs/arrays.pyx | 26 ++++++++++++++++---------- pandas/core/arrays/numeric.py | 3 +-- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 000949c41f5a0..bc6fb0e3bf99e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -46,6 +46,9 @@ repos: - id: codespell types_or: [python, rst, markdown, cython, c] additional_dependencies: [tomli] + exclude: | + ^pandas/_libs/include/pandas/vendored/nanoarrow.h + 
|pandas/_libs/src/vendored/nanoarrow.c - repo: https://github.com/MarcoGorelli/cython-lint rev: v0.15.0 hooks: diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 4699e44d569ee..c2b0b724b34cd 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -213,22 +213,25 @@ def _unpickle_bitmaskarray(array): cdef class BitMaskArray: - cdef public: + cdef: int array_size - int nbytes object array_shape - object parent uint8_t* validity_buffer + cdef public: + int nbytes + object parent def __cinit__(self, data): self.parent = None + cdef int index = 0 if isinstance(data, np.ndarray): self.array_size = data.size self.array_shape = data.shape self.nbytes = self.array_size // 8 + 1 self.validity_buffer = malloc(self.nbytes) - for index, value in enumerate(data.flatten()): + for value in data.flatten(): self[index] = value + index += 1 elif isinstance(data, type(self)): self.parent = data # other attributes are undefined when a parent exists @@ -240,8 +243,9 @@ cdef class BitMaskArray: free(self.validity_buffer) def __setitem__(self, key, value): + cdef int index = 0 if self.parent is not None: - self.parent.__setitem__(key, value) + self.parent[key] = value elif isinstance(key, int) and key >= 0: if value: ArrowBitSet(self.validity_buffer, key) @@ -250,15 +254,16 @@ cdef class BitMaskArray: else: arr = self.to_numpy() arr[key] = value - for index, val in enumerate(arr.flatten()): + for val in arr.flatten(): if val: ArrowBitSet(self.validity_buffer, index) else: ArrowBitClear(self.validity_buffer, index) + index += 1 def __getitem__(self, key): if self.parent is not None: - return self.parent.__getitem__(key) + return self.parent[key] elif isinstance(key, int) and key >= 0: return bool(ArrowBitGet(self.validity_buffer, key)) else: @@ -271,7 +276,7 @@ cdef class BitMaskArray: def __or__(self, other): if self.parent is not None: - return self.parent.__or__(other) + return self.parent | other elif isinstance(other, type(self)): return 
self.to_numpy() | other.to_numpy() else: @@ -284,8 +289,9 @@ cdef class BitMaskArray: def to_numpy(self) -> ndarray: if self.parent is not None: return self.parent.to_numpy() - cdef ndarray[uint8_t] result - result = np.empty(self.array_size, dtype=bool) + + cdef int i + cdef ndarray[uint8_t] result = np.empty(self.array_size, dtype=bool) for i in range(self.array_size): result[i] = self[i] diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 76903074c763d..897ae8a89c73c 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -32,7 +32,6 @@ import pyarrow - from pandas._libs.arrays import BitMaskArray from pandas._typing import ( Dtype, DtypeObj, @@ -233,7 +232,7 @@ class NumericArray(BaseMaskedArray): def __init__( self, values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: npt.NDArray[np.bool_], copy: bool = False, ) -> None: checker = self._dtype_cls._checker From 3f60cd072fbdc19c5ea240d93306ca93dc123be7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 08:33:38 -0400 Subject: [PATCH 023/126] perf boost --- pandas/_libs/arrays.pyx | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index c2b0b724b34cd..6e67f15616926 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -214,24 +214,33 @@ def _unpickle_bitmaskarray(array): cdef class BitMaskArray: cdef: - int array_size + Py_ssize_t array_size + Py_ssize_t array_nbytes object array_shape uint8_t* validity_buffer cdef public: - int nbytes object parent + cdef _setitem_with_integral(self, const int key, const uint8_t value): + if value: + ArrowBitSet(self.validity_buffer, key) + else: + ArrowBitClear(self.validity_buffer, key) + + cdef void init_from_ndarray(self, const uint8_t[:] arr): + cdef Py_ssize_t i, arrlen + self.array_size = arr.size + self.array_shape = arr.shape + self.array_nbytes = self.array_size 
// 8 + 1 + self.validity_buffer = malloc(self.array_nbytes) + arrlen = len(arr) + for i in range(arrlen): + self._setitem_with_integral(i, arr[i]) + def __cinit__(self, data): self.parent = None - cdef int index = 0 if isinstance(data, np.ndarray): - self.array_size = data.size - self.array_shape = data.shape - self.nbytes = self.array_size // 8 + 1 - self.validity_buffer = malloc(self.nbytes) - for value in data.flatten(): - self[index] = value - index += 1 + self.init_from_ndarray(data.flatten()) elif isinstance(data, type(self)): self.parent = data # other attributes are undefined when a parent exists @@ -247,10 +256,7 @@ cdef class BitMaskArray: if self.parent is not None: self.parent[key] = value elif isinstance(key, int) and key >= 0: - if value: - ArrowBitSet(self.validity_buffer, key) - else: - ArrowBitClear(self.validity_buffer, key) + self._setitem_with_integral(key, bool(value)) else: arr = self.to_numpy() arr[key] = value @@ -286,6 +292,10 @@ cdef class BitMaskArray: object_state = (self.to_numpy(),) return (_unpickle_bitmaskarray, object_state) + @property + def nbytes(self) -> int: + return self.array_nbytes + def to_numpy(self) -> ndarray: if self.parent is not None: return self.parent.to_numpy() From 3b8921a76e19553ab88db8943c9a267bf5678a37 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 08:43:23 -0400 Subject: [PATCH 024/126] perf boost --- pandas/_libs/arrays.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 6e67f15616926..8f2e3bcbdeccb 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -230,7 +230,6 @@ cdef class BitMaskArray: cdef void init_from_ndarray(self, const uint8_t[:] arr): cdef Py_ssize_t i, arrlen self.array_size = arr.size - self.array_shape = arr.shape self.array_nbytes = self.array_size // 8 + 1 self.validity_buffer = malloc(self.array_nbytes) arrlen = len(arr) @@ -240,7 +239,8 @@ cdef class BitMaskArray: def 
__cinit__(self, data): self.parent = None if isinstance(data, np.ndarray): - self.init_from_ndarray(data.flatten()) + self.array_shape = data.shape + self.init_from_ndarray(data.ravel()) elif isinstance(data, type(self)): self.parent = data # other attributes are undefined when a parent exists @@ -260,7 +260,7 @@ cdef class BitMaskArray: else: arr = self.to_numpy() arr[key] = value - for val in arr.flatten(): + for val in arr.ravel(): if val: ArrowBitSet(self.validity_buffer, index) else: From 999e7436c85a614b8e00f13115383b6c779b12f6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 09:30:27 -0400 Subject: [PATCH 025/126] more performance --- pandas/_libs/arrays.pyx | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 8f2e3bcbdeccb..e3c384449a6bc 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -221,20 +221,24 @@ cdef class BitMaskArray: cdef public: object parent - cdef _setitem_with_integral(self, const int key, const uint8_t value): + cdef void _setitem_integral(self, const int key, const uint8_t value): if value: ArrowBitSet(self.validity_buffer, key) else: ArrowBitClear(self.validity_buffer, key) + cdef uint8_t _getitem_integral(self, const Py_ssize_t index): + return ArrowBitGet(self.validity_buffer, index) + + @cython.boundscheck(False) + @cython.wraparound(False) cdef void init_from_ndarray(self, const uint8_t[:] arr): - cdef Py_ssize_t i, arrlen - self.array_size = arr.size + cdef Py_ssize_t i + self.array_size = arr.shape[0] self.array_nbytes = self.array_size // 8 + 1 self.validity_buffer = malloc(self.array_nbytes) - arrlen = len(arr) - for i in range(arrlen): - self._setitem_with_integral(i, arr[i]) + for i in range(self.array_size): + self._setitem_integral(i, arr[i]) def __cinit__(self, data): self.parent = None @@ -252,26 +256,28 @@ cdef class BitMaskArray: free(self.validity_buffer) def __setitem__(self, 
key, value): - cdef int index = 0 + cdef const uint8_t[:] arr1d + cdef Py_ssize_t i = 0 + if self.parent is not None: self.parent[key] = value elif isinstance(key, int) and key >= 0: - self._setitem_with_integral(key, bool(value)) + self._setitem_integral(key, bool(value)) else: arr = self.to_numpy() arr[key] = value - for val in arr.ravel(): - if val: - ArrowBitSet(self.validity_buffer, index) + arr1d = arr.ravel() + for i in range(arr1d.shape[0]): + if arr1d[i]: + ArrowBitSet(self.validity_buffer, i) else: - ArrowBitClear(self.validity_buffer, index) - index += 1 + ArrowBitClear(self.validity_buffer, i) def __getitem__(self, key): if self.parent is not None: return self.parent[key] elif isinstance(key, int) and key >= 0: - return bool(ArrowBitGet(self.validity_buffer, key)) + return self._getitem_integral(key) else: return self.to_numpy()[key] @@ -300,9 +306,9 @@ cdef class BitMaskArray: if self.parent is not None: return self.parent.to_numpy() - cdef int i + cdef Py_ssize_t i cdef ndarray[uint8_t] result = np.empty(self.array_size, dtype=bool) for i in range(self.array_size): - result[i] = self[i] + result[i] = self._getitem_integral(i) return result.reshape(self.array_shape) From 2b764ce38cabe844622ed763e2095c16da8941bd Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 11:06:32 -0400 Subject: [PATCH 026/126] better perf --- pandas/_libs/arrays.pyx | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index e3c384449a6bc..251e127ebef6a 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -24,8 +24,7 @@ from libc.stdlib cimport ( cdef extern from "pandas/vendored/nanoarrow.h": int8_t ArrowBitGet(const uint8_t*, int64_t) - void ArrowBitSet(uint8_t*, int64_t) - void ArrowBitClear(uint8_t*, int64_t) + void ArrowBitSetTo(uint8_t*, int64_t, uint8_t) @cython.freelist(16) @@ -221,15 +220,6 @@ cdef class BitMaskArray: cdef public: object parent - cdef 
void _setitem_integral(self, const int key, const uint8_t value): - if value: - ArrowBitSet(self.validity_buffer, key) - else: - ArrowBitClear(self.validity_buffer, key) - - cdef uint8_t _getitem_integral(self, const Py_ssize_t index): - return ArrowBitGet(self.validity_buffer, index) - @cython.boundscheck(False) @cython.wraparound(False) cdef void init_from_ndarray(self, const uint8_t[:] arr): @@ -238,7 +228,7 @@ cdef class BitMaskArray: self.array_nbytes = self.array_size // 8 + 1 self.validity_buffer = malloc(self.array_nbytes) for i in range(self.array_size): - self._setitem_integral(i, arr[i]) + ArrowBitSetTo(self.validity_buffer, i, arr[i]) def __cinit__(self, data): self.parent = None @@ -262,22 +252,19 @@ cdef class BitMaskArray: if self.parent is not None: self.parent[key] = value elif isinstance(key, int) and key >= 0: - self._setitem_integral(key, bool(value)) + ArrowBitSetTo(self.validity_buffer, key, bool(value)) else: arr = self.to_numpy() arr[key] = value arr1d = arr.ravel() for i in range(arr1d.shape[0]): - if arr1d[i]: - ArrowBitSet(self.validity_buffer, i) - else: - ArrowBitClear(self.validity_buffer, i) + ArrowBitSetTo(self.validity_buffer, i, arr1d[i]) def __getitem__(self, key): if self.parent is not None: return self.parent[key] elif isinstance(key, int) and key >= 0: - return self._getitem_integral(key) + return ArrowBitGet(self.validity_buffer, key) else: return self.to_numpy()[key] @@ -309,6 +296,6 @@ cdef class BitMaskArray: cdef Py_ssize_t i cdef ndarray[uint8_t] result = np.empty(self.array_size, dtype=bool) for i in range(self.array_size): - result[i] = self._getitem_integral(i) + result[i] = ArrowBitGet(self.validity_buffer, i) return result.reshape(self.array_shape) From 74548e857958300ab979f6bb8c69640d35ef9798 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 12:03:38 -0400 Subject: [PATCH 027/126] code and typing cleanups --- pandas/_libs/arrays.pyi | 22 +++++++++++++++++++++- pandas/_libs/arrays.pyx | 15 
++++++++++----- pandas/core/arrays/masked.py | 10 +++++----- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 86f69c3cdfc75..632e08002e9bd 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -1,10 +1,16 @@ -from typing import Sequence +from typing import ( + Callable, + Sequence, + Tuple, +) import numpy as np from pandas._typing import ( + ArrayLike, AxisInt, DtypeObj, + PositionalIndexer, Self, Shape, ) @@ -38,3 +44,17 @@ class NDArrayBacked: def _concat_same_type( cls, to_concat: Sequence[Self], axis: AxisInt = ... ) -> Self: ... + +class BitMaskArray: + parent: Self + def __cinit__(self, data: np.ndarray | Self) -> None: ... + def __init__(self, data: np.ndarray | Self) -> None: ... + def __dealloc__(self) -> None: ... + def __setitem__(self, key: PositionalIndexer, value: ArrayLike | bool) -> None: ... + def __getitem__(self, key: PositionalIndexer) -> bool: ... + def __invert__(self) -> np.ndarray: ... + def __or__(self, other: np.ndarray | Self) -> np.ndarray: ... + def __reduce__(self) -> Tuple[Callable[[np.ndarray], Self], Tuple[np.ndarray]]: ... + @property + def nbytes(self) -> int: ... + def to_numpy(self) -> np.ndarray: ... 
diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 251e127ebef6a..050360805a441 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -251,7 +251,7 @@ cdef class BitMaskArray: if self.parent is not None: self.parent[key] = value - elif isinstance(key, int) and key >= 0: + elif isinstance(key, int) and key >= 0 and key < self.array_size: ArrowBitSetTo(self.validity_buffer, key, bool(value)) else: arr = self.to_numpy() @@ -263,7 +263,7 @@ cdef class BitMaskArray: def __getitem__(self, key): if self.parent is not None: return self.parent[key] - elif isinstance(key, int) and key >= 0: + elif isinstance(key, int) and key >= 0 and key < self.array_size: return ArrowBitGet(self.validity_buffer, key) else: return self.to_numpy()[key] @@ -289,13 +289,18 @@ cdef class BitMaskArray: def nbytes(self) -> int: return self.array_nbytes + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void convert_to_boolean_array(self, uint8_t[:] out): + cdef Py_ssize_t i + for i in range(self.array_size): + out[i] = ArrowBitGet(self.validity_buffer, i) + def to_numpy(self) -> ndarray: if self.parent is not None: return self.parent.to_numpy() - cdef Py_ssize_t i cdef ndarray[uint8_t] result = np.empty(self.array_size, dtype=bool) - for i in range(self.array_size): - result[i] = ArrowBitGet(self.validity_buffer, i) + self.convert_to_boolean_array(result) return result.reshape(self.array_shape) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 04952f8d8ff3c..f1da44ef4e3da 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -567,7 +567,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: # not directly depending on the `copy` keyword mask = self._mask if data is self._data else self._mask.to_numpy().copy() cls = dtype.construct_array_type() - return cls(data, mask, copy=False) + return cls(data, mask, copy=False) # type: ignore[arg-type] if isinstance(dtype, ExtensionDtype): 
eacls = dtype.construct_array_type() @@ -1395,8 +1395,8 @@ def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): # bool, int, float, complex, str, bytes, # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" np.putmask( - values, self._mask.to_numpy(), self._falsey_value - ) # type: ignore[arg-type] + values, self._mask.to_numpy(), self._falsey_value # type: ignore[arg-type] + ) result = values.any() if skipna: return result @@ -1478,8 +1478,8 @@ def all(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): # bool, int, float, complex, str, bytes, # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" np.putmask( - values, self._mask.to_numpy(), self._truthy_value - ) # type: ignore[arg-type] + values, self._mask.to_numpy(), self._truthy_value # type: ignore[arg-type] + ) result = values.all(axis=axis) if skipna: From dce8002fc76d0ff902a8a80064b59d3dd00477c4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 15:41:36 -0400 Subject: [PATCH 028/126] refactor and lower level invert/or implementation --- pandas/_libs/arrays.pyx | 112 +++++++++++++++++++++++------------ pandas/core/arrays/masked.py | 2 +- 2 files changed, 76 insertions(+), 38 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 050360805a441..0da5d775ed948 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -211,14 +211,27 @@ def _unpickle_bitmaskarray(array): return bma +cdef void buf_invert(uint8_t* dest, uint8_t* src, Py_ssize_t size): + cdef Py_ssize_t i + for i in range(size): + dest[i] = ~src[i] + + +cdef void buf_or(uint8_t* dest, uint8_t* src1, uint8_t* src2, Py_ssize_t size): + cdef Py_ssize_t i + for i in range(size): + dest[i] = src1[i] | src2[i] + + cdef class BitMaskArray: cdef: Py_ssize_t array_size Py_ssize_t array_nbytes - object array_shape uint8_t* validity_buffer + bint buffer_owner # set when parent is None, but gives C-level access cdef public: - object parent + object 
array_shape + object parent # assignments gives RC to ensure proper buffer lifecycle @cython.boundscheck(False) @cython.wraparound(False) @@ -227,57 +240,84 @@ cdef class BitMaskArray: self.array_size = arr.shape[0] self.array_nbytes = self.array_size // 8 + 1 self.validity_buffer = malloc(self.array_nbytes) + self.buffer_owner = True for i in range(self.array_size): ArrowBitSetTo(self.validity_buffer, i, arr[i]) + cdef void init_from_bitmaskarray(self, BitMaskArray bma): + self.buffer_owner = False + self.array_size = bma.array_size + self.array_nbytes = bma.array_nbytes + self.validity_buffer = bma.validity_buffer + def __cinit__(self, data): - self.parent = None if isinstance(data, np.ndarray): - self.array_shape = data.shape self.init_from_ndarray(data.ravel()) + self.array_shape = data.shape + self.parent = None elif isinstance(data, type(self)): + self.init_from_bitmaskarray(data) + self.array_shape = data.array_shape self.parent = data - # other attributes are undefined when a parent exists else: raise TypeError("Unsupported argument to BitMaskArray constructor") def __dealloc__(self): - if not self.parent: + if self.buffer_owner: free(self.validity_buffer) def __setitem__(self, key, value): cdef const uint8_t[:] arr1d cdef Py_ssize_t i = 0 - - if self.parent is not None: - self.parent[key] = value - elif isinstance(key, int) and key >= 0 and key < self.array_size: - ArrowBitSetTo(self.validity_buffer, key, bool(value)) - else: - arr = self.to_numpy() - arr[key] = value - arr1d = arr.ravel() - for i in range(arr1d.shape[0]): - ArrowBitSetTo(self.validity_buffer, i, arr1d[i]) + cdef Py_ssize_t ckey + cdef bint cvalue + + if isinstance(key, int): + ckey = key + cvalue = value + if ckey >= 0 and ckey < self.array_size: + ArrowBitSetTo(self.validity_buffer, ckey, cvalue) + return + + arr = self.to_numpy() + arr[key] = value + arr1d = arr.ravel() + for i in range(arr1d.shape[0]): + ArrowBitSetTo(self.validity_buffer, i, arr1d[i]) def __getitem__(self, key): - if 
self.parent is not None: - return self.parent[key] - elif isinstance(key, int) and key >= 0 and key < self.array_size: - return ArrowBitGet(self.validity_buffer, key) - else: - return self.to_numpy()[key] + cdef Py_ssize_t ckey + if isinstance(key, int): + ckey = key + if ckey >= 0 and ckey < self.array_size: + return ArrowBitGet(self.validity_buffer, ckey) + + return self.to_numpy()[key] def __invert__(self): - if self.parent is not None: - return ~self.parent - return ~self.to_numpy() + cdef ndarray[uint8_t] result + result = np.empty(self.array_size, dtype=bool) + + cdef uint8_t* inverted = malloc(self.array_size) + buf_invert(inverted, self.validity_buffer, self.array_size) + BitMaskArray.buffer_to_array_1d(result, inverted, self.array_size) + free(inverted) + return result.reshape(self.array_shape) def __or__(self, other): - if self.parent is not None: - return self.parent | other - elif isinstance(other, type(self)): - return self.to_numpy() | other.to_numpy() + cdef ndarray[uint8_t] result + cdef uint8_t* ored + cdef BitMaskArray other_buf + if isinstance(other, type(self)): + other_buf = other + result = np.empty(self.array_size, dtype=bool) + ored = malloc(self.array_size) + buf_or( + ored, self.validity_buffer, other_buf.validity_buffer, self.array_size + ) + BitMaskArray.buffer_to_array_1d(result, ored, self.array_size) + free(ored) + return result.reshape(self.array_shape) else: return self.to_numpy() | other @@ -291,16 +331,14 @@ cdef class BitMaskArray: @cython.boundscheck(False) @cython.wraparound(False) - cdef void convert_to_boolean_array(self, uint8_t[:] out): + @staticmethod + cdef void buffer_to_array_1d(uint8_t[:] out, const uint8_t* buf, Py_ssize_t size): cdef Py_ssize_t i - for i in range(self.array_size): - out[i] = ArrowBitGet(self.validity_buffer, i) + for i in range(size): + out[i] = ArrowBitGet(buf, i) def to_numpy(self) -> ndarray: - if self.parent is not None: - return self.parent.to_numpy() - cdef ndarray[uint8_t] result = 
np.empty(self.array_size, dtype=bool) - self.convert_to_boolean_array(result) + BitMaskArray.buffer_to_array_1d(result, self.validity_buffer, self.array_size) return result.reshape(self.array_shape) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index f1da44ef4e3da..14a64bd021465 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -700,7 +700,7 @@ def _hasna(self) -> bool: return self._mask.to_numpy().any() # type: ignore[return-value] def _propagate_mask( - self, mask: npt.NDArray[np.bool_] | None, other + self, mask: npt.NDArray[np.bool_] | BitMaskArray | None, other ) -> npt.NDArray[np.bool_]: if mask is None: mask = ( From e641fedaf6f85d301ae64ec886d1979e048c678c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 17:05:12 -0400 Subject: [PATCH 029/126] Mass append nanoarrow for buffer performance --- pandas/_libs/arrays.pyx | 48 +++++++++++++++++++-------- pandas/_libs/src/vendored/nanoarrow.c | 8 ++--- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 0da5d775ed948..005b21ceed520 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -23,6 +23,18 @@ from libc.stdlib cimport ( cdef extern from "pandas/vendored/nanoarrow.h": + struct ArrowBuffer: + uint8_t* data + int64_t size_bytes + + struct ArrowBitmap: + ArrowBuffer buffer + int64_t size_bits + + void ArrowBitmapInit(ArrowBitmap*) + void ArrowBitmapReserve(ArrowBitmap*, int64_t) + void ArrowBitmapAppendInt8Unsafe(ArrowBitmap*, const int8_t *, int64_t) + void ArrowBitmapReset(ArrowBitmap*) int8_t ArrowBitGet(const uint8_t*, int64_t) void ArrowBitSetTo(uint8_t*, int64_t, uint8_t) @@ -227,7 +239,7 @@ cdef class BitMaskArray: cdef: Py_ssize_t array_size Py_ssize_t array_nbytes - uint8_t* validity_buffer + ArrowBitmap bitmap bint buffer_owner # set when parent is None, but gives C-level access cdef public: object array_shape @@ -235,20 +247,21 @@ cdef class 
BitMaskArray: @cython.boundscheck(False) @cython.wraparound(False) - cdef void init_from_ndarray(self, const uint8_t[:] arr): - cdef Py_ssize_t i + cdef void init_from_ndarray(self, const uint8_t[::1] arr): + cdef ArrowBitmap bitmap self.array_size = arr.shape[0] self.array_nbytes = self.array_size // 8 + 1 - self.validity_buffer = malloc(self.array_nbytes) + ArrowBitmapInit(&bitmap) + ArrowBitmapReserve(&bitmap, self.array_size) + ArrowBitmapAppendInt8Unsafe(&bitmap, &arr[0], self.array_size) + self.bitmap = bitmap self.buffer_owner = True - for i in range(self.array_size): - ArrowBitSetTo(self.validity_buffer, i, arr[i]) cdef void init_from_bitmaskarray(self, BitMaskArray bma): self.buffer_owner = False self.array_size = bma.array_size self.array_nbytes = bma.array_nbytes - self.validity_buffer = bma.validity_buffer + self.bitmap = bma.bitmap def __cinit__(self, data): if isinstance(data, np.ndarray): @@ -264,7 +277,7 @@ cdef class BitMaskArray: def __dealloc__(self): if self.buffer_owner: - free(self.validity_buffer) + ArrowBitmapReset(&self.bitmap) def __setitem__(self, key, value): cdef const uint8_t[:] arr1d @@ -276,21 +289,21 @@ cdef class BitMaskArray: ckey = key cvalue = value if ckey >= 0 and ckey < self.array_size: - ArrowBitSetTo(self.validity_buffer, ckey, cvalue) + ArrowBitSetTo(self.bitmap.buffer.data, ckey, cvalue) return arr = self.to_numpy() arr[key] = value arr1d = arr.ravel() for i in range(arr1d.shape[0]): - ArrowBitSetTo(self.validity_buffer, i, arr1d[i]) + ArrowBitSetTo(self.bitmap.buffer.data, i, arr1d[i]) def __getitem__(self, key): cdef Py_ssize_t ckey if isinstance(key, int): ckey = key if ckey >= 0 and ckey < self.array_size: - return ArrowBitGet(self.validity_buffer, ckey) + return ArrowBitGet(self.bitmap.buffer.data, ckey) return self.to_numpy()[key] @@ -299,7 +312,7 @@ cdef class BitMaskArray: result = np.empty(self.array_size, dtype=bool) cdef uint8_t* inverted = malloc(self.array_size) - buf_invert(inverted, self.validity_buffer, 
self.array_size) + buf_invert(inverted, self.bitmap.buffer.data, self.array_size) BitMaskArray.buffer_to_array_1d(result, inverted, self.array_size) free(inverted) return result.reshape(self.array_shape) @@ -313,7 +326,10 @@ cdef class BitMaskArray: result = np.empty(self.array_size, dtype=bool) ored = malloc(self.array_size) buf_or( - ored, self.validity_buffer, other_buf.validity_buffer, self.array_size + ored, + self.bitmap.buffer.data, + other_buf.bitmap.buffer.data, + self.array_size ) BitMaskArray.buffer_to_array_1d(result, ored, self.array_size) free(ored) @@ -339,6 +355,10 @@ cdef class BitMaskArray: def to_numpy(self) -> ndarray: cdef ndarray[uint8_t] result = np.empty(self.array_size, dtype=bool) - BitMaskArray.buffer_to_array_1d(result, self.validity_buffer, self.array_size) + BitMaskArray.buffer_to_array_1d( + result, + self.bitmap.buffer.data, + self.array_size + ) return result.reshape(self.array_shape) diff --git a/pandas/_libs/src/vendored/nanoarrow.c b/pandas/_libs/src/vendored/nanoarrow.c index 7cc53b43550d7..fc23c71992c4b 100644 --- a/pandas/_libs/src/vendored/nanoarrow.c +++ b/pandas/_libs/src/vendored/nanoarrow.c @@ -22,7 +22,7 @@ #include #include -#include "pandas/vendored/nanoarrow.h" +#include "nanoarrow.h" const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; } @@ -253,7 +253,7 @@ struct ArrowBufferAllocator ArrowBufferDeallocator( #include #include -#include "pandas/vendored/nanoarrow.h" +#include "nanoarrow.h" static void ArrowSchemaRelease(struct ArrowSchema* schema) { if (schema->format != NULL) ArrowFree((void*)schema->format); @@ -1444,7 +1444,7 @@ static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_vi } } -// Helper for bookeeping to emulate sprintf()-like behaviour spread +// Helper for bookkeeping to emulate sprintf()-like behaviour spread // among multiple sprintf calls. 
static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last, int64_t* n_remaining, int64_t* n_chars) { @@ -1777,7 +1777,7 @@ ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, #include #include -#include "pandas/vendored/nanoarrow.h" +#include "nanoarrow.h" static void ArrowArrayRelease(struct ArrowArray* array) { // Release buffers held by this array From 109dd57c9d3abe1bd8125197a49ab9a6db812cb1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 17:19:16 -0400 Subject: [PATCH 030/126] delete duplicative struct members --- pandas/_libs/arrays.pyx | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 005b21ceed520..68148671a4ee7 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -237,8 +237,6 @@ cdef void buf_or(uint8_t* dest, uint8_t* src1, uint8_t* src2, Py_ssize_t size): cdef class BitMaskArray: cdef: - Py_ssize_t array_size - Py_ssize_t array_nbytes ArrowBitmap bitmap bint buffer_owner # set when parent is None, but gives C-level access cdef public: @@ -249,18 +247,17 @@ cdef class BitMaskArray: @cython.wraparound(False) cdef void init_from_ndarray(self, const uint8_t[::1] arr): cdef ArrowBitmap bitmap - self.array_size = arr.shape[0] - self.array_nbytes = self.array_size // 8 + 1 + # As long as we have a 1D arr argument we can use .shape[0] to avoid + # a call to Python via .size + cdef int64_t nobs = arr.shape[0] ArrowBitmapInit(&bitmap) - ArrowBitmapReserve(&bitmap, self.array_size) - ArrowBitmapAppendInt8Unsafe(&bitmap, &arr[0], self.array_size) - self.bitmap = bitmap + ArrowBitmapReserve(&bitmap, nobs) + ArrowBitmapAppendInt8Unsafe(&bitmap, &arr[0], nobs) self.buffer_owner = True + self.bitmap = bitmap cdef void init_from_bitmaskarray(self, BitMaskArray bma): self.buffer_owner = False - self.array_size = bma.array_size - self.array_nbytes = bma.array_nbytes self.bitmap = bma.bitmap 
def __cinit__(self, data): @@ -288,7 +285,7 @@ cdef class BitMaskArray: if isinstance(key, int): ckey = key cvalue = value - if ckey >= 0 and ckey < self.array_size: + if ckey >= 0 and ckey < self.bitmap.size_bits: ArrowBitSetTo(self.bitmap.buffer.data, ckey, cvalue) return @@ -302,18 +299,18 @@ cdef class BitMaskArray: cdef Py_ssize_t ckey if isinstance(key, int): ckey = key - if ckey >= 0 and ckey < self.array_size: + if ckey >= 0 and ckey < self.bitmap.size_bits: return ArrowBitGet(self.bitmap.buffer.data, ckey) return self.to_numpy()[key] def __invert__(self): cdef ndarray[uint8_t] result - result = np.empty(self.array_size, dtype=bool) + result = np.empty(self.bitmap.size_bits, dtype=bool) - cdef uint8_t* inverted = malloc(self.array_size) - buf_invert(inverted, self.bitmap.buffer.data, self.array_size) - BitMaskArray.buffer_to_array_1d(result, inverted, self.array_size) + cdef uint8_t* inverted = malloc(self.bitmap.size_bits) + buf_invert(inverted, self.bitmap.buffer.data, self.bitmap.size_bits) + BitMaskArray.buffer_to_array_1d(result, inverted, self.bitmap.size_bits) free(inverted) return result.reshape(self.array_shape) @@ -323,15 +320,15 @@ cdef class BitMaskArray: cdef BitMaskArray other_buf if isinstance(other, type(self)): other_buf = other - result = np.empty(self.array_size, dtype=bool) - ored = malloc(self.array_size) + result = np.empty(self.bitmap.size_bits, dtype=bool) + ored = malloc(self.bitmap.size_bits) buf_or( ored, self.bitmap.buffer.data, other_buf.bitmap.buffer.data, - self.array_size + self.bitmap.size_bits ) - BitMaskArray.buffer_to_array_1d(result, ored, self.array_size) + BitMaskArray.buffer_to_array_1d(result, ored, self.bitmap.size_bits) free(ored) return result.reshape(self.array_shape) else: @@ -343,7 +340,7 @@ cdef class BitMaskArray: @property def nbytes(self) -> int: - return self.array_nbytes + return self.bitmap.buffer.size_bytes @cython.boundscheck(False) @cython.wraparound(False) @@ -354,11 +351,11 @@ cdef class 
BitMaskArray: out[i] = ArrowBitGet(buf, i) def to_numpy(self) -> ndarray: - cdef ndarray[uint8_t] result = np.empty(self.array_size, dtype=bool) + cdef ndarray[uint8_t] result = np.empty(self.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, self.bitmap.buffer.data, - self.array_size + self.bitmap.size_bits ) return result.reshape(self.array_shape) From 35f3b9c6cbc6ac8f17d15e3a3b7134741fd21caf Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 17:25:59 -0400 Subject: [PATCH 031/126] fix pickling --- pandas/_libs/arrays.pyx | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 68148671a4ee7..285af8de74aad 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -218,8 +218,8 @@ cdef class NDArrayBacked: return to_concat[0]._from_backing_data(new_arr) -def _unpickle_bitmaskarray(array): - bma = BitMaskArray(array) +def _unpickle_bitmaskarray(array, parent): + bma = BitMaskArray(array, parent) return bma @@ -260,15 +260,23 @@ cdef class BitMaskArray: self.buffer_owner = False self.bitmap = bma.bitmap - def __cinit__(self, data): + def __cinit__(self, data, parent=None): + # parent is only required to reconstruct ref-counting from pickle + # but should not be called from user code if isinstance(data, np.ndarray): self.init_from_ndarray(data.ravel()) self.array_shape = data.shape - self.parent = None + if parent: + self.parent = parent + else: + self.parent = None elif isinstance(data, type(self)): self.init_from_bitmaskarray(data) self.array_shape = data.array_shape - self.parent = data + if parent: + self.parent = parent + else: + self.parent = data else: raise TypeError("Unsupported argument to BitMaskArray constructor") @@ -335,8 +343,8 @@ cdef class BitMaskArray: return self.to_numpy() | other def __reduce__(self): - object_state = (self.to_numpy(),) - return (_unpickle_bitmaskarray, object_state) + object_state = 
(self.to_numpy(), self.parent) + return (_unpickle_bitmaskarray, object_state, self.parent) @property def nbytes(self) -> int: From 25e3c51c6a75b6b222425d7cd017f908d020e122 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 Aug 2023 17:27:48 -0400 Subject: [PATCH 032/126] nanoarrow typo fixups --- pandas/_libs/include/pandas/vendored/nanoarrow.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/include/pandas/vendored/nanoarrow.h b/pandas/_libs/include/pandas/vendored/nanoarrow.h index 666dea1448326..84440dcbd423f 100644 --- a/pandas/_libs/include/pandas/vendored/nanoarrow.h +++ b/pandas/_libs/include/pandas/vendored/nanoarrow.h @@ -264,7 +264,7 @@ typedef int ArrowErrorCode; /// \ingroup nanoarrow-errors /// /// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is true), -/// print a message to stderr and abort. If nanoarrow was bulit in release mode, +/// print a message to stderr and abort. If nanoarrow was built in release mode, /// this statement has no effect. You can customize fatal error behaviour /// be defining the NANOARROW_PRINT_AND_DIE macro before including nanoarrow.h /// This macro is provided as a convenience for users and is not used internally. @@ -1252,7 +1252,7 @@ ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, /// Contains more readily extractable values than a raw ArrowSchema. /// Clients can stack or statically allocate this structure but are /// encouraged to use the provided getters to ensure forward -/// compatiblity. +/// compatibility. struct ArrowSchemaView { /// \brief A pointer to the schema represented by this view struct ArrowSchema* schema; @@ -1725,7 +1725,7 @@ ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, /// (i.e. 
NANOARROW_VALIDATION_LEVEL_NONE or NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU /// buffer data access is not possible or more validation (i.e., /// NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or -/// corruptable source. +/// corruptible source. ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, enum ArrowValidationLevel validation_level, struct ArrowError* error); @@ -3059,7 +3059,7 @@ static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* arr switch (private_data->storage_type) { case NANOARROW_TYPE_DENSE_UNION: - // Apppend the target child length to the union offsets buffer + // Append the target child length to the union offsets buffer _NANOARROW_CHECK_RANGE(array->children[child_index]->length, 0, INT32_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( ArrowArrayBuffer(array, 1), (int32_t)array->children[child_index]->length - 1)); From 82e082e714dfcfc5fa549d739deade2dfef98b15 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 17 Aug 2023 12:41:35 -0400 Subject: [PATCH 033/126] vectorized to_numpy() --- pandas/_libs/arrays.pyx | 13 +++-- .../_libs/include/pandas/vendored/nanoarrow.h | 55 +++++++++++++++++++ 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 285af8de74aad..58d1f4659058a 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -35,6 +35,7 @@ cdef extern from "pandas/vendored/nanoarrow.h": void ArrowBitmapReserve(ArrowBitmap*, int64_t) void ArrowBitmapAppendInt8Unsafe(ArrowBitmap*, const int8_t *, int64_t) void ArrowBitmapReset(ArrowBitmap*) + void ArrowBitsUnpackInt8(const uint8_t*, int64_t, int64_t, int8_t*) int8_t ArrowBitGet(const uint8_t*, int64_t) void ArrowBitSetTo(uint8_t*, int64_t, uint8_t) @@ -223,13 +224,17 @@ def _unpickle_bitmaskarray(array, parent): return bma -cdef void buf_invert(uint8_t* dest, uint8_t* src, Py_ssize_t size): +@cython.boundscheck(False) 
+@cython.wraparound(False) +cdef void buf_invert(uint8_t* dest, uint8_t* src, Py_ssize_t size) noexcept: cdef Py_ssize_t i for i in range(size): dest[i] = ~src[i] -cdef void buf_or(uint8_t* dest, uint8_t* src1, uint8_t* src2, Py_ssize_t size): +@cython.boundscheck(False) +@cython.wraparound(False) +cdef void buf_or(uint8_t* dest, uint8_t* src1, uint8_t* src2, Py_ssize_t size) noexcept: cdef Py_ssize_t i for i in range(size): dest[i] = src1[i] | src2[i] @@ -354,9 +359,7 @@ cdef class BitMaskArray: @cython.wraparound(False) @staticmethod cdef void buffer_to_array_1d(uint8_t[:] out, const uint8_t* buf, Py_ssize_t size): - cdef Py_ssize_t i - for i in range(size): - out[i] = ArrowBitGet(buf, i) + ArrowBitsUnpackInt8(buf, 0, size, &out[0]) def to_numpy(self) -> ndarray: cdef ndarray[uint8_t] result = np.empty(self.bitmap.size_bits, dtype=bool) diff --git a/pandas/_libs/include/pandas/vendored/nanoarrow.h b/pandas/_libs/include/pandas/vendored/nanoarrow.h index 84440dcbd423f..1ff568416a05a 100644 --- a/pandas/_libs/include/pandas/vendored/nanoarrow.h +++ b/pandas/_libs/include/pandas/vendored/nanoarrow.h @@ -1482,6 +1482,11 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l /// \brief Count true values in a bitmap static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to); + +/// \brief Extract int8 boolean values from a range in a bitmap +static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, + int64_t length, int8_t* out); + /// \brief Initialize an ArrowBitmap /// /// Initialize the builder's buffer, empty its cache, and reset the size to zero @@ -2119,6 +2124,17 @@ static inline int64_t _ArrowBytesForBits(int64_t bits) { return (bits >> 3) + ((bits & 7) != 0); } +static inline void _ArrowBitsUnpackInt8(const uint8_t word, int8_t* out) { + out[0] = (word >> 0) & 1; + out[1] = (word >> 1) & 1; + out[2] = (word >> 2) & 1; + out[3] = (word >> 3) & 1; + out[4] = (word >> 4) & 1; + 
out[5] = (word >> 5) & 1; + out[6] = (word >> 6) & 1; + out[7] = (word >> 7) & 1; +} + static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 | values[5] << 5 | values[6] << 6 | values[7] << 7); @@ -2133,6 +2149,45 @@ static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) { return (bits[i >> 3] >> (i & 0x07)) & 1; } +static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, + int64_t length, int8_t* out) { + if (length == 0) { + return; + } + + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; + + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; + + if (bytes_begin == bytes_last_valid) { + for (int i = 0; i < length; i++) { + out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + return; + } + + // first byte + for (int i = 0; i < 8 - (i_begin % 8); i++) { + *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + _ArrowBitsUnpackInt8(bits[i], out); + out += 8; + } + + // last byte + const int bits_remaining = i_end % 8 == 0 ? 
8 : i_end % 8; + for (int i = 0; i < bits_remaining; i++) { + *out++ = ArrowBitGet(&bits[bytes_last_valid], i); + } +} + static inline void ArrowBitSet(uint8_t* bits, int64_t i) { bits[i / 8] |= _ArrowkBitmask[i % 8]; } From c140af4a5f1445118461ddc4f4b1310f17067925 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 17 Aug 2023 12:53:42 -0400 Subject: [PATCH 034/126] sum impl --- pandas/_libs/arrays.pyx | 4 ++++ pandas/core/arrays/masked.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 58d1f4659058a..536597c514646 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -38,6 +38,7 @@ cdef extern from "pandas/vendored/nanoarrow.h": void ArrowBitsUnpackInt8(const uint8_t*, int64_t, int64_t, int8_t*) int8_t ArrowBitGet(const uint8_t*, int64_t) void ArrowBitSetTo(uint8_t*, int64_t, uint8_t) + int64_t ArrowBitCountSet(const uint8_t*, int64_t, int64_t) @cython.freelist(16) @@ -355,6 +356,9 @@ cdef class BitMaskArray: def nbytes(self) -> int: return self.bitmap.buffer.size_bytes + def sum(self) -> bool: + return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) + @cython.boundscheck(False) @cython.wraparound(False) @staticmethod diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 14a64bd021465..5fc01ee1c6ea3 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1092,7 +1092,7 @@ def value_counts(self, dropna: bool = True) -> Series: # if we want nans, count the mask counts = np.empty(len(value_counts) + 1, dtype="int64") counts[:-1] = value_counts - counts[-1] = self._mask.to_numpy().sum() + counts[-1] = self._mask.sum() index = Index(keys, dtype=self.dtype).insert(len(keys), self.dtype.na_value) index = index.astype(self.dtype) From 86ce6569987bb2d526383de6d6074f676cf85a40 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 17 Aug 2023 13:01:08 -0400 Subject: [PATCH 035/126] any impl --- pandas/_libs/arrays.pyx 
| 6 ++++++ pandas/core/arrays/masked.py | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 536597c514646..a9302719a02fa 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -356,6 +356,12 @@ cdef class BitMaskArray: def nbytes(self) -> int: return self.bitmap.buffer.size_bytes + def any(self) -> bool: + # TODO: we might want to create a short circuiting implementation in + # nanoarrow, but even with a complete sum this is cheaper than + # serializing to numpy for an any call + return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) > 0 + def sum(self) -> bool: return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 5fc01ee1c6ea3..e2474707949a4 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -676,7 +676,7 @@ def reconstruct(x: np.ndarray): return tuple(reconstruct(x) for x in result) elif method == "reduce": # e.g. np.add.reduce; test_ufunc_reduce_raises - if self._mask.to_numpy().any(): + if self._mask.any(): return self._na_value return result else: @@ -697,7 +697,7 @@ def _hasna(self) -> bool: # source code using it.. 
# error: Incompatible return value type (got "bool_", expected "bool") - return self._mask.to_numpy().any() # type: ignore[return-value] + return self._mask.any() # type: ignore[return-value] def _propagate_mask( self, mask: npt.NDArray[np.bool_] | BitMaskArray | None, other @@ -1401,7 +1401,7 @@ def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): if skipna: return result else: - if result or len(self) == 0 or not self._mask.to_numpy().any(): + if result or len(self) == 0 or not self._mask.any(): return result else: return self.dtype.na_value @@ -1485,7 +1485,7 @@ def all(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): if skipna: return result else: - if not result or len(self) == 0 or not self._mask.to_numpy().any(): + if not result or len(self) == 0 or not self._mask.any(): return result else: return self.dtype.na_value From 03b16611e23eaa5c38d98a5143a76a049eb87822 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 17 Aug 2023 13:46:43 -0400 Subject: [PATCH 036/126] updated cython typing --- pandas/_libs/arrays.pyi | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 81398fe0c893e..55347109a8ff2 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -47,9 +47,7 @@ class NDArrayBacked: class BitMaskArray: parent: Self - def __cinit__(self, data: np.ndarray | Self) -> None: ... def __init__(self, data: np.ndarray | Self) -> None: ... - def __dealloc__(self) -> None: ... def __setitem__(self, key: PositionalIndexer, value: ArrayLike | bool) -> None: ... def __getitem__(self, key: PositionalIndexer) -> bool: ... def __invert__(self) -> np.ndarray: ... 
From e9d4da4c47d8bb9a3e11ef37892098e10d43ee94 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 17 Aug 2023 16:24:36 -0400 Subject: [PATCH 037/126] remove bad __or__ impl --- pandas/_libs/arrays.pyx | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index a9302719a02fa..8dc38cdc44d3c 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -232,15 +232,6 @@ cdef void buf_invert(uint8_t* dest, uint8_t* src, Py_ssize_t size) noexcept: for i in range(size): dest[i] = ~src[i] - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void buf_or(uint8_t* dest, uint8_t* src1, uint8_t* src2, Py_ssize_t size) noexcept: - cdef Py_ssize_t i - for i in range(size): - dest[i] = src1[i] | src2[i] - - cdef class BitMaskArray: cdef: ArrowBitmap bitmap @@ -323,30 +314,14 @@ cdef class BitMaskArray: result = np.empty(self.bitmap.size_bits, dtype=bool) cdef uint8_t* inverted = malloc(self.bitmap.size_bits) - buf_invert(inverted, self.bitmap.buffer.data, self.bitmap.size_bits) + # TODO: upstream invert or make sure we handle size == 0 here + buf_invert(inverted, self.bitmap.buffer.data, self.bitmap.size_bits // 8 + 1) BitMaskArray.buffer_to_array_1d(result, inverted, self.bitmap.size_bits) free(inverted) return result.reshape(self.array_shape) def __or__(self, other): - cdef ndarray[uint8_t] result - cdef uint8_t* ored - cdef BitMaskArray other_buf - if isinstance(other, type(self)): - other_buf = other - result = np.empty(self.bitmap.size_bits, dtype=bool) - ored = malloc(self.bitmap.size_bits) - buf_or( - ored, - self.bitmap.buffer.data, - other_buf.bitmap.buffer.data, - self.bitmap.size_bits - ) - BitMaskArray.buffer_to_array_1d(result, ored, self.bitmap.size_bits) - free(ored) - return result.reshape(self.array_shape) - else: - return self.to_numpy() | other + return self.to_numpy() | other def __reduce__(self): object_state = (self.to_numpy(), self.parent) From 
1993e969137bcbcd193cb25ac4d6b272fc781a6e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 17 Aug 2023 16:51:41 -0400 Subject: [PATCH 038/126] fix __or__ --- pandas/_libs/arrays.pyx | 3 +++ pandas/core/arrays/masked.py | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 8dc38cdc44d3c..5a1f3cc3109f4 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -321,6 +321,9 @@ cdef class BitMaskArray: return result.reshape(self.array_shape) def __or__(self, other): + if isinstance(other, type(self)): + return self.to_numpy() | other.to_numpy() + return self.to_numpy() | other def __reduce__(self): diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index e2474707949a4..7d82760c7faa3 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -695,9 +695,7 @@ def _hasna(self) -> bool: # Note: this is expensive right now! The hope is that we can # make this faster by having an optional mask, but not have to change # source code using it.. 
- - # error: Incompatible return value type (got "bool_", expected "bool") - return self._mask.any() # type: ignore[return-value] + return self._mask.any() def _propagate_mask( self, mask: npt.NDArray[np.bool_] | BitMaskArray | None, other From 10ce5ca01fbcbcf6e000c90dd71073538afa592b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 17 Aug 2023 17:29:31 -0400 Subject: [PATCH 039/126] removed faulty inversion --- pandas/_libs/arrays.pyx | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 5a1f3cc3109f4..09ce24cceb10e 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -16,12 +16,6 @@ from numpy cimport ( cnp.import_array() -from libc.stdlib cimport ( - free, - malloc, -) - - cdef extern from "pandas/vendored/nanoarrow.h": struct ArrowBuffer: uint8_t* data @@ -225,13 +219,6 @@ def _unpickle_bitmaskarray(array, parent): return bma -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void buf_invert(uint8_t* dest, uint8_t* src, Py_ssize_t size) noexcept: - cdef Py_ssize_t i - for i in range(size): - dest[i] = ~src[i] - cdef class BitMaskArray: cdef: ArrowBitmap bitmap @@ -310,15 +297,7 @@ cdef class BitMaskArray: return self.to_numpy()[key] def __invert__(self): - cdef ndarray[uint8_t] result - result = np.empty(self.bitmap.size_bits, dtype=bool) - - cdef uint8_t* inverted = malloc(self.bitmap.size_bits) - # TODO: upstream invert or make sure we handle size == 0 here - buf_invert(inverted, self.bitmap.buffer.data, self.bitmap.size_bits // 8 + 1) - BitMaskArray.buffer_to_array_1d(result, inverted, self.bitmap.size_bits) - free(inverted) - return result.reshape(self.array_shape) + return ~self.to_numpy() def __or__(self, other): if isinstance(other, type(self)): From e8b7819d41582b5228361548afbaafcfe2dd7165 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 17 Aug 2023 18:27:14 -0400 Subject: [PATCH 040/126] more performant bit unpacking --- 
pandas/_libs/include/pandas/vendored/nanoarrow.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/include/pandas/vendored/nanoarrow.h b/pandas/_libs/include/pandas/vendored/nanoarrow.h index 1ff568416a05a..aad56a29eb5ab 100644 --- a/pandas/_libs/include/pandas/vendored/nanoarrow.h +++ b/pandas/_libs/include/pandas/vendored/nanoarrow.h @@ -2125,14 +2125,14 @@ static inline int64_t _ArrowBytesForBits(int64_t bits) { } static inline void _ArrowBitsUnpackInt8(const uint8_t word, int8_t* out) { - out[0] = (word >> 0) & 1; - out[1] = (word >> 1) & 1; - out[2] = (word >> 2) & 1; - out[3] = (word >> 3) & 1; - out[4] = (word >> 4) & 1; - out[5] = (word >> 5) & 1; - out[6] = (word >> 6) & 1; - out[7] = (word >> 7) & 1; + out[0] = (word & 0x1) != 0; + out[1] = (word & 0x2) != 0; + out[2] = (word & 0x4) != 0; + out[3] = (word & 0x8) != 0; + out[4] = (word & 0x10) != 0; + out[5] = (word & 0x20) != 0; + out[6] = (word & 0x40) != 0; + out[7] = (word & 0x80) != 0; } static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { From 9fdb65294df59ee9094db5879cbd1ff174b35975 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 18 Aug 2023 10:19:03 -0400 Subject: [PATCH 041/126] try non-shift nanoarrow packing --- pandas/_libs/include/pandas/vendored/nanoarrow.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/include/pandas/vendored/nanoarrow.h b/pandas/_libs/include/pandas/vendored/nanoarrow.h index aad56a29eb5ab..30fcf04008eba 100644 --- a/pandas/_libs/include/pandas/vendored/nanoarrow.h +++ b/pandas/_libs/include/pandas/vendored/nanoarrow.h @@ -2136,8 +2136,15 @@ static inline void _ArrowBitsUnpackInt8(const uint8_t word, int8_t* out) { } static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { - *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 | - values[5] << 5 | values[6] << 6 | values[7] << 7); + *out = (values[0] + | 
((values[1] + 0x1) & 0x2) + | ((values[2] + 0x3) & 0x4) + | ((values[3] + 0x7) & 0x8) + | ((values[4] + 0xf) & 0x10) + | ((values[5] + 0x1f) & 0x20) + | ((values[6] + 0x3f) & 0x40) + | ((values[7] + 0x7f) & 0x80) + ); } static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) { From 17059cbb287e15aea9e09a887da8f77f04921bca Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 18 Aug 2023 10:44:26 -0400 Subject: [PATCH 042/126] Remove to_numpy + copy chains --- pandas/core/arrays/masked.py | 16 ++++++++-------- pandas/tests/arrays/masked/test_arithmetic.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 7d82760c7faa3..dc93e58030dbe 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -416,22 +416,22 @@ def round(self, decimals: int = 0, *args, **kwargs): values = np.round(self._data, decimals=decimals, **kwargs) # Usually we'll get same type as self, but ndarray[bool] casts to float - return self._maybe_mask_result(values, self._mask.to_numpy().copy()) + return self._maybe_mask_result(values, self._mask.to_numpy()) # ------------------------------------------------------------------ # Unary Methods def __invert__(self) -> Self: - return self._simple_new(~self._data, self._mask.to_numpy().copy()) + return self._simple_new(~self._data, self._mask.to_numpy()) def __neg__(self) -> Self: - return self._simple_new(-self._data, self._mask.to_numpy().copy()) + return self._simple_new(-self._data, self._mask.to_numpy()) def __pos__(self) -> Self: return self.copy() def __abs__(self) -> Self: - return self._simple_new(abs(self._data), self._mask.to_numpy().copy()) + return self._simple_new(abs(self._data), self._mask.to_numpy()) # ------------------------------------------------------------------ @@ -565,7 +565,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: data = self._data.astype(dtype.numpy_dtype, copy=copy) # mask is 
copied depending on whether the data was copied, and # not directly depending on the `copy` keyword - mask = self._mask if data is self._data else self._mask.to_numpy().copy() + mask = self._mask if data is self._data else self._mask.to_numpy() cls = dtype.construct_array_type() return cls(data, mask, copy=False) # type: ignore[arg-type] @@ -702,7 +702,7 @@ def _propagate_mask( ) -> npt.NDArray[np.bool_]: if mask is None: mask = ( - self._mask.to_numpy().copy() + self._mask.to_numpy() ) # TODO: need test for BooleanArray needing a copy if other is libmissing.NA: # GH#45421 don't alter inplace @@ -900,7 +900,7 @@ def _maybe_mask_result( return result def isna(self) -> np.ndarray: - return self._mask.to_numpy().copy() + return self._mask.to_numpy() @property def _na_value(self): @@ -982,7 +982,7 @@ def isin(self, values) -> BooleanArray: # type: ignore[override] def copy(self) -> Self: data = self._data.copy() - mask = self._mask.to_numpy().copy() + mask = self._mask.to_numpy() return self._simple_new(data, mask) def unique(self) -> Self: diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index 21e292e5bbc29..04deac24a9211 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -76,7 +76,7 @@ def test_array_NA(data, all_arithmetic_operators): scalar = pd.NA scalar_array = pd.array([pd.NA] * len(data), dtype=data.dtype) - mask = data._mask.to_numpy().copy() + mask = data._mask.to_numpy() if is_bool_not_implemented(data, all_arithmetic_operators): msg = "operator '.*' not implemented for bool dtypes" From c5a3584ce1124516def7cae211631dc2c3215430 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 18 Aug 2023 11:34:33 -0400 Subject: [PATCH 043/126] higher performance dunders --- pandas/_libs/arrays.pyx | 139 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx 
index 09ce24cceb10e..ec9f74e723738 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -7,6 +7,10 @@ import numpy as np cimport numpy as cnp from cpython cimport PyErr_Clear +from libc.stdlib cimport ( + free, + malloc, +) from numpy cimport ( int8_t, int64_t, @@ -297,14 +301,87 @@ cdef class BitMaskArray: return self.to_numpy()[key] def __invert__(self): + # TODO: could invert the buffer first then go to numpy return ~self.to_numpy() + def __and__(self, other): + cdef ndarray[uint8_t] result + cdef BitMaskArray other_bma + if isinstance(other, type(self)): + other_bma = other + if self.bitmap.size_bits == 0: + return np.empty(dtype=bool).reshape(self.array_shape) + + buf = malloc(self.bitmap.size_bits) + BitMaskArray.buf_and( + self.bitmap.buffer.data, + other_bma.bitmap.buffer.data, + self.bitmap.size_bits // 8 + 1, + buf + ) + result = np.empty(self.bitmap.size_bits, dtype=bool) + BitMaskArray.buffer_to_array_1d( + result, + buf, + self.bitmap.size_bits + ) + free(buf) + return result.reshape(self.array_shape) + + return self.to_numpy() & other + def __or__(self, other): + cdef ndarray[uint8_t] result + cdef BitMaskArray other_bma if isinstance(other, type(self)): - return self.to_numpy() | other.to_numpy() + other_bma = other + if self.bitmap.size_bits == 0: + return np.empty(dtype=bool).reshape(self.array_shape) + + buf = malloc(self.bitmap.size_bits) + BitMaskArray.buf_or( + self.bitmap.buffer.data, + other_bma.bitmap.buffer.data, + self.bitmap.size_bits // 8 + 1, + buf + ) + result = np.empty(self.bitmap.size_bits, dtype=bool) + BitMaskArray.buffer_to_array_1d( + result, + buf, + self.bitmap.size_bits + ) + free(buf) + return result.reshape(self.array_shape) return self.to_numpy() | other + def __xor__(self, other): + cdef ndarray[uint8_t] result + cdef BitMaskArray other_bma + if isinstance(other, type(self)): + other_bma = other + if self.bitmap.size_bits == 0: + return np.empty(dtype=bool).reshape(self.array_shape) + + buf = 
malloc(self.bitmap.size_bits) + BitMaskArray.buf_xor( + self.bitmap.buffer.data, + other_bma.bitmap.buffer.data, + self.bitmap.size_bits // 8 + 1, + buf + ) + result = np.empty(self.bitmap.size_bits, dtype=bool) + BitMaskArray.buffer_to_array_1d( + result, + buf, + self.bitmap.size_bits + ) + free(buf) + return result.reshape(self.array_shape) + + return self.to_numpy() ^ other + def __reduce__(self): object_state = (self.to_numpy(), self.parent) return (_unpickle_bitmaskarray, object_state, self.parent) @@ -314,20 +391,70 @@ cdef class BitMaskArray: return self.bitmap.buffer.size_bytes def any(self) -> bool: - # TODO: we might want to create a short circuiting implementation in - # nanoarrow, but even with a complete sum this is cheaper than - # serializing to numpy for an any call - return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) > 0 + return BitMaskArray.buf_any(self.bitmap.buffer.data, self.bitmap.size_bits) def sum(self) -> bool: return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) - @cython.boundscheck(False) + @cython.boundscheck(False) # TODO: Removing this causes an IndexError? Zero size? 
@cython.wraparound(False) @staticmethod cdef void buffer_to_array_1d(uint8_t[:] out, const uint8_t* buf, Py_ssize_t size): ArrowBitsUnpackInt8(buf, 0, size, &out[0]) + @staticmethod + cdef bint buf_any(const uint8_t* buf1, Py_ssize_t nbits): + cdef Py_ssize_t i, nbytes = nbits // 8 + 1, rem = nbits % 8 + if nbits == 0: + return False + + for i in range(nbytes): + if buf1[i] > 0: + return True + + for i in range(rem): + if ArrowBitGet(buf1, nbits - rem): + return True + + return False + + # TODO: clean up signatures - don't mix nbits and nbytes + # Note that in cases where the size_bits doesn't end on a word + # boundary that these will still operate on the remaining bits, + # with undefined values therein + @staticmethod + cdef void buf_or( + const uint8_t* buf1, + const uint8_t* buf2, + Py_ssize_t nbytes, + uint8_t* out + ): + cdef Py_ssize_t i + for i in range(nbytes): + out[i] = buf1[i] | buf2[i] + + @staticmethod + cdef void buf_xor( + const uint8_t* buf1, + const uint8_t* buf2, + Py_ssize_t nbytes, + uint8_t* out + ): + cdef Py_ssize_t i + for i in range(nbytes): + out[i] = buf1[i] ^ buf2[i] + + @staticmethod + cdef void buf_and( + const uint8_t* buf1, + const uint8_t* buf2, + Py_ssize_t nbytes, + uint8_t* out + ): + cdef Py_ssize_t i + for i in range(nbytes): + out[i] = buf1[i] & buf2[i] + def to_numpy(self) -> ndarray: cdef ndarray[uint8_t] result = np.empty(self.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( From 28b589fd90d971063392ec6222d3a67bb91c0918 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 18 Aug 2023 11:43:39 -0400 Subject: [PATCH 044/126] updated typing --- pandas/_libs/arrays.pyi | 4 ++++ pandas/_libs/arrays.pyx | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 55347109a8ff2..528ee56deeecd 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -51,8 +51,12 @@ class BitMaskArray: def __setitem__(self, key: PositionalIndexer, value: 
ArrayLike | bool) -> None: ... def __getitem__(self, key: PositionalIndexer) -> bool: ... def __invert__(self) -> np.ndarray: ... + def __and__(self, other: np.ndarray | Self) -> np.ndarray: ... def __or__(self, other: np.ndarray | Self) -> np.ndarray: ... + def __xor__(self, other: np.ndarray | Self) -> np.ndarray: ... def __reduce__(self) -> Tuple[Callable[[np.ndarray], Self], Tuple[np.ndarray]]: ... @property def nbytes(self) -> int: ... + def any(self) -> bool: ... + def sum(self) -> int: ... def to_numpy(self) -> np.ndarray: ... diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index ec9f74e723738..e5ac6d1c92572 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -393,7 +393,7 @@ cdef class BitMaskArray: def any(self) -> bool: return BitMaskArray.buf_any(self.bitmap.buffer.data, self.bitmap.size_bits) - def sum(self) -> bool: + def sum(self) -> int: return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) @cython.boundscheck(False) # TODO: Removing this causes an IndexError? Zero size? 
From e3618fbfc190c4ec5d2224e83a7c651163d491a9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 18 Aug 2023 12:10:38 -0400 Subject: [PATCH 045/126] consolidated to_numpy() --- pandas/core/arrays/masked.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index dc93e58030dbe..2d22029c5fd5b 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -197,7 +197,8 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: # TODO: need to change this to special case multiple # indexers versus just scalar - newmask = self._mask.to_numpy()[item] + np_mask = self._mask.to_numpy() + newmask = np_mask[item] if is_bool(newmask): # This is a scalar indexing if newmask: @@ -205,7 +206,7 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self._data[item] # sending self._mask avoids copy of buffer - if np.array_equal(newmask, self._mask.to_numpy()): + if np.array_equal(newmask, np_mask): return self._simple_new(self._data[item], self._mask) return self._simple_new(self._data[item], newmask) From 4c82771407e663993aba947e859a575cf3830c99 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 18 Aug 2023 13:48:25 -0400 Subject: [PATCH 046/126] fixups --- pandas/_libs/arrays.pyx | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index e5ac6d1c92572..4c438bec43a7b 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -402,18 +402,20 @@ cdef class BitMaskArray: cdef void buffer_to_array_1d(uint8_t[:] out, const uint8_t* buf, Py_ssize_t size): ArrowBitsUnpackInt8(buf, 0, size, &out[0]) + @cython.boundscheck(False) + @cython.wraparound(False) @staticmethod - cdef bint buf_any(const uint8_t* buf1, Py_ssize_t nbits): - cdef Py_ssize_t i, nbytes = nbits // 8 + 1, rem = nbits % 8 + cdef bint buf_any(const uint8_t* buf, Py_ssize_t nbits): + cdef Py_ssize_t i, 
bits_remaining = nbits % 8, size_bytes = nbits // 8 if nbits == 0: return False - for i in range(nbytes): - if buf1[i] > 0: + for i in range(size_bytes): + if buf[i] > 0: return True - for i in range(rem): - if ArrowBitGet(buf1, nbits - rem): + for i in range(bits_remaining): + if ArrowBitGet(buf, nbits - i - 1): return True return False @@ -422,6 +424,8 @@ cdef class BitMaskArray: # Note that in cases where the size_bits doesn't end on a word # boundary that these will still operate on the remaining bits, # with undefined values therein + @cython.boundscheck(False) + @cython.wraparound(False) @staticmethod cdef void buf_or( const uint8_t* buf1, @@ -433,6 +437,8 @@ cdef class BitMaskArray: for i in range(nbytes): out[i] = buf1[i] | buf2[i] + @cython.boundscheck(False) + @cython.wraparound(False) @staticmethod cdef void buf_xor( const uint8_t* buf1, @@ -444,6 +450,8 @@ cdef class BitMaskArray: for i in range(nbytes): out[i] = buf1[i] ^ buf2[i] + @cython.boundscheck(False) + @cython.wraparound(False) @staticmethod cdef void buf_and( const uint8_t* buf1, From 633935d1c368be75f90a5c2a708bae769ad413ad Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 11:25:51 -0400 Subject: [PATCH 047/126] deferred to_numpy() calls in boolean --- pandas/_libs/arrays.pyi | 3 +++ pandas/_libs/arrays.pyx | 10 ++++++++ pandas/core/arrays/boolean.py | 12 +++------ pandas/core/ops/mask_ops.py | 47 ++++++++++++++++++++++++----------- 4 files changed, 49 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 528ee56deeecd..285d96df0f2bb 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -13,6 +13,7 @@ from pandas._typing import ( PositionalIndexer, Self, Shape, + type_t, ) class NDArrayBacked: @@ -57,6 +58,8 @@ class BitMaskArray: def __reduce__(self) -> Tuple[Callable[[np.ndarray], Self], Tuple[np.ndarray]]: ... @property def nbytes(self) -> int: ... + def shape(self) -> tuple[int, ...]: ... 
+ def dtype(self) -> type_t[bool]: ... def any(self) -> bool: ... def sum(self) -> int: ... def to_numpy(self) -> np.ndarray: ... diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 4c438bec43a7b..47e3fc38a698c 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -390,6 +390,16 @@ cdef class BitMaskArray: def nbytes(self) -> int: return self.bitmap.buffer.size_bytes + @property + def shape(self): + """Strictly for NumPy compat in mask_ops""" + return self.array_shape + + @property + def dtype(self): + """Strictly for NumPy compat in mask_ops""" + return bool + def any(self) -> bool: return BitMaskArray.buf_any(self.bitmap.buffer.data, self.bitmap.size_bits) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 62ae43f529204..06da136a63d86 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -363,7 +363,7 @@ def _logical_method(self, other, op): mask = None if isinstance(other, BooleanArray): - other, mask = other._data, other._mask.to_numpy() + other, mask = other._data, other._mask elif is_list_like(other): other = np.asarray(other, dtype="bool") if other.ndim > 1: @@ -382,16 +382,12 @@ def _logical_method(self, other, op): raise ValueError("Lengths must match") if op.__name__ in {"or_", "ror_"}: - result, mask = ops.kleene_or(self._data, other, self._mask.to_numpy(), mask) + result, mask = ops.kleene_or(self._data, other, self._mask, mask) elif op.__name__ in {"and_", "rand_"}: - result, mask = ops.kleene_and( - self._data, other, self._mask.to_numpy(), mask - ) + result, mask = ops.kleene_and(self._data, other, self._mask, mask) else: # i.e. xor, rxor - result, mask = ops.kleene_xor( - self._data, other, self._mask.to_numpy(), mask - ) + result, mask = ops.kleene_xor(self._data, other, self._mask, mask) # i.e. 
BooleanArray return self._maybe_mask_result(result, mask) diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index adc1f63c568bf..049a815296a3f 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -9,13 +9,14 @@ lib, missing as libmissing, ) +from pandas._libs.arrays import BitMaskArray def kleene_or( left: bool | np.ndarray | libmissing.NAType, right: bool | np.ndarray | libmissing.NAType, - left_mask: np.ndarray | None, - right_mask: np.ndarray | None, + left_mask: np.ndarray | BitMaskArray | None, + right_mask: np.ndarray | BitMaskArray | None, ): """ Boolean ``or`` using Kleene logic. @@ -53,6 +54,8 @@ def kleene_or( result = left | right if right_mask is not None: + left_mask = left_mask.to_numpy() + right_mask = right_mask.to_numpy() # output is unknown where (False & NA), (NA & False), (NA & NA) left_false = ~(left | left_mask) right_false = ~(right | right_mask) @@ -63,12 +66,13 @@ def kleene_or( ) else: if right is True: - mask = np.zeros_like(left_mask) - elif right is libmissing.NA: - mask = (~left & ~left_mask) | left_mask + mask = np.zeros(left_mask.shape, left_mask.dtype) else: - # False - mask = left_mask.copy() + left_mask = left_mask.to_numpy() + if right is libmissing.NA: + mask = (~left & ~left_mask) | left_mask + else: + mask = left_mask return result, mask @@ -76,8 +80,8 @@ def kleene_or( def kleene_xor( left: bool | np.ndarray | libmissing.NAType, right: bool | np.ndarray | libmissing.NAType, - left_mask: np.ndarray | None, - right_mask: np.ndarray | None, + left_mask: np.ndarray | BitMaskArray | None, + right_mask: np.ndarray | BitMaskArray | None, ): """ Boolean ``xor`` using Kleene logic. 
@@ -117,9 +121,12 @@ def kleene_xor( if right_mask is None: if right is libmissing.NA: - mask = np.ones_like(left_mask) + mask = np.ones(left_mask.shape, left_mask.dtype) else: - mask = left_mask.copy() + if isinstance(left_mask, BitMaskArray): + mask = left_mask.to_numpy() + else: + mask = left_mask.copy() else: mask = left_mask | right_mask @@ -129,8 +136,8 @@ def kleene_xor( def kleene_and( left: bool | libmissing.NAType | np.ndarray, right: bool | libmissing.NAType | np.ndarray, - left_mask: np.ndarray | None, - right_mask: np.ndarray | None, + left_mask: np.ndarray | BitMaskArray | None, + right_mask: np.ndarray | BitMaskArray | None, ): """ Boolean ``and`` using Kleene logic. @@ -166,16 +173,26 @@ def kleene_and( result = left & right if right_mask is None: + if isinstance(left_mask, BitMaskArray): + left_mask = left_mask.to_numpy() + # Scalar `right` if right is libmissing.NA: mask = (left & ~left_mask) | left_mask - else: - mask = left_mask.copy() + if not isinstance(left_mask, BitMaskArray): # already a copy + mask = left_mask.copy() if right is False: # unmask everything mask[:] = False else: + # TODO: Cython 3 changed support for radd / ror methods and may + # not be working? 
For now convert to NumPy + if isinstance(left_mask, BitMaskArray): + left_mask = left_mask.to_numpy() + if isinstance(right_mask, BitMaskArray): + right_mask = right_mask.to_numpy() + # unmask where either left or right is False left_false = ~(left | left_mask) right_false = ~(right | right_mask) From 6ed2c55c23328d1dd509cda8ffc6ee5573706358 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 12:58:35 -0400 Subject: [PATCH 048/126] test fix --- pandas/core/ops/mask_ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index 049a815296a3f..a6dbb1db28af2 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -54,8 +54,10 @@ def kleene_or( result = left | right if right_mask is not None: - left_mask = left_mask.to_numpy() - right_mask = right_mask.to_numpy() + if isinstance(left_mask, BitMaskArray): + left_mask = left_mask.to_numpy() + if isinstance(right_mask, BitMaskArray): + right_mask = right_mask.to_numpy() # output is unknown where (False & NA), (NA & False), (NA & NA) left_false = ~(left | left_mask) right_false = ~(right | right_mask) From d8e715d699d78ee63b11403934e86dc9a82cab5e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 15:07:26 -0400 Subject: [PATCH 049/126] take and copy implementations --- pandas/_libs/arrays.pyi | 2 + pandas/_libs/arrays.pyx | 81 +++++++++++++++++++++++++++++++++++- pandas/core/algorithms.py | 15 +++++-- pandas/core/arrays/masked.py | 14 +++---- 4 files changed, 98 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 285d96df0f2bb..9c53fa93f473d 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -62,4 +62,6 @@ class BitMaskArray: def dtype(self) -> type_t[bool]: ... def any(self) -> bool: ... def sum(self) -> int: ... + def take(self, indices: np.ndarray, axis: int, fill_value: bool) -> np.ndarray: ... + def copy(self) -> Self: ... 
def to_numpy(self) -> np.ndarray: ... diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 47e3fc38a698c..78ad8bbdc4ec7 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -11,6 +11,7 @@ from libc.stdlib cimport ( free, malloc, ) +from libc.string cimport memcpy from numpy cimport ( int8_t, int64_t, @@ -248,7 +249,10 @@ cdef class BitMaskArray: self.buffer_owner = False self.bitmap = bma.bitmap - def __cinit__(self, data, parent=None): + def __cinit__(self): + self.parent = False + + def __init__(self, data, parent=None): # parent is only required to reconstruct ref-counting from pickle # but should not be called from user code if isinstance(data, np.ndarray): @@ -272,6 +276,30 @@ cdef class BitMaskArray: if self.buffer_owner: ArrowBitmapReset(&self.bitmap) + @staticmethod + cdef BitMaskArray copy_from_bitmaskarray(BitMaskArray old_bma): + """ + Constructs a new BitMaskArray from a bitmap pointer. Copies data + and manages the subsequenty lifecycle of the bitmap. 
+ """ + # Bypass __init__ calls + cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) + cdef uint8_t* buf + cdef ArrowBitmap bitmap + # TODO: this leaks a bit into the internals of the nanoarrow bitmap + # We may want to upstream a BitmapCopy function instead + ArrowBitmapInit(&bitmap) + buf = malloc(old_bma.bitmap.size_bits) + memcpy(buf, old_bma.bitmap.buffer.data, old_bma.bitmap.size_bits) + bitmap.buffer.size_bytes = old_bma.bitmap.buffer.size_bytes + bitmap.size_bits = old_bma.bitmap.size_bits + bitmap.buffer.data = buf + + bma.bitmap = bitmap + bma.array_shape = old_bma.array_shape + bma.buffer_owner = True + return bma + def __setitem__(self, key, value): cdef const uint8_t[:] arr1d cdef Py_ssize_t i = 0 @@ -406,6 +434,57 @@ cdef class BitMaskArray: def sum(self) -> int: return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void c_take( + self, + const int64_t[:] indices, + uint8_t[:] out, + bint fill_value, + bint allow_fill + ): + # TODO: we should try and upstream this into nanoarrow with a better algo + cdef Py_ssize_t i + cdef uint8_t value + if not allow_fill: + for i in range(indices.shape[0]): + out[i] = ArrowBitGet(self.bitmap.buffer.data, indices[i]) + else: + for i in range(indices.shape[0]): + value = ArrowBitGet(self.bitmap.buffer.data, indices[i]) + if value == 1: + out[i] = fill_value + else: + out[i] = value + + def take( + self, + const int64_t[:] indices, + int axis=0, + bint fill_value=0, + bint allow_fill=0 + ) -> np.ndarray: + if axis != 0: + raise NotImplementedError( + "BitMaskArray.take only implemented for axis=0" + ) + + # TODO: would be great to check this here, though most of these functions + # are by definition unsafe + # if indices.min() < 0: + # raise NotImplementedError( + # "BitMaskArray.take does not support negative index values" + # ) + + # TODO: indices.shape gave wrong number of dimensions, expected 1 got 8 + # len(indices) works 
the same as long as 1d assumption holds + result = np.empty(len(indices), dtype=bool) + self.c_take(indices, result, fill_value, allow_fill) + return result + + def copy(self): + return BitMaskArray.copy_from_bitmaskarray(self) + @cython.boundscheck(False) # TODO: Removing this causes an IndexError? Zero size? @cython.wraparound(False) @staticmethod diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3861a18316563..b7c6cb2130c58 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -21,6 +21,7 @@ iNaT, lib, ) +from pandas._libs.arrays import BitMaskArray from pandas._typing import ( AnyArrayLike, ArrayLike, @@ -1286,21 +1287,27 @@ def take( ... fill_value=-10) array([ 10, 10, -10]) """ - if not isinstance(arr, (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries)): + if not isinstance( + arr, (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries, BitMaskArray) + ): # GH#52981 warnings.warn( "pd.api.extensions.take accepting non-standard inputs is deprecated " "and will raise in a future version. 
Pass either a numpy.ndarray, " - "ExtensionArray, Index, or Series instead.", + "ExtensionArray, Index, Series, or BitMaskArray instead.", FutureWarning, stacklevel=find_stack_level(), ) - if not is_array_like(arr): + if not isinstance(arr, BitMaskArray) and not is_array_like(arr): arr = np.asarray(arr) indices = ensure_platform_int(indices) + # BitMaskArray does not support negative indexing + if isinstance(arr, BitMaskArray) and indices.size > 0 and indices.min() < 0: + arr = arr.to_numpy() + if allow_fill: # Pandas style, -1 means NA validate_indices(indices, arr.shape[axis]) @@ -1308,7 +1315,7 @@ def take( arr, indices, axis=axis, allow_fill=True, fill_value=fill_value ) else: - # NumPy style + # NumPy / BitMaskArray style result = arr.take(indices, axis=axis) return result diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 2d22029c5fd5b..4b8e8e2681636 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -147,13 +147,9 @@ def __init__( if values.shape != mask.shape: raise ValueError("values.shape must match mask.shape") - if copy: - values = values.copy() - mask = mask.copy() - else: - if copy: - values = values.copy() - mask = mask.to_numpy() + if copy: + values = values.copy() + mask = mask.copy() self._data = values self._mask = BitMaskArray(mask) @@ -941,7 +937,7 @@ def take( ) mask = take( - self._mask.to_numpy(), + self._mask, indexer, fill_value=True, allow_fill=allow_fill, @@ -983,7 +979,7 @@ def isin(self, values) -> BooleanArray: # type: ignore[override] def copy(self) -> Self: data = self._data.copy() - mask = self._mask.to_numpy() + mask = self._mask.copy() return self._simple_new(data, mask) def unique(self) -> Self: From 37ccec332e47340a6bdb20d929f10a8cbc58c473 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 15:31:39 -0400 Subject: [PATCH 050/126] small optimization --- pandas/_libs/arrays.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 78ad8bbdc4ec7..edee22f57a01d 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -495,14 +495,16 @@ cdef class BitMaskArray: @cython.wraparound(False) @staticmethod cdef bint buf_any(const uint8_t* buf, Py_ssize_t nbits): - cdef Py_ssize_t i, bits_remaining = nbits % 8, size_bytes = nbits // 8 - if nbits == 0: + cdef Py_ssize_t i, bits_remaining, size_bytes + if nbits < 1: return False for i in range(size_bytes): if buf[i] > 0: return True + bits_remaining = nbits % 8 + size_bytes = nbits // 8 for i in range(bits_remaining): if ArrowBitGet(buf, nbits - i - 1): return True From 5436b04644a237abb853146577e0fbdeaf5161b4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 18:09:31 -0400 Subject: [PATCH 051/126] simplified buf passing and fixed bugs --- pandas/_libs/arrays.pyx | 80 +++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index edee22f57a01d..fdac88f165ab4 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -340,13 +340,11 @@ cdef class BitMaskArray: if self.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) + if self.bitmap.size_bits != other_bma.bitmap.size_bits: + raise ValueError("bitmaps are not equal size") + buf = malloc(self.bitmap.size_bits) - BitMaskArray.buf_and( - self.bitmap.buffer.data, - other_bma.bitmap.buffer.data, - self.bitmap.size_bits // 8 + 1, - buf - ) + BitMaskArray.buf_and(&self.bitmap, &other_bma.bitmap, buf) result = np.empty(self.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, @@ -366,13 +364,11 @@ cdef class BitMaskArray: if self.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) + if self.bitmap.size_bits != other_bma.bitmap.size_bits: + raise ValueError("bitmaps are not equal size") + buf = malloc(self.bitmap.size_bits) - BitMaskArray.buf_or( - 
self.bitmap.buffer.data, - other_bma.bitmap.buffer.data, - self.bitmap.size_bits // 8 + 1, - buf - ) + BitMaskArray.buf_or(&self.bitmap, &other_bma.bitmap, buf) result = np.empty(self.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, @@ -392,13 +388,11 @@ cdef class BitMaskArray: if self.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) + if self.bitmap.size_bits != other_bma.bitmap.size_bits: + raise ValueError("bitmaps are not equal size") + buf = malloc(self.bitmap.size_bits) - BitMaskArray.buf_xor( - self.bitmap.buffer.data, - other_bma.bitmap.buffer.data, - self.bitmap.size_bits // 8 + 1, - buf - ) + BitMaskArray.buf_xor(&self.bitmap, &other_bma.bitmap, buf) result = np.empty(self.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, @@ -429,7 +423,7 @@ cdef class BitMaskArray: return bool def any(self) -> bool: - return BitMaskArray.buf_any(self.bitmap.buffer.data, self.bitmap.size_bits) + return BitMaskArray.buf_any(&self.bitmap) def sum(self) -> int: return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) @@ -494,19 +488,20 @@ cdef class BitMaskArray: @cython.boundscheck(False) @cython.wraparound(False) @staticmethod - cdef bint buf_any(const uint8_t* buf, Py_ssize_t nbits): - cdef Py_ssize_t i, bits_remaining, size_bytes - if nbits < 1: + cdef bint buf_any(const ArrowBitmap* bitmap): + cdef Py_ssize_t i, bits_remaining + cdef int64_t size_bits = bitmap.size_bits + cdef const uint8_t* buf = bitmap.buffer.data + if size_bits < 1: return False - for i in range(size_bytes): + for i in range(bitmap.buffer.size_bytes): if buf[i] > 0: return True - bits_remaining = nbits % 8 - size_bytes = nbits // 8 + bits_remaining = size_bits % 8 for i in range(bits_remaining): - if ArrowBitGet(buf, nbits - i - 1): + if ArrowBitGet(buf, size_bits - i - 1): return True return False @@ -519,12 +514,17 @@ cdef class BitMaskArray: @cython.wraparound(False) @staticmethod cdef void buf_or( - const 
uint8_t* buf1, - const uint8_t* buf2, - Py_ssize_t nbytes, + const ArrowBitmap* bitmap1, + const ArrowBitmap* bitmap2, uint8_t* out ): cdef Py_ssize_t i + cdef const uint8_t* buf1 = bitmap1.buffer.data + cdef const uint8_t* buf2 = bitmap2.buffer.data + # Assumed caller has checked that bitmaps are equal, + # otherwise trailing comparison is undefined + cdef int64_t nbytes = bitmap1.buffer.size_bytes + for i in range(nbytes): out[i] = buf1[i] | buf2[i] @@ -532,12 +532,17 @@ cdef class BitMaskArray: @cython.wraparound(False) @staticmethod cdef void buf_xor( - const uint8_t* buf1, - const uint8_t* buf2, - Py_ssize_t nbytes, + const ArrowBitmap* bitmap1, + const ArrowBitmap* bitmap2, uint8_t* out ): cdef Py_ssize_t i + cdef const uint8_t* buf1 = bitmap1.buffer.data + cdef const uint8_t* buf2 = bitmap2.buffer.data + # Assumed caller has checked that bitmaps are equal, + # otherwise trailing comparison is undefined + cdef int64_t nbytes = bitmap1.buffer.size_bytes + for i in range(nbytes): out[i] = buf1[i] ^ buf2[i] @@ -545,12 +550,17 @@ cdef class BitMaskArray: @cython.wraparound(False) @staticmethod cdef void buf_and( - const uint8_t* buf1, - const uint8_t* buf2, - Py_ssize_t nbytes, + const ArrowBitmap* bitmap1, + const ArrowBitmap* bitmap2, uint8_t* out ): cdef Py_ssize_t i + cdef const uint8_t* buf1 = bitmap1.buffer.data + cdef const uint8_t* buf2 = bitmap2.buffer.data + # Assumed caller has checked that bitmaps are equal, + # otherwise trailing comparison is undefined + cdef int64_t nbytes = bitmap1.buffer.size_bytes + for i in range(nbytes): out[i] = buf1[i] & buf2[i] From b4aa12d3ca0158995d3e70323a4f9d30a8d048ea Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 19:23:55 -0400 Subject: [PATCH 052/126] setitem fastpaths --- pandas/_libs/arrays.pyx | 44 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index fdac88f165ab4..545035f397142 100644 --- 
a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -18,6 +18,7 @@ from numpy cimport ( ndarray, uint8_t, ) +from pandas.core.common import is_empty_slice cnp.import_array() @@ -37,6 +38,7 @@ cdef extern from "pandas/vendored/nanoarrow.h": void ArrowBitsUnpackInt8(const uint8_t*, int64_t, int64_t, int8_t*) int8_t ArrowBitGet(const uint8_t*, int64_t) void ArrowBitSetTo(uint8_t*, int64_t, uint8_t) + void ArrowBitsSetTo(uint8_t*, int64_t, int64_t, uint8_t) int64_t ArrowBitCountSet(const uint8_t*, int64_t, int64_t) @@ -300,6 +302,21 @@ cdef class BitMaskArray: bma.buffer_owner = True return bma + def __len__(self): + return self.bitmap.size_bits + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef _set_scalar_value_from_equal_sized_array( + self, + const uint8_t[:] data, + bint value + ): + cdef Py_ssize_t i + for i in range(self.bitmap.size_bits): + if data[i]: + ArrowBitSetTo(self.bitmap.buffer.data, i, value) + def __setitem__(self, key, value): cdef const uint8_t[:] arr1d cdef Py_ssize_t i = 0 @@ -313,11 +330,28 @@ cdef class BitMaskArray: ArrowBitSetTo(self.bitmap.buffer.data, ckey, cvalue) return - arr = self.to_numpy() - arr[key] = value - arr1d = arr.ravel() - for i in range(arr1d.shape[0]): - ArrowBitSetTo(self.bitmap.buffer.data, i, arr1d[i]) + # TODO: implement fastpaths here for equal sized containers + # to avoid the to_numpy() call + if is_empty_slice(key) and isinstance(value, (int, bool)): + cvalue = value # blindly assuming ints are 0 or 1 + ArrowBitsSetTo( + self.bitmap.buffer.data, + 0, + self.bitmap.size_bits, + cvalue + ) + elif ( + isinstance(key, np.ndarray) + and key.dtype == bool + and isinstance(value, (int, bool)) + ): + self._set_scalar_value_from_equal_sized_array(key, value) + else: + arr = self.to_numpy() + arr[key] = value + arr1d = arr.ravel() + for i in range(arr1d.shape[0]): + ArrowBitSetTo(self.bitmap.buffer.data, i, arr1d[i]) def __getitem__(self, key): cdef Py_ssize_t ckey From 
8c5cd15bb0226514124588795d23dc0caa56f2d2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 20:51:34 -0400 Subject: [PATCH 053/126] cython < 3 compat --- pandas/_libs/arrays.pyx | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 545035f397142..6dce16ecd4280 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -369,7 +369,9 @@ cdef class BitMaskArray: def __and__(self, other): cdef ndarray[uint8_t] result cdef BitMaskArray other_bma + cdef ArrowBitmap bitmap if isinstance(other, type(self)): + bitmap = self.bitmap other_bma = other if self.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) @@ -378,7 +380,7 @@ cdef class BitMaskArray: raise ValueError("bitmaps are not equal size") buf = malloc(self.bitmap.size_bits) - BitMaskArray.buf_and(&self.bitmap, &other_bma.bitmap, buf) + BitMaskArray.buf_and(&bitmap, &other_bma.bitmap, buf) result = np.empty(self.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, @@ -393,8 +395,10 @@ cdef class BitMaskArray: def __or__(self, other): cdef ndarray[uint8_t] result cdef BitMaskArray other_bma + cdef ArrowBitmap bitmap if isinstance(other, type(self)): other_bma = other + bitmap = self.bitmap # Cython >= 3 can just use &self.bitmap in calls if self.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) @@ -402,7 +406,7 @@ cdef class BitMaskArray: raise ValueError("bitmaps are not equal size") buf = malloc(self.bitmap.size_bits) - BitMaskArray.buf_or(&self.bitmap, &other_bma.bitmap, buf) + BitMaskArray.buf_or(&bitmap, &other_bma.bitmap, buf) result = np.empty(self.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, @@ -417,8 +421,10 @@ cdef class BitMaskArray: def __xor__(self, other): cdef ndarray[uint8_t] result cdef BitMaskArray other_bma + cdef ArrowBitmap bitmap if isinstance(other, type(self)): other_bma = other + bitmap = 
self.bitmap # Cython >= 3 can just use &self.bitmap in calls if self.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) @@ -426,7 +432,7 @@ cdef class BitMaskArray: raise ValueError("bitmaps are not equal size") buf = malloc(self.bitmap.size_bits) - BitMaskArray.buf_xor(&self.bitmap, &other_bma.bitmap, buf) + BitMaskArray.buf_xor(&bitmap, &other_bma.bitmap, buf) result = np.empty(self.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, From e904e1888790801727fcd574cb83c6814cc5af30 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 22:25:24 -0400 Subject: [PATCH 054/126] Revert "simplified buf passing and fixed bugs" This reverts commit 5436b04644a237abb853146577e0fbdeaf5161b4. --- pandas/_libs/arrays.pyx | 51 +++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 6dce16ecd4280..78c3bdd595dd8 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -368,24 +368,23 @@ cdef class BitMaskArray: def __and__(self, other): cdef ndarray[uint8_t] result - cdef BitMaskArray other_bma - cdef ArrowBitmap bitmap + cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 + if isinstance(other, type(self)): - bitmap = self.bitmap other_bma = other - if self.bitmap.size_bits == 0: + if self_.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) - if self.bitmap.size_bits != other_bma.bitmap.size_bits: + if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") - buf = malloc(self.bitmap.size_bits) - BitMaskArray.buf_and(&bitmap, &other_bma.bitmap, buf) - result = np.empty(self.bitmap.size_bits, dtype=bool) + buf = malloc(self_.bitmap.size_bits) + BitMaskArray.buf_and(&self_.bitmap, &other_bma.bitmap, buf) + result = np.empty(self_.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, buf, - 
self.bitmap.size_bits + self_.bitmap.size_bits ) free(buf) return result.reshape(self.array_shape) @@ -394,24 +393,23 @@ cdef class BitMaskArray: def __or__(self, other): cdef ndarray[uint8_t] result - cdef BitMaskArray other_bma - cdef ArrowBitmap bitmap + cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 + if isinstance(other, type(self)): other_bma = other - bitmap = self.bitmap # Cython >= 3 can just use &self.bitmap in calls - if self.bitmap.size_bits == 0: + if self_.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) - if self.bitmap.size_bits != other_bma.bitmap.size_bits: + if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") - buf = malloc(self.bitmap.size_bits) - BitMaskArray.buf_or(&bitmap, &other_bma.bitmap, buf) - result = np.empty(self.bitmap.size_bits, dtype=bool) + buf = malloc(self_.bitmap.size_bits) + BitMaskArray.buf_or(&self_.bitmap, &other_bma.bitmap, buf) + result = np.empty(self_.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, buf, - self.bitmap.size_bits + self_.bitmap.size_bits ) free(buf) return result.reshape(self.array_shape) @@ -420,24 +418,23 @@ cdef class BitMaskArray: def __xor__(self, other): cdef ndarray[uint8_t] result - cdef BitMaskArray other_bma - cdef ArrowBitmap bitmap + cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 + if isinstance(other, type(self)): other_bma = other - bitmap = self.bitmap # Cython >= 3 can just use &self.bitmap in calls - if self.bitmap.size_bits == 0: + if self_.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) - if self.bitmap.size_bits != other_bma.bitmap.size_bits: + if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") - buf = malloc(self.bitmap.size_bits) - BitMaskArray.buf_xor(&bitmap, &other_bma.bitmap, buf) - result = np.empty(self.bitmap.size_bits, dtype=bool) + buf = 
malloc(self_.bitmap.size_bits) + BitMaskArray.buf_xor(&self_.bitmap, &other_bma.bitmap, buf) + result = np.empty(self_.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, buf, - self.bitmap.size_bits + self_.bitmap.size_bits ) free(buf) return result.reshape(self.array_shape) From c218e5127fb18cb5b6a0acccf570673e9ec96ae0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 22:28:26 -0400 Subject: [PATCH 055/126] implemented all --- pandas/_libs/arrays.pyx | 24 ++++++++++++++++++++++++ pandas/core/arrays/masked.py | 11 +++++++++++ 2 files changed, 35 insertions(+) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 78c3bdd595dd8..fb4e359219e20 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -462,6 +462,9 @@ cdef class BitMaskArray: def any(self) -> bool: return BitMaskArray.buf_any(&self.bitmap) + def all(self) -> bool: + return BitMaskArray.buf_all(&self.bitmap) + def sum(self) -> int: return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) @@ -543,6 +546,27 @@ cdef class BitMaskArray: return False + @cython.boundscheck(False) + @cython.wraparound(False) + @staticmethod + cdef bint buf_all(const ArrowBitmap* bitmap): + cdef Py_ssize_t i, bits_remaining + cdef int64_t size_bits = bitmap.size_bits + cdef const uint8_t* buf = bitmap.buffer.data + if size_bits < 1: + return True + + for i in range(bitmap.buffer.size_bytes): + if buf[i] != 256: + return False + + bits_remaining = size_bits % 8 + for i in range(bits_remaining): + if ArrowBitGet(buf, size_bits - i - 1) == 0: + return False + + return True + # TODO: clean up signatures - don't mix nbits and nbytes # Note that in cases where the size_bits doesn't end on a word # boundary that these will still operate on the remaining bits, diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 4b8e8e2681636..746afdd820247 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1383,6 
+1383,17 @@ def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): """ nv.validate_any((), kwargs) + # attempt to avoid to_numpy call on mask for best performance + is_all_na = self._mask.all() + if is_all_na and skipna or len(self) == 0: + return False + if not skipna and not is_all_na: + return True + if not skipna and self._mask.any(): + return self.dtype.na_value + + # fallback to numpy - will be slower + # TODO: some of these conditions are likely duplicative of above checks values = self._data.copy() # error: Argument 3 to "putmask" has incompatible type "object"; # expected "Union[_SupportsArray[dtype[Any]], From 9cf54f9d8bd7585eccf64e099cfc551cfe4a8255 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 22:49:35 -0400 Subject: [PATCH 056/126] faster any --- pandas/core/arrays/masked.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 746afdd820247..1f52b32160302 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1385,29 +1385,31 @@ def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): # attempt to avoid to_numpy call on mask for best performance is_all_na = self._mask.all() - if is_all_na and skipna or len(self) == 0: + is_any_na = self._mask.any() + if len(self) == 0 or (skipna and is_all_na): return False - if not skipna and not is_all_na: - return True - if not skipna and self._mask.any(): - return self.dtype.na_value - # fallback to numpy - will be slower - # TODO: some of these conditions are likely duplicative of above checks - values = self._data.copy() - # error: Argument 3 to "putmask" has incompatible type "object"; - # expected "Union[_SupportsArray[dtype[Any]], - # _NestedSequence[_SupportsArray[dtype[Any]]], - # bool, int, float, complex, str, bytes, - # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" - np.putmask( - values, 
self._mask.to_numpy(), self._falsey_value # type: ignore[arg-type] - ) + if is_any_na: + # fallback to numpy - will be slower + values = self._data.copy() + # error: Argument 3 to "putmask" has incompatible type "object"; + # expected "Union[_SupportsArray[dtype[Any]], + # _NestedSequence[_SupportsArray[dtype[Any]]], + # bool, int, float, complex, str, bytes, + # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" + np.putmask( + values, + self._mask.to_numpy(), + self._falsey_value, # type: ignore[arg-type] + ) + else: + values = self._data + result = values.any() if skipna: return result else: - if result or len(self) == 0 or not self._mask.any(): + if result or not is_any_na: return result else: return self.dtype.na_value From 4f6d035ca16fbf72c28d7f86ffe2aedbc1338bf7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 22:53:55 -0400 Subject: [PATCH 057/126] faster all implementation --- pandas/core/arrays/masked.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 1f52b32160302..8c39aaf8994d6 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1479,21 +1479,33 @@ def all(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): """ nv.validate_all((), kwargs) - values = self._data.copy() - # error: Argument 3 to "putmask" has incompatible type "object"; - # expected "Union[_SupportsArray[dtype[Any]], - # _NestedSequence[_SupportsArray[dtype[Any]]], - # bool, int, float, complex, str, bytes, - # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" - np.putmask( - values, self._mask.to_numpy(), self._truthy_value # type: ignore[arg-type] - ) + # attempt to avoid to_numpy call on mask for best performance + is_all_na = self._mask.all() + is_any_na = self._mask.any() + if len(self) == 0 or (skipna and is_all_na): + return True + + if is_any_na: + values = self._data.copy() + # 
error: Argument 3 to "putmask" has incompatible type "object"; + # expected "Union[_SupportsArray[dtype[Any]], + # _NestedSequence[_SupportsArray[dtype[Any]]], + # bool, int, float, complex, str, bytes, + # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" + np.putmask( + values, + self._mask.to_numpy(), + self._truthy_value, # type: ignore[arg-type] + ) + else: + values = self._data + result = values.all(axis=axis) if skipna: return result else: - if not result or len(self) == 0 or not self._mask.any(): + if not result or not self._mask.any(): return result else: return self.dtype.na_value From dca1c65fe10001cccc8b82cc0c80959f912aebf6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 20 Aug 2023 23:31:50 -0400 Subject: [PATCH 058/126] faster reshape --- pandas/_libs/arrays.pyi | 1 + pandas/_libs/arrays.pyx | 33 ++++++++++++++++++++++++++++----- pandas/core/arrays/masked.py | 7 ++----- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 9c53fa93f473d..5d5400ad044b0 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -52,6 +52,7 @@ class BitMaskArray: def __setitem__(self, key: PositionalIndexer, value: ArrayLike | bool) -> None: ... def __getitem__(self, key: PositionalIndexer) -> bool: ... def __invert__(self) -> np.ndarray: ... + def __eq__(self, other) -> bool: ... def __and__(self, other: np.ndarray | Self) -> np.ndarray: ... def __or__(self, other: np.ndarray | Self) -> np.ndarray: ... def __xor__(self, other: np.ndarray | Self) -> np.ndarray: ... 
diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index fb4e359219e20..9be3750dd72c8 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -11,14 +11,18 @@ from libc.stdlib cimport ( free, malloc, ) -from libc.string cimport memcpy +from libc.string cimport ( + memcmp, + memcpy, +) from numpy cimport ( int8_t, int64_t, ndarray, uint8_t, ) -from pandas.core.common import is_empty_slice + +from pandas.core.common import is_null_slice cnp.import_array() @@ -292,7 +296,7 @@ cdef class BitMaskArray: # We may want to upstream a BitmapCopy function instead ArrowBitmapInit(&bitmap) buf = malloc(old_bma.bitmap.size_bits) - memcpy(buf, old_bma.bitmap.buffer.data, old_bma.bitmap.size_bits) + memcpy(buf, old_bma.bitmap.buffer.data, old_bma.bitmap.buffer.size_bytes) bitmap.buffer.size_bytes = old_bma.bitmap.buffer.size_bytes bitmap.size_bits = old_bma.bitmap.size_bits bitmap.buffer.data = buf @@ -332,7 +336,7 @@ cdef class BitMaskArray: # TODO: implement fastpaths here for equal sized containers # to avoid the to_numpy() call - if is_empty_slice(key) and isinstance(value, (int, bool)): + if is_null_slice(key) and isinstance(value, (int, bool)): cvalue = value # blindly assuming ints are 0 or 1 ArrowBitsSetTo( self.bitmap.buffer.data, @@ -355,10 +359,13 @@ cdef class BitMaskArray: def __getitem__(self, key): cdef Py_ssize_t ckey + # to_numpy can be expensive, so try to avoid for simple cases if isinstance(key, int): ckey = key if ckey >= 0 and ckey < self.bitmap.size_bits: - return ArrowBitGet(self.bitmap.buffer.data, ckey) + return bool(ArrowBitGet(self.bitmap.buffer.data, ckey)) + elif is_null_slice(key): + return self.copy() return self.to_numpy()[key] @@ -366,6 +373,22 @@ cdef class BitMaskArray: # TODO: could invert the buffer first then go to numpy return ~self.to_numpy() + def __eq__(self, other): + cdef BitMaskArray other_bma + if isinstance(other, type(self)): + other_bma = other + if ( + self.bitmap.size_bits == other_bma.bitmap.size_bits + 
and memcmp( + self.bitmap.buffer.data, + other_bma.bitmap.buffer.data, + self.bitmap.buffer.size_bytes + ) == 0 + ): + return True + + return False + def __and__(self, other): cdef ndarray[uint8_t] result cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 8c39aaf8994d6..d3d1c3b5f5c52 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -191,10 +191,7 @@ def __getitem__(self, item: SequenceIndexer) -> Self: def __getitem__(self, item: PositionalIndexer) -> Self | Any: item = check_array_indexer(self, item) - # TODO: need to change this to special case multiple - # indexers versus just scalar - np_mask = self._mask.to_numpy() - newmask = np_mask[item] + newmask = self._mask[item] if is_bool(newmask): # This is a scalar indexing if newmask: @@ -202,7 +199,7 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self._data[item] # sending self._mask avoids copy of buffer - if np.array_equal(newmask, np_mask): + if isinstance(newmask, BitMaskArray) and newmask == self._mask: return self._simple_new(self._data[item], self._mask) return self._simple_new(self._data[item], newmask) From 946c892d9d35e72f2c43fe91512144cfc4c9d442 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 07:50:40 -0400 Subject: [PATCH 059/126] Faster is_null_slice implementation --- pandas/_libs/arrays.pyx | 2 +- pandas/_libs/lib.pyx | 18 ++++++++++++++++++ pandas/core/common.py | 7 +------ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 9be3750dd72c8..c5ff6d6c7dcb9 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -22,7 +22,7 @@ from numpy cimport ( uint8_t, ) -from pandas.core.common import is_null_slice +from pandas._libs.lib import is_null_slice cnp.import_array() diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 
2681115bbdcfb..228ad078c6927 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -8,6 +8,7 @@ from typing import ( ) cimport cython +from cpython cimport PyErr_Clear from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, @@ -29,6 +30,7 @@ from cpython.object cimport ( ) from cpython.ref cimport Py_INCREF from cpython.sequence cimport PySequence_Check +from cpython.slice cimport PySlice_Unpack from cpython.tuple cimport ( PyTuple_New, PyTuple_SET_ITEM, @@ -71,6 +73,7 @@ cdef extern from "Python.h": # Note: importing extern-style allows us to declare these as nogil # functions, whereas `from cpython cimport` does not. bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + cdef Py_ssize_t PY_SSIZE_T_MAX cdef extern from "numpy/arrayobject.h": # cython's numpy.dtype specification is incorrect, which leads to @@ -1234,6 +1237,21 @@ def is_pyarrow_array(obj): return False +def is_null_slice(obj): + """ + Return True if given object + """ + cdef Py_ssize_t start, stop, step + if isinstance(obj, slice): + if PySlice_Unpack(obj, &start, &stop, &step) == 0: + if start == 0 and stop == PY_SSIZE_T_MAX and step == 1: + return True + else: + PyErr_Clear() + + return False + + _TYPE_MAP = { "categorical": "categorical", "category": "categorical", diff --git a/pandas/core/common.py b/pandas/core/common.py index 6d419098bf279..73e2e276a7a00 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -307,12 +307,7 @@ def is_null_slice(obj) -> bool: """ We have a null slice. 
""" - return ( - isinstance(obj, slice) - and obj.start is None - and obj.stop is None - and obj.step is None - ) + return lib.is_null_sice(obj) def is_empty_slice(obj) -> bool: From 1eb0e0120a984d57d862f9ed81457e6577efeb75 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 09:42:09 -0400 Subject: [PATCH 060/126] revert troublesome __getitem__ enhancements --- pandas/_libs/arrays.pyi | 1 - pandas/_libs/arrays.pyx | 21 +-------------------- pandas/core/arrays/masked.py | 6 +++++- 3 files changed, 6 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 5d5400ad044b0..9c53fa93f473d 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -52,7 +52,6 @@ class BitMaskArray: def __setitem__(self, key: PositionalIndexer, value: ArrayLike | bool) -> None: ... def __getitem__(self, key: PositionalIndexer) -> bool: ... def __invert__(self) -> np.ndarray: ... - def __eq__(self, other) -> bool: ... def __and__(self, other: np.ndarray | Self) -> np.ndarray: ... def __or__(self, other: np.ndarray | Self) -> np.ndarray: ... def __xor__(self, other: np.ndarray | Self) -> np.ndarray: ... 
diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index c5ff6d6c7dcb9..fb428ddbc2c8e 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -11,10 +11,7 @@ from libc.stdlib cimport ( free, malloc, ) -from libc.string cimport ( - memcmp, - memcpy, -) +from libc.string cimport memcpy from numpy cimport ( int8_t, int64_t, @@ -373,22 +370,6 @@ cdef class BitMaskArray: # TODO: could invert the buffer first then go to numpy return ~self.to_numpy() - def __eq__(self, other): - cdef BitMaskArray other_bma - if isinstance(other, type(self)): - other_bma = other - if ( - self.bitmap.size_bits == other_bma.bitmap.size_bits - and memcmp( - self.bitmap.buffer.data, - other_bma.bitmap.buffer.data, - self.bitmap.buffer.size_bytes - ) == 0 - ): - return True - - return False - def __and__(self, other): cdef ndarray[uint8_t] result cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index d3d1c3b5f5c52..7592f37567255 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -191,7 +191,11 @@ def __getitem__(self, item: SequenceIndexer) -> Self: def __getitem__(self, item: PositionalIndexer) -> Self | Any: item = check_array_indexer(self, item) - newmask = self._mask[item] + # TODO: some of the numpy semantics for handling 2D indexing + # are not implemented in the bitmaskarray, hence the to_numpy() + # requirement, though that slows things down + np_mask = self._mask.to_numpy() + newmask = np_mask[item] if is_bool(newmask): # This is a scalar indexing if newmask: From 07594d644a4e332ce3fffb6039473cd6ec4358e1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 10:01:38 -0400 Subject: [PATCH 061/126] typo fixup --- pandas/core/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index b93506d14da87..d75cf52149bf9 100644 --- a/pandas/core/common.py +++ 
b/pandas/core/common.py @@ -307,7 +307,7 @@ def is_null_slice(obj) -> bool: """ We have a null slice. """ - return lib.is_null_sice(obj) + return lib.is_null_slice(obj) def is_empty_slice(obj) -> bool: From d30b6138f49b727fefac0faabc21ad3a02232dc4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 10:06:01 -0400 Subject: [PATCH 062/126] finish revert --- pandas/core/arrays/masked.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 7592f37567255..d5a5b481bb3c7 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -203,7 +203,7 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self._data[item] # sending self._mask avoids copy of buffer - if isinstance(newmask, BitMaskArray) and newmask == self._mask: + if np.array_equal(newmask, np_mask): return self._simple_new(self._data[item], self._mask) return self._simple_new(self._data[item], newmask) From 34ac61318927881649bc200348573235717c9226 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 12:25:57 -0400 Subject: [PATCH 063/126] reshape fast path --- pandas/_libs/arrays.pyx | 82 +++++++++++++++++++----------------- pandas/core/algorithms.py | 13 ++---- pandas/core/arrays/masked.py | 17 +++++--- 3 files changed, 56 insertions(+), 56 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index fb428ddbc2c8e..2e0614fcc2eba 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -34,6 +34,7 @@ cdef extern from "pandas/vendored/nanoarrow.h": void ArrowBitmapInit(ArrowBitmap*) void ArrowBitmapReserve(ArrowBitmap*, int64_t) + void ArrowBitmapAppendUnsafe(ArrowBitmap*, uint8_t, int64_t) void ArrowBitmapAppendInt8Unsafe(ArrowBitmap*, const int8_t *, int64_t) void ArrowBitmapReset(ArrowBitmap*) void ArrowBitsUnpackInt8(const uint8_t*, int64_t, int64_t, int8_t*) @@ -41,6 +42,7 @@ cdef extern from "pandas/vendored/nanoarrow.h": void 
ArrowBitSetTo(uint8_t*, int64_t, uint8_t) void ArrowBitsSetTo(uint8_t*, int64_t, int64_t, uint8_t) int64_t ArrowBitCountSet(const uint8_t*, int64_t, int64_t) + void ArrowBitmapReset(ArrowBitmap*) @cython.freelist(16) @@ -474,51 +476,53 @@ cdef class BitMaskArray: @cython.wraparound(False) @cython.boundscheck(False) - cdef void c_take( - self, - const int64_t[:] indices, - uint8_t[:] out, - bint fill_value, - bint allow_fill - ): - # TODO: we should try and upstream this into nanoarrow with a better algo + cdef int ctake_1d(self, const int64_t[:] indices, ArrowBitmap* out_bitmap): + """returns -1 in case a negative index is encountered, 0 on success""" + cdef bint value cdef Py_ssize_t i - cdef uint8_t value - if not allow_fill: - for i in range(indices.shape[0]): - out[i] = ArrowBitGet(self.bitmap.buffer.data, indices[i]) - else: - for i in range(indices.shape[0]): - value = ArrowBitGet(self.bitmap.buffer.data, indices[i]) - if value == 1: - out[i] = fill_value - else: - out[i] = value - - def take( + cdef int64_t index + cdef nindices = indices.shape[0] + + for i in range(nindices): + index = indices[i] + if index < 0: + return -1 + + value = ArrowBitGet(self.bitmap.buffer.data, index) + ArrowBitmapAppendUnsafe(out_bitmap, value, 1) + + def take_1d( self, - const int64_t[:] indices, - int axis=0, - bint fill_value=0, - bint allow_fill=0 - ) -> np.ndarray: + indices, + const int axis=0, + ): + cdef Py_ssize_t nindices = len(indices) if axis != 0: raise NotImplementedError( - "BitMaskArray.take only implemented for axis=0" + "BitMaskArray.take_1d only implemented for axis=0" + ) + + if nindices <= 0: + raise NotImplementedError( + "take_1d does not support empty takes" ) - # TODO: would be great to check this here, though most of these functions - # are by definition unsafe - # if indices.min() < 0: - # raise NotImplementedError( - # "BitMaskArray.take does not support negative index values" - # ) - - # TODO: indices.shape gave wrong number of dimensions, expected 1 
got 8 - # len(indices) works the same as long as 1d assumption holds - result = np.empty(len(indices), dtype=bool) - self.c_take(indices, result, fill_value, allow_fill) - return result + cdef ArrowBitmap bitmap + cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) + + # TODO: this leaks a bit into the internals of the nanoarrow bitmap + # We may want to upstream a BitmapCopy function instead + ArrowBitmapInit(&bitmap) + ArrowBitmapReserve(&bitmap, nindices) + + if self.ctake_1d(indices, &bitmap) != 0: + ArrowBitmapReset(&bitmap) + raise ValueError("take_1d does not support negative indexing") + + bma.bitmap = bitmap + bma.array_shape = indices.shape + bma.buffer_owner = True + return bma def copy(self): return BitMaskArray.copy_from_bitmaskarray(self) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b7c6cb2130c58..26f344330dc11 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -21,7 +21,6 @@ iNaT, lib, ) -from pandas._libs.arrays import BitMaskArray from pandas._typing import ( AnyArrayLike, ArrayLike, @@ -1287,9 +1286,7 @@ def take( ... 
fill_value=-10) array([ 10, 10, -10]) """ - if not isinstance( - arr, (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries, BitMaskArray) - ): + if not isinstance(arr, (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries)): # GH#52981 warnings.warn( "pd.api.extensions.take accepting non-standard inputs is deprecated " @@ -1299,15 +1296,11 @@ def take( stacklevel=find_stack_level(), ) - if not isinstance(arr, BitMaskArray) and not is_array_like(arr): + if not is_array_like(arr): arr = np.asarray(arr) indices = ensure_platform_int(indices) - # BitMaskArray does not support negative indexing - if isinstance(arr, BitMaskArray) and indices.size > 0 and indices.min() < 0: - arr = arr.to_numpy() - if allow_fill: # Pandas style, -1 means NA validate_indices(indices, arr.shape[axis]) @@ -1315,7 +1308,7 @@ def take( arr, indices, axis=axis, allow_fill=True, fill_value=fill_value ) else: - # NumPy / BitMaskArray style + # NumPy style result = arr.take(indices, axis=axis) return result diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index d5a5b481bb3c7..c5abc85754f14 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -937,13 +937,16 @@ def take( axis=axis, ) - mask = take( - self._mask, - indexer, - fill_value=True, - allow_fill=allow_fill, - axis=axis, - ) + try: + mask = self._mask.take_1d(indexer) + except (ValueError, NotImplementedError): + mask = take( + self._mask.to_numpy(), + indexer, + fill_value=True, + allow_fill=allow_fill, + axis=axis, + ) # if we are filling # we only fill where the indexer is null From 44aae253c84de1901db951a4a7353f4e9a90de63 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 14:43:44 -0400 Subject: [PATCH 064/126] fix is_null_slice --- pandas/_libs/lib.pyx | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 228ad078c6927..6fed8e1339895 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -8,7 
+8,6 @@ from typing import ( ) cimport cython -from cpython cimport PyErr_Clear from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, @@ -1243,11 +1242,13 @@ def is_null_slice(obj): """ cdef Py_ssize_t start, stop, step if isinstance(obj, slice): - if PySlice_Unpack(obj, &start, &stop, &step) == 0: - if start == 0 and stop == PY_SSIZE_T_MAX and step == 1: - return True - else: - PyErr_Clear() + try: + PySlice_Unpack(obj, &start, &stop, &step) + except TypeError: + return False + + if start == 0 and stop == PY_SSIZE_T_MAX and step == 1: + return True return False From 8b72d09407a24aceaa51be18767995c3a5467629 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 14:45:51 -0400 Subject: [PATCH 065/126] fix indexer perf boost --- pandas/core/arrays/masked.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index c5abc85754f14..2b6ee50875184 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -939,7 +939,7 @@ def take( try: mask = self._mask.take_1d(indexer) - except (ValueError, NotImplementedError): + except (TypeError, ValueError, NotImplementedError): mask = take( self._mask.to_numpy(), indexer, From 45d1cf06ac24b144be62f14eb3aec847b91d8a26 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 15:03:21 -0400 Subject: [PATCH 066/126] less to_numpy() --- pandas/_libs/arrays.pyx | 4 ++++ pandas/core/arrays/masked.py | 22 +++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 2e0614fcc2eba..ef1a2d7bbd9bf 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -451,6 +451,10 @@ cdef class BitMaskArray: object_state = (self.to_numpy(), self.parent) return (_unpickle_bitmaskarray, object_state, self.parent) + @property + def size(self) -> int: + return self.bitmap.size_bits + @property def nbytes(self) -> int: return 
self.bitmap.buffer.size_bytes diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 2b6ee50875184..e462a929cedae 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -420,16 +420,16 @@ def round(self, decimals: int = 0, *args, **kwargs): # Unary Methods def __invert__(self) -> Self: - return self._simple_new(~self._data, self._mask.to_numpy()) + return self._simple_new(~self._data, self._mask.copy()) def __neg__(self) -> Self: - return self._simple_new(-self._data, self._mask.to_numpy()) + return self._simple_new(-self._data, self._mask.copy()) def __pos__(self) -> Self: return self.copy() def __abs__(self) -> Self: - return self._simple_new(abs(self._data), self._mask.to_numpy()) + return self._simple_new(abs(self._data), self._mask.copy()) # ------------------------------------------------------------------ @@ -1236,7 +1236,7 @@ def sum( result = masked_reductions.sum( self._data, - self._mask.to_numpy(), + self._mask, skipna=skipna, min_count=min_count, axis=axis, @@ -1257,7 +1257,7 @@ def prod( result = masked_reductions.prod( self._data, - self._mask.to_numpy(), + self._mask, skipna=skipna, min_count=min_count, axis=axis, @@ -1270,7 +1270,7 @@ def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): nv.validate_mean((), kwargs) result = masked_reductions.mean( self._data, - self._mask.to_numpy(), + self._mask, skipna=skipna, axis=axis, ) @@ -1282,7 +1282,7 @@ def var( nv.validate_stat_ddof_func((), kwargs, fname="var") result = masked_reductions.var( self._data, - self._mask.to_numpy(), + self._mask, skipna=skipna, axis=axis, ddof=ddof, @@ -1295,7 +1295,7 @@ def std( nv.validate_stat_ddof_func((), kwargs, fname="std") result = masked_reductions.std( self._data, - self._mask.to_numpy(), + self._mask, skipna=skipna, axis=axis, ddof=ddof, @@ -1306,7 +1306,7 @@ def min(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): nv.validate_min((), kwargs) result = masked_reductions.min( 
self._data, - self._mask.to_numpy(), + self._mask, skipna=skipna, axis=axis, ) @@ -1316,7 +1316,7 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): nv.validate_max((), kwargs) result = masked_reductions.max( self._data, - self._mask.to_numpy(), + self._mask, skipna=skipna, axis=axis, ) @@ -1518,7 +1518,7 @@ def _accumulate( self, name: str, *, skipna: bool = True, **kwargs ) -> BaseMaskedArray: data = self._data - mask = self._mask.to_numpy() + mask = self._mask op = getattr(masked_accumulations, name) data, mask = op(data, mask, skipna=skipna, **kwargs) From 68b7191a3a57df6376797de63aada4007dac8b6e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 16:12:23 -0400 Subject: [PATCH 067/126] make bitmaskarray iterable --- pandas/_libs/arrays.pyi | 1 + pandas/_libs/arrays.pyx | 8 ++++++++ pandas/core/arrays/masked.py | 2 +- pandas/core/ops/mask_ops.py | 13 +++++-------- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 9c53fa93f473d..f6e342aaeadb4 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -56,6 +56,7 @@ class BitMaskArray: def __or__(self, other: np.ndarray | Self) -> np.ndarray: ... def __xor__(self, other: np.ndarray | Self) -> np.ndarray: ... def __reduce__(self) -> Tuple[Callable[[np.ndarray], Self], Tuple[np.ndarray]]: ... + def __iter__(self): ... @property def nbytes(self) -> int: ... def shape(self) -> tuple[int, ...]: ... 
diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index ef1a2d7bbd9bf..d87f6c52d79c2 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -451,6 +451,14 @@ cdef class BitMaskArray: object_state = (self.to_numpy(), self.parent) return (_unpickle_bitmaskarray, object_state, self.parent) + @cython.boundscheck(False) + @cython.wraparound(False) + def __iter__(self): + cdef Py_ssize_t i + cdef BitMaskArray self_ = self # self_ required for Cython < 3 + for i in range(self_.bitmap.size_bits): + yield bool(ArrowBitGet(self_.bitmap.buffer.data, i)) + @property def size(self) -> int: return self.bitmap.size_bits diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index e462a929cedae..6deee8924fc1f 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -341,7 +341,7 @@ def __iter__(self) -> Iterator: yield val else: na_value = self.dtype.na_value - for isna_, val in zip(self._mask.to_numpy(), self._data): + for isna_, val in zip(self._mask, self._data): if isna_: yield na_value else: diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index a6dbb1db28af2..a66822f50217e 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -175,21 +175,18 @@ def kleene_and( result = left & right if right_mask is None: - if isinstance(left_mask, BitMaskArray): - left_mask = left_mask.to_numpy() - # Scalar `right` if right is libmissing.NA: - mask = (left & ~left_mask) | left_mask + if left_mask.any(): + mask = (left & ~left_mask) | left_mask else: - if not isinstance(left_mask, BitMaskArray): # already a copy - mask = left_mask.copy() + mask = left_mask.copy() if right is False: # unmask everything mask[:] = False else: - # TODO: Cython 3 changed support for radd / ror methods and may - # not be working? 
For now convert to NumPy + # Since we must compare to left / right it helps perf to convert + # to numpy up front, rather than deferring multiple times if isinstance(left_mask, BitMaskArray): left_mask = left_mask.to_numpy() if isinstance(right_mask, BitMaskArray): From 685f48169c2d73aca5423a1a03ed0813913181a1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 16:14:14 -0400 Subject: [PATCH 068/126] typing cleanups --- pandas/_libs/arrays.pyi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index f6e342aaeadb4..a6eef23b0f830 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -57,12 +57,13 @@ class BitMaskArray: def __xor__(self, other: np.ndarray | Self) -> np.ndarray: ... def __reduce__(self) -> Tuple[Callable[[np.ndarray], Self], Tuple[np.ndarray]]: ... def __iter__(self): ... + def size(self) -> int: ... @property def nbytes(self) -> int: ... def shape(self) -> tuple[int, ...]: ... def dtype(self) -> type_t[bool]: ... def any(self) -> bool: ... def sum(self) -> int: ... - def take(self, indices: np.ndarray, axis: int, fill_value: bool) -> np.ndarray: ... + def take_1d(self, indices: np.ndarray, axis: int) -> Self: ... def copy(self) -> Self: ... def to_numpy(self) -> np.ndarray: ... 
From 82826e90b74d5f71584adb43795a98e9bf1c189d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 17:09:41 -0400 Subject: [PATCH 069/126] boolean fixes --- pandas/core/arrays/masked.py | 4 ++-- pandas/core/ops/mask_ops.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 6deee8924fc1f..9020c85f00327 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1391,7 +1391,7 @@ def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): is_all_na = self._mask.all() is_any_na = self._mask.any() if len(self) == 0 or (skipna and is_all_na): - return False + return np.bool_(False) if is_any_na: # fallback to numpy - will be slower @@ -1487,7 +1487,7 @@ def all(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): is_all_na = self._mask.all() is_any_na = self._mask.any() if len(self) == 0 or (skipna and is_all_na): - return True + return np.bool_(True) if is_any_na: values = self._data.copy() diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index a66822f50217e..1f37b568f1ec5 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -179,6 +179,8 @@ def kleene_and( if right is libmissing.NA: if left_mask.any(): mask = (left & ~left_mask) | left_mask + else: + mask = left else: mask = left_mask.copy() if right is False: From 78e42454199d15ab171586bd5781eb11f6c7088d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Aug 2023 21:27:54 -0400 Subject: [PATCH 070/126] perf in take --- pandas/_libs/arrays.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index d87f6c52d79c2..4794d0ee92f06 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -493,7 +493,7 @@ cdef class BitMaskArray: cdef bint value cdef Py_ssize_t i cdef int64_t index - cdef nindices = indices.shape[0] + cdef Py_ssize_t nindices = 
indices.shape[0] for i in range(nindices): index = indices[i] From 69c51c22cbcbc929ada6e5e07eaa73fd4fa398e5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 22 Aug 2023 16:49:11 -0400 Subject: [PATCH 071/126] fixed typing --- pandas/_libs/arrays.pyi | 4 ++++ pandas/_libs/lib.pyi | 1 + pandas/core/array_algos/masked_reductions.py | 19 ++++++++++--------- pandas/core/arrays/masked.py | 2 +- pandas/core/nanops.py | 11 +++++++++-- pandas/core/ops/mask_ops.py | 5 +++-- 6 files changed, 28 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index a6eef23b0f830..b3ed14cb1c30c 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -57,12 +57,16 @@ class BitMaskArray: def __xor__(self, other: np.ndarray | Self) -> np.ndarray: ... def __reduce__(self) -> Tuple[Callable[[np.ndarray], Self], Tuple[np.ndarray]]: ... def __iter__(self): ... + @property def size(self) -> int: ... @property def nbytes(self) -> int: ... + @property def shape(self) -> tuple[int, ...]: ... + @property def dtype(self) -> type_t[bool]: ... def any(self) -> bool: ... + def all(self) -> bool: ... def sum(self) -> int: ... def take_1d(self, indices: np.ndarray, axis: int) -> Self: ... def copy(self) -> Self: ... diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 32641319a6b96..4427a7ce734c8 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -44,6 +44,7 @@ def is_iterator(obj: object) -> bool: ... def is_scalar(val: object) -> bool: ... def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ... def is_pyarrow_array(obj: object) -> bool: ... +def is_null_slice(obj: object) -> bool: ... def is_period(val: object) -> TypeGuard[Period]: ... def is_interval(val: object) -> TypeGuard[Interval]: ... def is_decimal(val: object) -> TypeGuard[Decimal]: ... 
diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 335fa1afc0f4e..6d4fbcc3c34e5 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -17,6 +17,7 @@ from pandas.core.nanops import check_below_min_count if TYPE_CHECKING: + from pandas._libs.arrays import BitMaskArray from pandas._typing import ( AxisInt, npt, @@ -26,7 +27,7 @@ def _reductions( func: Callable, values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitMaskArray, *, skipna: bool = True, min_count: int = 0, @@ -67,7 +68,7 @@ def _reductions( def sum( values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitMaskArray, *, skipna: bool = True, min_count: int = 0, @@ -80,7 +81,7 @@ def sum( def prod( values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitMaskArray, *, skipna: bool = True, min_count: int = 0, @@ -94,7 +95,7 @@ def prod( def _minmax( func: Callable, values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -131,7 +132,7 @@ def _minmax( def min( values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -141,7 +142,7 @@ def min( def max( values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -151,7 +152,7 @@ def max( def mean( values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -163,7 +164,7 @@ def mean( def var( values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -181,7 +182,7 @@ def var( def std( values: 
np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 9020c85f00327..766c9eb7ddffb 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -938,7 +938,7 @@ def take( ) try: - mask = self._mask.take_1d(indexer) + mask = self._mask.take_1d(indexer, axis=axis) except (TypeError, ValueError, NotImplementedError): mask = take( self._mask.to_numpy(), diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e60c42a20a9af..babbb757c8a61 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -3,6 +3,7 @@ import functools import itertools from typing import ( + TYPE_CHECKING, Any, Callable, cast, @@ -49,6 +50,10 @@ notna, ) +if TYPE_CHECKING: + from pandas._libs.arrays import BitMaskArray + + bn = import_optional_dependency("bottleneck", errors="warn") _BOTTLENECK_INSTALLED = bn is not None _USE_BOTTLENECK = False @@ -1537,7 +1542,9 @@ def _maybe_null_out( def check_below_min_count( - shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int + shape: tuple[int, ...], + mask: npt.NDArray[np.bool_] | BitMaskArray | None, + min_count: int, ) -> bool: """ Check for the `min_count` keyword. 
Returns True if below `min_count` (when @@ -1561,7 +1568,7 @@ def check_below_min_count( # no missing values, only check size non_nulls = np.prod(shape) else: - non_nulls = mask.size - mask.sum() + non_nulls = mask.size - mask.sum() # type: ignore[assignment] if non_nulls < min_count: return True return False diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index 1f37b568f1ec5..8136354659d6b 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -70,7 +70,8 @@ def kleene_or( if right is True: mask = np.zeros(left_mask.shape, left_mask.dtype) else: - left_mask = left_mask.to_numpy() + if isinstance(left_mask, BitMaskArray): + left_mask = left_mask.to_numpy() if right is libmissing.NA: mask = (~left & ~left_mask) | left_mask else: @@ -178,7 +179,7 @@ def kleene_and( # Scalar `right` if right is libmissing.NA: if left_mask.any(): - mask = (left & ~left_mask) | left_mask + mask = (left & ~left_mask) | left_mask # type: ignore[operator] else: mask = left else: From 404268f5b24fb194f0c60c38bb3349db68cc3aa2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 22 Aug 2023 17:17:23 -0400 Subject: [PATCH 072/126] rework pickling --- pandas/_libs/arrays.pyi | 9 ++---- pandas/_libs/arrays.pyx | 71 +++++++++++++++++++++++++++++------------ 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index b3ed14cb1c30c..23f32cf5b3a15 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -1,8 +1,4 @@ -from typing import ( - Callable, - Sequence, - Tuple, -) +from typing import Sequence import numpy as np @@ -55,7 +51,8 @@ class BitMaskArray: def __and__(self, other: np.ndarray | Self) -> np.ndarray: ... def __or__(self, other: np.ndarray | Self) -> np.ndarray: ... def __xor__(self, other: np.ndarray | Self) -> np.ndarray: ... - def __reduce__(self) -> Tuple[Callable[[np.ndarray], Self], Tuple[np.ndarray]]: ... + def __getstate__(self) -> dict: ... 
+ def __setstate__(self, other: dict) -> None: ... def __iter__(self): ... @property def size(self) -> int: ... diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 4794d0ee92f06..d310c0d2f91f5 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -224,11 +224,6 @@ cdef class NDArrayBacked: return to_concat[0]._from_backing_data(new_arr) -def _unpickle_bitmaskarray(array, parent): - bma = BitMaskArray(array, parent) - return bma - - cdef class BitMaskArray: cdef: ArrowBitmap bitmap @@ -257,23 +252,15 @@ cdef class BitMaskArray: def __cinit__(self): self.parent = False - def __init__(self, data, parent=None): - # parent is only required to reconstruct ref-counting from pickle - # but should not be called from user code + def __init__(self, data): if isinstance(data, np.ndarray): self.init_from_ndarray(data.ravel()) self.array_shape = data.shape - if parent: - self.parent = parent - else: - self.parent = None + self.parent = None elif isinstance(data, type(self)): self.init_from_bitmaskarray(data) self.array_shape = data.array_shape - if parent: - self.parent = parent - else: - self.parent = data + self.parent = data else: raise TypeError("Unsupported argument to BitMaskArray constructor") @@ -294,7 +281,7 @@ cdef class BitMaskArray: # TODO: this leaks a bit into the internals of the nanoarrow bitmap # We may want to upstream a BitmapCopy function instead ArrowBitmapInit(&bitmap) - buf = malloc(old_bma.bitmap.size_bits) + buf = malloc(old_bma.bitmap.size_bytes) memcpy(buf, old_bma.bitmap.buffer.data, old_bma.bitmap.buffer.size_bytes) bitmap.buffer.size_bytes = old_bma.bitmap.buffer.size_bytes bitmap.size_bits = old_bma.bitmap.size_bits @@ -447,9 +434,53 @@ cdef class BitMaskArray: return self.to_numpy() ^ other - def __reduce__(self): - object_state = (self.to_numpy(), self.parent) - return (_unpickle_bitmaskarray, object_state, self.parent) + def __getstate__(self): + cdef BitMaskArray self_ = self + state = { + "parent": self.parent, 
+ "array_shape": self.array_shape, + "buffer_owner": self_.buffer_owner, + # Private ArrowBitmap attributes below + "bitmap.buffer.size_bytes": self_.bitmap.buffer.size_bytes, + "bitmap.size_bits": self_.bitmap.size_bits + } + + # Only parents own data + if self_.buffer_owner: + bitmap_data = bytearray(self_.bitmap.buffer.size_bytes) + for i in range(self_.bitmap.buffer.size_bytes): + bitmap_data[i] = self_.bitmap.buffer.data[i] + + state["bitmap_data"] = bitmap_data + + return state + + def __setstate__(self, state): + cdef ArrowBitmap bitmap + cdef BitMaskArray self_ = self, other + self.parent = state["parent"] + self.array_shape = state["array_shape"] + self_.buffer_owner = state["buffer_owner"] + + nbytes = state["bitmap.buffer.size_bytes"] + nbits = state["bitmap.size_bits"] + if not self_.buffer_owner: + other = self.parent + self_.bitmap = other.bitmap + self_.bitmap.size_bits = nbits + self_.bitmap.buffer.size_bytes = nbytes + else: + ArrowBitmapInit(&bitmap) + + buf = malloc(nbytes) + data = state["bitmap_data"] + for i in range(nbytes): + buf[i] = data[i] + + bitmap.buffer.data = buf + bitmap.buffer.size_bytes = nbytes + bitmap.size_bits = nbits + self_.bitmap = bitmap @cython.boundscheck(False) @cython.wraparound(False) From 28dd82d1f787b4af865611f8a894b19f154c2be1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 22 Aug 2023 17:43:53 -0400 Subject: [PATCH 073/126] fixed attribute lookup --- pandas/_libs/arrays.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index d310c0d2f91f5..6a8740fb833ff 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -281,7 +281,7 @@ cdef class BitMaskArray: # TODO: this leaks a bit into the internals of the nanoarrow bitmap # We may want to upstream a BitmapCopy function instead ArrowBitmapInit(&bitmap) - buf = malloc(old_bma.bitmap.size_bytes) + buf = malloc(old_bma.bitmap.buffer.size_bytes) memcpy(buf, old_bma.bitmap.buffer.data, 
old_bma.bitmap.buffer.size_bytes) bitmap.buffer.size_bytes = old_bma.bitmap.buffer.size_bytes bitmap.size_bits = old_bma.bitmap.size_bits From f0bc4a2bcbb177a440ea97e376f9c876fab9fd68 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 22 Aug 2023 19:57:32 -0400 Subject: [PATCH 074/126] More efficient invert --- pandas/_libs/arrays.pyx | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 6a8740fb833ff..83846c5f73827 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -356,8 +356,19 @@ cdef class BitMaskArray: return self.to_numpy()[key] def __invert__(self): - # TODO: could invert the buffer first then go to numpy - return ~self.to_numpy() + # TODO: should we return a mask here instead of a NumPy array? + cdef Py_ssize_t i + cdef BitMaskArray self_ = self + cdef ndarray[uint8_t] result = np.empty(self_.bitmap.size_bits, dtype=bool) + + cdef uint8_t* buf = malloc(self_.bitmap.buffer.size_bytes) + for i in range(self_.bitmap.buffer.size_bytes): + buf[i] = ~self_.bitmap.buffer.data[i] + + BitMaskArray.buffer_to_array_1d(result, buf, self_.bitmap.size_bits) + free(buf) + + return result def __and__(self, other): cdef ndarray[uint8_t] result From b6ae9bbc2791feade1942fec49f667f63ad03211 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 22 Aug 2023 22:23:17 -0400 Subject: [PATCH 075/126] doc fix --- pandas/core/arrays/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 3827b5b5d40b2..9f715c9629ee0 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -594,7 +594,7 @@ def nbytes(self) -> int: Examples -------- >>> pd.array([1, 2, 3]).nbytes - 27 + 25 """ # If this is expensive to compute, return an approximate lower bound # on the number of bytes needed. 
From e1825aedd7fee0ffc3750afc37b9fc144158cef0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 22 Aug 2023 23:22:31 -0400 Subject: [PATCH 076/126] Have invert return BitMaskArray --- pandas/_libs/arrays.pyi | 2 +- pandas/_libs/arrays.pyx | 22 +++++++++----- pandas/core/array_algos/masked_reductions.py | 32 ++++++++++---------- pandas/core/arrays/masked.py | 12 ++++---- 4 files changed, 38 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 23f32cf5b3a15..73ca5ea86d324 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -47,7 +47,7 @@ class BitMaskArray: def __init__(self, data: np.ndarray | Self) -> None: ... def __setitem__(self, key: PositionalIndexer, value: ArrayLike | bool) -> None: ... def __getitem__(self, key: PositionalIndexer) -> bool: ... - def __invert__(self) -> np.ndarray: ... + def __invert__(self) -> Self: ... def __and__(self, other: np.ndarray | Self) -> np.ndarray: ... def __or__(self, other: np.ndarray | Self) -> np.ndarray: ... def __xor__(self, other: np.ndarray | Self) -> np.ndarray: ... diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 83846c5f73827..eda01241b0399 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -356,19 +356,27 @@ cdef class BitMaskArray: return self.to_numpy()[key] def __invert__(self): - # TODO: should we return a mask here instead of a NumPy array? 
cdef Py_ssize_t i cdef BitMaskArray self_ = self - cdef ndarray[uint8_t] result = np.empty(self_.bitmap.size_bits, dtype=bool) + cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) + cdef ArrowBitmap bitmap + + ArrowBitmapInit(&bitmap) + ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) - cdef uint8_t* buf = malloc(self_.bitmap.buffer.size_bytes) for i in range(self_.bitmap.buffer.size_bytes): - buf[i] = ~self_.bitmap.buffer.data[i] + bitmap.buffer.data[i] = ~self_.bitmap.buffer.data[i] - BitMaskArray.buffer_to_array_1d(result, buf, self_.bitmap.size_bits) - free(buf) + # TODO: avoid nanoarrow internals + bitmap.size_bits = self_.bitmap.size_bits + bitmap.buffer.size_bytes = self_.bitmap.buffer.size_bytes - return result + bma.bitmap = bitmap + bma.array_shape = self.array_shape + bma.buffer_owner = True + bma.parent = None + + return bma def __and__(self, other): cdef ndarray[uint8_t] result diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 6d4fbcc3c34e5..f426e57da4380 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -13,21 +13,18 @@ import numpy as np from pandas._libs import missing as libmissing +from pandas._libs.arrays import BitMaskArray from pandas.core.nanops import check_below_min_count if TYPE_CHECKING: - from pandas._libs.arrays import BitMaskArray - from pandas._typing import ( - AxisInt, - npt, - ) + from pandas._typing import AxisInt def _reductions( func: Callable, values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: BitMaskArray, *, skipna: bool = True, min_count: int = 0, @@ -63,12 +60,12 @@ def _reductions( ): return libmissing.NA - return func(values, where=~mask, axis=axis, **kwargs) + return func(values, where=~mask.to_numpy(), axis=axis, **kwargs) def sum( values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: BitMaskArray, *, skipna: bool = True, min_count: int = 0, @@ 
-81,7 +78,7 @@ def sum( def prod( values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: BitMaskArray, *, skipna: bool = True, min_count: int = 0, @@ -95,7 +92,7 @@ def prod( def _minmax( func: Callable, values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: np.ndarray | BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -122,7 +119,10 @@ def _minmax( else: return func(values, axis=axis) else: - subset = values[~mask] + if isinstance(mask, BitMaskArray): + subset = values[(~mask).to_numpy()] + else: + subset = values[~mask] if subset.size: return func(subset, axis=axis) else: @@ -132,7 +132,7 @@ def _minmax( def min( values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: np.ndarray | BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -142,7 +142,7 @@ def min( def max( values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: np.ndarray | BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -152,7 +152,7 @@ def max( def mean( values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -164,7 +164,7 @@ def mean( def var( values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -182,7 +182,7 @@ def var( def std( values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: BitMaskArray, *, skipna: bool = True, axis: AxisInt | None = None, diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 766c9eb7ddffb..363c8addea6aa 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -788,21 +788,21 @@ def _arith_method(self, other, op): if op_name == "pow": # 1 ** x is 1. 
- mask = np.where((self._data == 1) & ~self._mask, False, mask) + mask = np.where((self._data == 1) & (~self._mask).to_numpy(), False, mask) # x ** 0 is 1. if omask is not None: - mask = np.where((other == 0) & ~omask, False, mask) + mask = np.where((other == 0) & (~omask).to_numpy(), False, mask) elif other is not libmissing.NA: mask = np.where(other == 0, False, mask) elif op_name == "rpow": # 1 ** x is 1. if omask is not None: - mask = np.where((other == 1) & ~omask, False, mask) + mask = np.where((other == 1) & (~omask).to_numpy(), False, mask) elif other is not libmissing.NA: mask = np.where(other == 1, False, mask) # x ** 0 is 1. - mask = np.where((self._data == 0) & ~self._mask, False, mask) + mask = np.where((self._data == 0) & (~self._mask).to_numpy(), False, mask) return self._maybe_mask_result(result, mask) @@ -1113,8 +1113,8 @@ def equals(self, other) -> bool: if not np.array_equal(self._mask.to_numpy(), other._mask.to_numpy()): return False - left = self._data[~self._mask] - right = other._data[~other._mask] + left = self._data[(~self._mask).to_numpy()] + right = other._data[(~other._mask).to_numpy()] return array_equivalent(left, right, strict_nan=True, dtype_equal=True) def _quantile( From 5211e2e7cc828a3550313b7001b6913cc9589f5e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 10:30:04 -0400 Subject: [PATCH 077/126] Implemented Bitmask Concatenate --- pandas/_libs/arrays.pyi | 2 + pandas/_libs/arrays.pyx | 63 +++++++++++++++++++ .../_libs/include/pandas/bitmask_algorithms.h | 16 +++++ pandas/_libs/meson.build | 2 +- pandas/_libs/src/bitmask_algorithms.c | 43 +++++++++++++ pandas/core/arrays/masked.py | 5 +- 6 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 pandas/_libs/include/pandas/bitmask_algorithms.h create mode 100644 pandas/_libs/src/bitmask_algorithms.c diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 73ca5ea86d324..f3414659b53ec 100644 --- a/pandas/_libs/arrays.pyi +++ 
b/pandas/_libs/arrays.pyi @@ -54,6 +54,8 @@ class BitMaskArray: def __getstate__(self) -> dict: ... def __setstate__(self, other: dict) -> None: ... def __iter__(self): ... + @staticmethod + def concatenate(objs: list[Self], axis: int) -> Self: ... @property def size(self) -> int: ... @property diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index eda01241b0399..d4bf9bd8687aa 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -44,6 +44,9 @@ cdef extern from "pandas/vendored/nanoarrow.h": int64_t ArrowBitCountSet(const uint8_t*, int64_t, int64_t) void ArrowBitmapReset(ArrowBitmap*) +cdef extern from "pandas/bitmask_algorithms.h": + void ConcatenateBitmapData(ArrowBitmap*, size_t, uint8_t*) + @cython.freelist(16) cdef class NDArrayBacked: @@ -295,6 +298,66 @@ cdef class BitMaskArray: def __len__(self): return self.bitmap.size_bits + @cython.wraparound(False) + @cython.boundscheck(False) + @staticmethod + cdef BitMaskArray c_concatenate(list objs): + cdef Py_ssize_t i + cdef int64_t bytes_needed, total_bits = 0 + cdef BitMaskArray current_bma + cdef Py_ssize_t nbitmaps = len(objs) + + cdef Py_ssize_t second_dim = 0 + if any(len(x.array_shape) > 1 for x in objs): + second_dim = objs[0].array_shape[1] + for obj in objs: + if not obj.array_shape[1] == second_dim: + raise NotImplementedError( + "BitMaskArray.concatenate does not support broadcasting" + ) + + cdef ArrowBitmap* bitmaps = malloc(sizeof(ArrowBitmap) * nbitmaps) + for i in range(nbitmaps): + current_bma = objs[i] + total_bits += current_bma.bitmap.size_bits + bitmaps[i] = current_bma.bitmap + + # Bypass __init__ calls + cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) + cdef ArrowBitmap bitmap + + ArrowBitmapInit(&bitmap) + ArrowBitmapReserve(&bitmap, total_bits) + ConcatenateBitmapData(bitmaps, nbitmaps, bitmap.buffer.data) + free(bitmaps) + + # TODO: avoid nanoarrow internals + bitmap.size_bits = total_bits + bytes_needed = total_bits // 8 + if total_bits % 8 != 0: 
+ bytes_needed += 1 + bitmap.buffer.size_bytes = bytes_needed + + bma.bitmap = bitmap + + if second_dim != 0: + bma.array_shape = tuple((total_bits // second_dim, second_dim)) + else: + bma.array_shape = tuple((total_bits,)) + bma.buffer_owner = True + bma.parent = None + + return bma + + @staticmethod + def concatenate(objs, axis): + if axis != 0: + raise NotImplementedError( + "BitMaskArray.concatenate only implemented for axis=0" + ) + + return BitMaskArray.c_concatenate(objs) + @cython.boundscheck(False) @cython.wraparound(False) cdef _set_scalar_value_from_equal_sized_array( diff --git a/pandas/_libs/include/pandas/bitmask_algorithms.h b/pandas/_libs/include/pandas/bitmask_algorithms.h new file mode 100644 index 0000000000000..408a37f5e1495 --- /dev/null +++ b/pandas/_libs/include/pandas/bitmask_algorithms.h @@ -0,0 +1,16 @@ +#include +#include + +#include "pandas/vendored/nanoarrow.h" + +/* + Ordered concatenation of bitmasks. Masks is the data itself, + nmasks is the number of masks to concatenate, mask_nbits is the + number of bits within each mask to concatenate. + + Concatenation preserves order. + + out is assumed to have enough bytes to hold all elements. 
+*/ +void ConcatenateBitmapData(struct ArrowBitmap *bitmaps, size_t nbitmaps, + uint8_t *out); diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 29b1298050619..849b839d33a87 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -62,7 +62,7 @@ libs_sources = { # Dict of extension name -> dict of {sources, include_dirs, and deps} # numpy include dir is implicitly included 'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper, _khash_primitive_helper]}, - 'arrays': {'sources': ['arrays.pyx', 'src/vendored/nanoarrow.c'], 'includes': ['include/pandas/vendored']}, + 'arrays': {'sources': ['arrays.pyx', 'src/vendored/nanoarrow.c', 'src/bitmask_algorithms.c'], 'includes': ['include/pandas/vendored']}, 'groupby': {'sources': ['groupby.pyx']}, 'hashing': {'sources': ['hashing.pyx']}, 'hashtable': {'sources': ['hashtable.pyx', _khash_primitive_helper, _hashtable_class_helper, _hashtable_func_helper]}, diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c new file mode 100644 index 0000000000000..06775257e3386 --- /dev/null +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -0,0 +1,43 @@ +#include + +#include "pandas/bitmask_algorithms.h" + +static const uint8_t clear_mask[8] = {0x0, 0x1, 0x3, 0x7, + 0xf, 0x1f, 0x3f, 0x7f}; + +void ConcatenateBitmapData(struct ArrowBitmap *bitmaps, size_t nbitmaps, + uint8_t *out) { + if (nbitmaps == 0) { + return; + } + + uint8_t *out_cursor = out; + // As we loop through each array, any time we end up starting + // on a word boundary we can simply use memcpy. 
If we are not + // so lucky we fall back to bit shifting each element + size_t start_bit_pos = 0; + for (size_t i = 0; i < nbitmaps; i++) { + struct ArrowBitmap bitmap = bitmaps[i]; + int64_t nbytes = bitmap.buffer.size_bytes; + size_t trailing_nbits = bitmap.size_bits % 8; + + if (start_bit_pos == 0) { + memcpy(out_cursor, bitmap.buffer.data, nbytes); + } else { + for (size_t j = 0; j < nbytes; j++) { + uint8_t lshifted = bitmap.buffer.data[j] << start_bit_pos; + out_cursor[j] = (out_cursor[j] & clear_mask[start_bit_pos]) | lshifted; + + uint8_t rshifted = bitmap.buffer.data[j] >> (8 - start_bit_pos); + out_cursor[j + 1] = rshifted; + } + } + + start_bit_pos = (start_bit_pos + trailing_nbits) % 8; + if (start_bit_pos == 0) { + out_cursor += nbytes; + } else { + out_cursor += nbytes - 1; + } + } +} diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 363c8addea6aa..89b3df8aa89f9 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -915,7 +915,10 @@ def _concat_same_type( axis: AxisInt = 0, ) -> Self: data = np.concatenate([x._data for x in to_concat], axis=axis) - mask = np.concatenate([x._mask.to_numpy() for x in to_concat], axis=axis) + try: + mask = BitMaskArray.concatenate([x._mask for x in to_concat], axis=axis) + except NotImplementedError: + mask = np.concatenate([x._mask.to_numpy() for x in to_concat], axis=axis) return cls(data, mask) def take( From cfa3b931c278ea99a673433139559b82965be2ac Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 11:17:50 -0400 Subject: [PATCH 078/126] bitmask_any moved to algorithms --- pandas/_libs/arrays.pyx | 32 +++---------- .../_libs/include/pandas/bitmask_algorithms.h | 17 +++---- pandas/_libs/src/bitmask_algorithms.c | 45 ++++++++++++++----- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index d4bf9bd8687aa..875876e38b125 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ 
-45,7 +45,8 @@ cdef extern from "pandas/vendored/nanoarrow.h": void ArrowBitmapReset(ArrowBitmap*) cdef extern from "pandas/bitmask_algorithms.h": - void ConcatenateBitmapData(ArrowBitmap*, size_t, uint8_t*) + void ConcatenateBitmapData(const ArrowBitmap**, size_t, const uint8_t*) + bint BitmapAny(const ArrowBitmap*) @cython.freelist(16) @@ -316,11 +317,13 @@ cdef class BitMaskArray: "BitMaskArray.concatenate does not support broadcasting" ) - cdef ArrowBitmap* bitmaps = malloc(sizeof(ArrowBitmap) * nbitmaps) + cdef ArrowBitmap** bitmaps = malloc( + sizeof(ArrowBitmap*) * nbitmaps + ) for i in range(nbitmaps): current_bma = objs[i] total_bits += current_bma.bitmap.size_bits - bitmaps[i] = current_bma.bitmap + bitmaps[i] = ¤t_bma.bitmap # Bypass __init__ calls cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) @@ -591,7 +594,7 @@ cdef class BitMaskArray: return bool def any(self) -> bool: - return BitMaskArray.buf_any(&self.bitmap) + return BitmapAny(&self.bitmap) def all(self) -> bool: return BitMaskArray.buf_all(&self.bitmap) @@ -658,27 +661,6 @@ cdef class BitMaskArray: cdef void buffer_to_array_1d(uint8_t[:] out, const uint8_t* buf, Py_ssize_t size): ArrowBitsUnpackInt8(buf, 0, size, &out[0]) - @cython.boundscheck(False) - @cython.wraparound(False) - @staticmethod - cdef bint buf_any(const ArrowBitmap* bitmap): - cdef Py_ssize_t i, bits_remaining - cdef int64_t size_bits = bitmap.size_bits - cdef const uint8_t* buf = bitmap.buffer.data - if size_bits < 1: - return False - - for i in range(bitmap.buffer.size_bytes): - if buf[i] > 0: - return True - - bits_remaining = size_bits % 8 - for i in range(bits_remaining): - if ArrowBitGet(buf, size_bits - i - 1): - return True - - return False - @cython.boundscheck(False) @cython.wraparound(False) @staticmethod diff --git a/pandas/_libs/include/pandas/bitmask_algorithms.h b/pandas/_libs/include/pandas/bitmask_algorithms.h index 408a37f5e1495..093905f887777 100644 --- 
a/pandas/_libs/include/pandas/bitmask_algorithms.h +++ b/pandas/_libs/include/pandas/bitmask_algorithms.h @@ -1,16 +1,17 @@ +#pragma once + +#include #include #include #include "pandas/vendored/nanoarrow.h" /* - Ordered concatenation of bitmasks. Masks is the data itself, - nmasks is the number of masks to concatenate, mask_nbits is the - number of bits within each mask to concatenate. - - Concatenation preserves order. - - out is assumed to have enough bytes to hold all elements. + Concatenates the data from an array of bitmaps with size nbitmaps + into a buffer "out". Order is preserved and out is assumed to have + enough bytes to hold all elements. */ -void ConcatenateBitmapData(struct ArrowBitmap *bitmaps, size_t nbitmaps, +void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, uint8_t *out); + +bool BitmapAny(const struct ArrowBitmap* bitmap); diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 06775257e3386..7d6d22a12736d 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -5,30 +5,30 @@ static const uint8_t clear_mask[8] = {0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f}; -void ConcatenateBitmapData(struct ArrowBitmap *bitmaps, size_t nbitmaps, +void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, uint8_t *out) { if (nbitmaps == 0) { return; } uint8_t *out_cursor = out; - // As we loop through each array, any time we end up starting - // on a word boundary we can simply use memcpy. 
If we are not - // so lucky we fall back to bit shifting each element size_t start_bit_pos = 0; for (size_t i = 0; i < nbitmaps; i++) { - struct ArrowBitmap bitmap = bitmaps[i]; - int64_t nbytes = bitmap.buffer.size_bytes; - size_t trailing_nbits = bitmap.size_bits % 8; + const struct ArrowBitmap* bitmap = bitmaps[i]; + const int64_t nbytes = bitmap->buffer.size_bytes; + const size_t trailing_nbits = bitmap->size_bits % 8; + // As we loop through each array, any time we end up starting + // on a word boundary we can simply use memcpy. If we are not + // so lucky we fall back to bit shifting each element if (start_bit_pos == 0) { - memcpy(out_cursor, bitmap.buffer.data, nbytes); + memcpy(out_cursor, bitmap->buffer.data, nbytes); } else { for (size_t j = 0; j < nbytes; j++) { - uint8_t lshifted = bitmap.buffer.data[j] << start_bit_pos; + const uint8_t lshifted = bitmap->buffer.data[j] << start_bit_pos; out_cursor[j] = (out_cursor[j] & clear_mask[start_bit_pos]) | lshifted; - uint8_t rshifted = bitmap.buffer.data[j] >> (8 - start_bit_pos); + const uint8_t rshifted = bitmap->buffer.data[j] >> (8 - start_bit_pos); out_cursor[j + 1] = rshifted; } } @@ -41,3 +41,28 @@ void ConcatenateBitmapData(struct ArrowBitmap *bitmaps, size_t nbitmaps, } } } + +bool BitmapAny(const struct ArrowBitmap* bitmap) { + const size_t nbits = bitmap->size_bits; + const size_t size_bytes = bitmap->buffer.size_bytes; + const uint8_t* buf = bitmap->buffer.data; + + if (nbits < 1) { + return false; + } + + for (size_t i = 0; i < size_bytes - 1; i++) { + if (buf[i] > 0) { + return true; + } + } + + const size_t bits_remaining = nbits % 8; + for (size_t i = 0; i < bits_remaining; i++) { + if (ArrowBitGet(buf, nbits - i - 1)) { + return true; + } + } + + return false; +} From 6df2930e7029cbba37cbc15fcfc8f490e157a188 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 11:48:56 -0400 Subject: [PATCH 079/126] more algorithms --- pandas/_libs/arrays.pyx | 133 ++++-------------- 
.../_libs/include/pandas/bitmask_algorithms.h | 18 ++- pandas/_libs/src/bitmask_algorithms.c | 100 ++++++++++++- 3 files changed, 145 insertions(+), 106 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 875876e38b125..d8c86db6d2e1d 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -47,6 +47,11 @@ cdef extern from "pandas/vendored/nanoarrow.h": cdef extern from "pandas/bitmask_algorithms.h": void ConcatenateBitmapData(const ArrowBitmap**, size_t, const uint8_t*) bint BitmapAny(const ArrowBitmap*) + bint BitmapAll(const ArrowBitmap*) + bint BitmapOr(const ArrowBitmap*, const ArrowBitmap*, ArrowBitmap*) + bint BitmapXor(const ArrowBitmap*, const ArrowBitmap*, ArrowBitmap*) + bint BitmapAnd(const ArrowBitmap*, const ArrowBitmap*, ArrowBitmap*) + bint BitmapInvert(const ArrowBitmap*, ArrowBitmap*) @cython.freelist(16) @@ -422,7 +427,6 @@ cdef class BitMaskArray: return self.to_numpy()[key] def __invert__(self): - cdef Py_ssize_t i cdef BitMaskArray self_ = self cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) cdef ArrowBitmap bitmap @@ -430,12 +434,7 @@ cdef class BitMaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) - for i in range(self_.bitmap.buffer.size_bytes): - bitmap.buffer.data[i] = ~self_.bitmap.buffer.data[i] - - # TODO: avoid nanoarrow internals - bitmap.size_bits = self_.bitmap.size_bits - bitmap.buffer.size_bytes = self_.bitmap.buffer.size_bytes + BitmapInvert(&self_.bitmap, &bitmap) bma.bitmap = bitmap bma.array_shape = self.array_shape @@ -447,6 +446,7 @@ cdef class BitMaskArray: def __and__(self, other): cdef ndarray[uint8_t] result cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 + cdef ArrowBitmap bitmap if isinstance(other, type(self)): other_bma = other @@ -456,15 +456,17 @@ cdef class BitMaskArray: if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") - buf = 
malloc(self_.bitmap.size_bits) - BitMaskArray.buf_and(&self_.bitmap, &other_bma.bitmap, buf) + ArrowBitmapInit(&bitmap) + ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) + BitmapAnd(&self_.bitmap, &other_bma.bitmap, &bitmap) + result = np.empty(self_.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, - buf, - self_.bitmap.size_bits + bitmap.buffer.data, + bitmap.size_bits ) - free(buf) + ArrowBitmapReset(&bitmap) return result.reshape(self.array_shape) return self.to_numpy() & other @@ -472,6 +474,7 @@ cdef class BitMaskArray: def __or__(self, other): cdef ndarray[uint8_t] result cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 + cdef ArrowBitmap bitmap if isinstance(other, type(self)): other_bma = other @@ -481,15 +484,17 @@ cdef class BitMaskArray: if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") - buf = malloc(self_.bitmap.size_bits) - BitMaskArray.buf_or(&self_.bitmap, &other_bma.bitmap, buf) + ArrowBitmapInit(&bitmap) + ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) + BitmapOr(&self_.bitmap, &other_bma.bitmap, &bitmap) + result = np.empty(self_.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, - buf, - self_.bitmap.size_bits + bitmap.buffer.data, + bitmap.size_bits ) - free(buf) + ArrowBitmapReset(&bitmap) return result.reshape(self.array_shape) return self.to_numpy() | other @@ -497,6 +502,7 @@ cdef class BitMaskArray: def __xor__(self, other): cdef ndarray[uint8_t] result cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 + cdef ArrowBitmap bitmap if isinstance(other, type(self)): other_bma = other @@ -506,15 +512,17 @@ cdef class BitMaskArray: if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") - buf = malloc(self_.bitmap.size_bits) - BitMaskArray.buf_xor(&self_.bitmap, &other_bma.bitmap, buf) + ArrowBitmapInit(&bitmap) + ArrowBitmapReserve(&bitmap, 
self_.bitmap.size_bits) + BitmapXor(&self_.bitmap, &other_bma.bitmap, &bitmap) + result = np.empty(self_.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( result, - buf, - self_.bitmap.size_bits + bitmap.buffer.data, + bitmap.size_bits ) - free(buf) + ArrowBitmapReset(&bitmap) return result.reshape(self.array_shape) return self.to_numpy() ^ other @@ -597,7 +605,7 @@ cdef class BitMaskArray: return BitmapAny(&self.bitmap) def all(self) -> bool: - return BitMaskArray.buf_all(&self.bitmap) + return BitmapAll(&self.bitmap) def sum(self) -> int: return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) @@ -661,85 +669,6 @@ cdef class BitMaskArray: cdef void buffer_to_array_1d(uint8_t[:] out, const uint8_t* buf, Py_ssize_t size): ArrowBitsUnpackInt8(buf, 0, size, &out[0]) - @cython.boundscheck(False) - @cython.wraparound(False) - @staticmethod - cdef bint buf_all(const ArrowBitmap* bitmap): - cdef Py_ssize_t i, bits_remaining - cdef int64_t size_bits = bitmap.size_bits - cdef const uint8_t* buf = bitmap.buffer.data - if size_bits < 1: - return True - - for i in range(bitmap.buffer.size_bytes): - if buf[i] != 256: - return False - - bits_remaining = size_bits % 8 - for i in range(bits_remaining): - if ArrowBitGet(buf, size_bits - i - 1) == 0: - return False - - return True - - # TODO: clean up signatures - don't mix nbits and nbytes - # Note that in cases where the size_bits doesn't end on a word - # boundary that these will still operate on the remaining bits, - # with undefined values therein - @cython.boundscheck(False) - @cython.wraparound(False) - @staticmethod - cdef void buf_or( - const ArrowBitmap* bitmap1, - const ArrowBitmap* bitmap2, - uint8_t* out - ): - cdef Py_ssize_t i - cdef const uint8_t* buf1 = bitmap1.buffer.data - cdef const uint8_t* buf2 = bitmap2.buffer.data - # Assumed caller has checked that bitmaps are equal, - # otherwise trailing comparison is undefined - cdef int64_t nbytes = bitmap1.buffer.size_bytes - - for i in 
range(nbytes): - out[i] = buf1[i] | buf2[i] - - @cython.boundscheck(False) - @cython.wraparound(False) - @staticmethod - cdef void buf_xor( - const ArrowBitmap* bitmap1, - const ArrowBitmap* bitmap2, - uint8_t* out - ): - cdef Py_ssize_t i - cdef const uint8_t* buf1 = bitmap1.buffer.data - cdef const uint8_t* buf2 = bitmap2.buffer.data - # Assumed caller has checked that bitmaps are equal, - # otherwise trailing comparison is undefined - cdef int64_t nbytes = bitmap1.buffer.size_bytes - - for i in range(nbytes): - out[i] = buf1[i] ^ buf2[i] - - @cython.boundscheck(False) - @cython.wraparound(False) - @staticmethod - cdef void buf_and( - const ArrowBitmap* bitmap1, - const ArrowBitmap* bitmap2, - uint8_t* out - ): - cdef Py_ssize_t i - cdef const uint8_t* buf1 = bitmap1.buffer.data - cdef const uint8_t* buf2 = bitmap2.buffer.data - # Assumed caller has checked that bitmaps are equal, - # otherwise trailing comparison is undefined - cdef int64_t nbytes = bitmap1.buffer.size_bytes - - for i in range(nbytes): - out[i] = buf1[i] & buf2[i] - def to_numpy(self) -> ndarray: cdef ndarray[uint8_t] result = np.empty(self.bitmap.size_bits, dtype=bool) BitMaskArray.buffer_to_array_1d( diff --git a/pandas/_libs/include/pandas/bitmask_algorithms.h b/pandas/_libs/include/pandas/bitmask_algorithms.h index 093905f887777..b0850b6923a03 100644 --- a/pandas/_libs/include/pandas/bitmask_algorithms.h +++ b/pandas/_libs/include/pandas/bitmask_algorithms.h @@ -14,4 +14,20 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, uint8_t *out); -bool BitmapAny(const struct ArrowBitmap* bitmap); +bool BitmapAny(const struct ArrowBitmap *bitmap); +bool BitmapAll(const struct ArrowBitmap *bitmap); + +/* Returns -1 on failure. On success returns 0 and writes to out */ +int BitmapOr(const struct ArrowBitmap *bitmap1, + const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out); + +/* Returns -1 on failure. 
On success returns 0 and writes to out */ +int BitmapXor(const struct ArrowBitmap *bitmap1, + const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out); + +/* Returns -1 on failure. On success returns 0 and writes to out */ +int BitmapAnd(const struct ArrowBitmap *bitmap1, + const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out); + +/* Returns -1 on failure. On success returns 0 and writes to out */ +int BitmapInvert(const struct ArrowBitmap *bitmap, struct ArrowBitmap *out); diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 7d6d22a12736d..74a4cc0695d8c 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -14,7 +14,7 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, uint8_t *out_cursor = out; size_t start_bit_pos = 0; for (size_t i = 0; i < nbitmaps; i++) { - const struct ArrowBitmap* bitmap = bitmaps[i]; + const struct ArrowBitmap *bitmap = bitmaps[i]; const int64_t nbytes = bitmap->buffer.size_bytes; const size_t trailing_nbits = bitmap->size_bits % 8; @@ -42,10 +42,10 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, } } -bool BitmapAny(const struct ArrowBitmap* bitmap) { +bool BitmapAny(const struct ArrowBitmap *bitmap) { const size_t nbits = bitmap->size_bits; const size_t size_bytes = bitmap->buffer.size_bytes; - const uint8_t* buf = bitmap->buffer.data; + const uint8_t *buf = bitmap->buffer.data; if (nbits < 1) { return false; @@ -66,3 +66,97 @@ bool BitmapAny(const struct ArrowBitmap* bitmap) { return false; } + +bool BitmapAll(const struct ArrowBitmap *bitmap) { + const size_t nbits = bitmap->size_bits; + const size_t size_bytes = bitmap->buffer.size_bytes; + const uint8_t *buf = bitmap->buffer.data; + + if (nbits < 1) { + return true; + } + + for (size_t i = 0; i < size_bytes - 1; i++) { + if (buf[i] != 0xff) { + return false; + } + } + + const size_t bits_remaining = nbits % 8; + for (size_t 
i = 0; i < bits_remaining; i++) { + if (ArrowBitGet(buf, nbits - i - 1) == 0) { + return false; + } + } + + return true; +} + +int BitmapOr(const struct ArrowBitmap *bitmap1, + const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out) { + if (bitmap1->size_bits != bitmap2->size_bits) { + return -1; + } else if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + return -1; + } + + for (size_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + out->buffer.data[i] = bitmap1->buffer.data[i] | bitmap2->buffer.data[i]; + } + + out->size_bits = bitmap1->size_bits; + out->buffer.size_bytes = bitmap1->buffer.size_bytes; + + return 0; +} + +int BitmapAnd(const struct ArrowBitmap *bitmap1, + const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out) { + if (bitmap1->size_bits != bitmap2->size_bits) { + return -1; + } else if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + return -1; + } + + for (size_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + out->buffer.data[i] = bitmap1->buffer.data[i] & bitmap2->buffer.data[i]; + } + + out->size_bits = bitmap1->size_bits; + out->buffer.size_bytes = bitmap1->buffer.size_bytes; + + return 0; +} + +int BitmapXor(const struct ArrowBitmap *bitmap1, + const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out) { + if (bitmap1->size_bits != bitmap2->size_bits) { + return -1; + } else if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + return -1; + } + + for (size_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + out->buffer.data[i] = bitmap1->buffer.data[i] ^ bitmap2->buffer.data[i]; + } + + out->size_bits = bitmap1->size_bits; + out->buffer.size_bytes = bitmap1->buffer.size_bytes; + + return 0; +} + +int BitmapInvert(const struct ArrowBitmap *bitmap1, struct ArrowBitmap *out) { + if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + return -1; + } + + for (size_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + out->buffer.data[i] = ~bitmap1->buffer.data[i]; + } + + 
out->size_bits = bitmap1->size_bits; + out->buffer.size_bytes = bitmap1->buffer.size_bytes; + + return 0; +} From 06f3b01a99272822502d62e21327f7be9b40754e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 12:53:42 -0400 Subject: [PATCH 080/126] C-implemented take / putmask --- pandas/_libs/arrays.pyx | 58 ++++++++----------- .../_libs/include/pandas/bitmask_algorithms.h | 8 +++ pandas/_libs/src/bitmask_algorithms.c | 56 ++++++++++++++++-- 3 files changed, 82 insertions(+), 40 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index d8c86db6d2e1d..74565fe92e36e 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -27,6 +27,7 @@ cdef extern from "pandas/vendored/nanoarrow.h": struct ArrowBuffer: uint8_t* data int64_t size_bytes + int64_t capacity_bytes struct ArrowBitmap: ArrowBuffer buffer @@ -52,6 +53,8 @@ cdef extern from "pandas/bitmask_algorithms.h": bint BitmapXor(const ArrowBitmap*, const ArrowBitmap*, ArrowBitmap*) bint BitmapAnd(const ArrowBitmap*, const ArrowBitmap*, ArrowBitmap*) bint BitmapInvert(const ArrowBitmap*, ArrowBitmap*) + bint BitmapTake(const ArrowBitmap*, const int64_t*, size_t, ArrowBitmap*) + bint BitmapPutFromBufferMask(ArrowBitmap*, const uint8_t*, size_t, uint8_t) @cython.freelist(16) @@ -293,6 +296,7 @@ cdef class BitMaskArray: buf = malloc(old_bma.bitmap.buffer.size_bytes) memcpy(buf, old_bma.bitmap.buffer.data, old_bma.bitmap.buffer.size_bytes) bitmap.buffer.size_bytes = old_bma.bitmap.buffer.size_bytes + bitmap.buffer.capacity_bytes = old_bma.bitmap.buffer.capacity_bytes bitmap.size_bits = old_bma.bitmap.size_bits bitmap.buffer.data = buf @@ -339,12 +343,13 @@ cdef class BitMaskArray: ConcatenateBitmapData(bitmaps, nbitmaps, bitmap.buffer.data) free(bitmaps) - # TODO: avoid nanoarrow internals + # TODO: avoid nanoarrow internals - maybe handle in concat function? 
bitmap.size_bits = total_bits bytes_needed = total_bits // 8 if total_bits % 8 != 0: bytes_needed += 1 bitmap.buffer.size_bytes = bytes_needed + bitmap.buffer.capacity_bytes = bytes_needed bma.bitmap = bitmap @@ -366,23 +371,13 @@ cdef class BitMaskArray: return BitMaskArray.c_concatenate(objs) - @cython.boundscheck(False) - @cython.wraparound(False) - cdef _set_scalar_value_from_equal_sized_array( - self, - const uint8_t[:] data, - bint value - ): - cdef Py_ssize_t i - for i in range(self.bitmap.size_bits): - if data[i]: - ArrowBitSetTo(self.bitmap.buffer.data, i, value) - def __setitem__(self, key, value): + cdef const uint8_t[:] keymask cdef const uint8_t[:] arr1d cdef Py_ssize_t i = 0 cdef Py_ssize_t ckey cdef bint cvalue + cdef BitMaskArray self_ = self if isinstance(key, int): ckey = key @@ -406,7 +401,14 @@ cdef class BitMaskArray: and key.dtype == bool and isinstance(value, (int, bool)) ): - self._set_scalar_value_from_equal_sized_array(key, value) + keymask = key + if BitmapPutFromBufferMask( + &self_.bitmap, + &keymask[0], + keymask.shape[0], + value + ) != 0: + raise ValueError("BitMaskArray.__setitem__ failed!") else: arr = self.to_numpy() arr[key] = value @@ -535,6 +537,7 @@ cdef class BitMaskArray: "buffer_owner": self_.buffer_owner, # Private ArrowBitmap attributes below "bitmap.buffer.size_bytes": self_.bitmap.buffer.size_bytes, + "bitmap.buffer.capacity_bytes": self_.bitmap.buffer.capacity_bytes, "bitmap.size_bits": self_.bitmap.size_bits } @@ -556,12 +559,14 @@ cdef class BitMaskArray: self_.buffer_owner = state["buffer_owner"] nbytes = state["bitmap.buffer.size_bytes"] + capacity_bytes = state["bitmap.buffer.capacity_bytes"] nbits = state["bitmap.size_bits"] if not self_.buffer_owner: other = self.parent self_.bitmap = other.bitmap self_.bitmap.size_bits = nbits self_.bitmap.buffer.size_bytes = nbytes + self_.bitmap.buffer.capacity_bytes = capacity_bytes else: ArrowBitmapInit(&bitmap) @@ -572,6 +577,7 @@ cdef class BitMaskArray: 
bitmap.buffer.data = buf bitmap.buffer.size_bytes = nbytes + bitmap.buffer.capacity_bytes = nbytes bitmap.size_bits = nbits self_.bitmap = bitmap @@ -610,28 +616,12 @@ cdef class BitMaskArray: def sum(self) -> int: return ArrowBitCountSet(self.bitmap.buffer.data, 0, self.bitmap.size_bits) - @cython.wraparound(False) - @cython.boundscheck(False) - cdef int ctake_1d(self, const int64_t[:] indices, ArrowBitmap* out_bitmap): - """returns -1 in case a negative index is encountered, 0 on success""" - cdef bint value - cdef Py_ssize_t i - cdef int64_t index - cdef Py_ssize_t nindices = indices.shape[0] - - for i in range(nindices): - index = indices[i] - if index < 0: - return -1 - - value = ArrowBitGet(self.bitmap.buffer.data, index) - ArrowBitmapAppendUnsafe(out_bitmap, value, 1) - def take_1d( self, - indices, + const int64_t[:] indices, const int axis=0, ): + cdef BitMaskArray self_ = self cdef Py_ssize_t nindices = len(indices) if axis != 0: raise NotImplementedError( @@ -651,12 +641,12 @@ cdef class BitMaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, nindices) - if self.ctake_1d(indices, &bitmap) != 0: + if BitmapTake(&self_.bitmap, &indices[0], nindices, &bitmap) != 0: ArrowBitmapReset(&bitmap) raise ValueError("take_1d does not support negative indexing") bma.bitmap = bitmap - bma.array_shape = indices.shape + bma.array_shape = tuple((indices.shape[0],)) bma.buffer_owner = True return bma diff --git a/pandas/_libs/include/pandas/bitmask_algorithms.h b/pandas/_libs/include/pandas/bitmask_algorithms.h index b0850b6923a03..fb2bbdd711ac0 100644 --- a/pandas/_libs/include/pandas/bitmask_algorithms.h +++ b/pandas/_libs/include/pandas/bitmask_algorithms.h @@ -31,3 +31,11 @@ int BitmapAnd(const struct ArrowBitmap *bitmap1, /* Returns -1 on failure. On success returns 0 and writes to out */ int BitmapInvert(const struct ArrowBitmap *bitmap, struct ArrowBitmap *out); + +/* Returns -1 on failure. 
On success returns 0 and writes to out */ +int BitmapTake(const struct ArrowBitmap *bitmap, const int64_t *indices, + size_t nindices, struct ArrowBitmap *out); + +/* Returns -1 on failure. On success returns 0 and writes to out */ +int BitmapPutFromBufferMask(struct ArrowBitmap *bitmap, const uint8_t *buf, + size_t n, uint8_t value); diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 74a4cc0695d8c..7181fc502d321 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -146,17 +146,61 @@ int BitmapXor(const struct ArrowBitmap *bitmap1, return 0; } -int BitmapInvert(const struct ArrowBitmap *bitmap1, struct ArrowBitmap *out) { - if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { +int BitmapInvert(const struct ArrowBitmap *bitmap, struct ArrowBitmap *out) { + if (!(out->buffer.capacity_bytes >= bitmap->buffer.size_bytes)) { return -1; } - for (size_t i = 0; i < bitmap1->buffer.size_bytes; i++) { - out->buffer.data[i] = ~bitmap1->buffer.data[i]; + for (size_t i = 0; i < bitmap->buffer.size_bytes; i++) { + out->buffer.data[i] = ~bitmap->buffer.data[i]; } - out->size_bits = bitmap1->size_bits; - out->buffer.size_bytes = bitmap1->buffer.size_bytes; + out->size_bits = bitmap->size_bits; + out->buffer.size_bytes = bitmap->buffer.size_bytes; + + return 0; +} + +int BitmapTake(const struct ArrowBitmap *bitmap, const int64_t *indices, + size_t nindices, struct ArrowBitmap *out) { + int64_t bytes_needed = nindices / 8; + if ((nindices % 8) > 0) { + bytes_needed += 1; + } + + if (!(out->buffer.capacity_bytes >= bytes_needed)) { + return -1; + } + + for (size_t i = 0; i < nindices; i++) { + int64_t index = indices[i]; + if (index < 0) { + return -1; + } + + int8_t value = ArrowBitGet(bitmap->buffer.data, index); + ArrowBitmapAppendUnsafe(out, value, 1); + } + + return 0; +} + +int BitmapPutFromBufferMask(struct ArrowBitmap *bitmap, const uint8_t *buf, + size_t n, uint8_t value) 
{ + int64_t bytes_needed = n / 8; + if ((n % 8) > 0) { + bytes_needed += 1; + } + + if (bytes_needed > bitmap->buffer.capacity_bytes) { + return -1; + } + + for (size_t i = 0; i < n; i++) { + if (buf[i]) { + ArrowBitSetTo(bitmap->buffer.data, i, value); + } + } return 0; } From 9a6187459e88c9d45d133fdd81e546440351a967 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 13:04:32 -0400 Subject: [PATCH 081/126] clean up calling conventions --- pandas/_libs/arrays.pyx | 20 ++++--------------- .../_libs/include/pandas/bitmask_algorithms.h | 2 +- pandas/_libs/src/bitmask_algorithms.c | 13 ++++++++++-- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 74565fe92e36e..e4b7d9907fa48 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -46,7 +46,7 @@ cdef extern from "pandas/vendored/nanoarrow.h": void ArrowBitmapReset(ArrowBitmap*) cdef extern from "pandas/bitmask_algorithms.h": - void ConcatenateBitmapData(const ArrowBitmap**, size_t, const uint8_t*) + void ConcatenateBitmapData(const ArrowBitmap**, size_t, ArrowBitmap*) bint BitmapAny(const ArrowBitmap*) bint BitmapAll(const ArrowBitmap*) bint BitmapOr(const ArrowBitmap*, const ArrowBitmap*, ArrowBitmap*) @@ -313,7 +313,7 @@ cdef class BitMaskArray: @staticmethod cdef BitMaskArray c_concatenate(list objs): cdef Py_ssize_t i - cdef int64_t bytes_needed, total_bits = 0 + cdef int64_t total_bits = 0 cdef BitMaskArray current_bma cdef Py_ssize_t nbitmaps = len(objs) @@ -340,19 +340,10 @@ cdef class BitMaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, total_bits) - ConcatenateBitmapData(bitmaps, nbitmaps, bitmap.buffer.data) + ConcatenateBitmapData(bitmaps, nbitmaps, &bitmap) free(bitmaps) - # TODO: avoid nanoarrow internals - maybe handle in concat function? 
- bitmap.size_bits = total_bits - bytes_needed = total_bits // 8 - if total_bits % 8 != 0: - bytes_needed += 1 - bitmap.buffer.size_bytes = bytes_needed - bitmap.buffer.capacity_bytes = bytes_needed - bma.bitmap = bitmap - if second_dim != 0: bma.array_shape = tuple((total_bits // second_dim, second_dim)) else: @@ -386,8 +377,6 @@ cdef class BitMaskArray: ArrowBitSetTo(self.bitmap.buffer.data, ckey, cvalue) return - # TODO: implement fastpaths here for equal sized containers - # to avoid the to_numpy() call if is_null_slice(key) and isinstance(value, (int, bool)): cvalue = value # blindly assuming ints are 0 or 1 ArrowBitsSetTo( @@ -400,6 +389,7 @@ cdef class BitMaskArray: isinstance(key, np.ndarray) and key.dtype == bool and isinstance(value, (int, bool)) + and len(key) == len(self) ): keymask = key if BitmapPutFromBufferMask( @@ -636,8 +626,6 @@ cdef class BitMaskArray: cdef ArrowBitmap bitmap cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) - # TODO: this leaks a bit into the internals of the nanoarrow bitmap - # We may want to upstream a BitmapCopy function instead ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, nindices) diff --git a/pandas/_libs/include/pandas/bitmask_algorithms.h b/pandas/_libs/include/pandas/bitmask_algorithms.h index fb2bbdd711ac0..3ca086acf77a6 100644 --- a/pandas/_libs/include/pandas/bitmask_algorithms.h +++ b/pandas/_libs/include/pandas/bitmask_algorithms.h @@ -12,7 +12,7 @@ enough bytes to hold all elements. 
*/ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, - uint8_t *out); + struct ArrowBitmap *out); bool BitmapAny(const struct ArrowBitmap *bitmap); bool BitmapAll(const struct ArrowBitmap *bitmap); diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 7181fc502d321..494ae411addb2 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -6,12 +6,13 @@ static const uint8_t clear_mask[8] = {0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f}; void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, - uint8_t *out) { + struct ArrowBitmap *out) { if (nbitmaps == 0) { return; } - uint8_t *out_cursor = out; + int64_t bits_processed = 0; + uint8_t *out_cursor = out->buffer.data; size_t start_bit_pos = 0; for (size_t i = 0; i < nbitmaps; i++) { const struct ArrowBitmap *bitmap = bitmaps[i]; @@ -39,6 +40,14 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, } else { out_cursor += nbytes - 1; } + + bits_processed += bitmap->size_bits; + } + + out->size_bits = bits_processed; + out->buffer.size_bytes = bits_processed / 8; + if ((bits_processed % 8) > 0) { + out->buffer.size_bytes += 1; } } From cd2794318fbdff740b28942b2ca15e3960c7df91 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 15:07:25 -0400 Subject: [PATCH 082/126] fix off by one --- pandas/_libs/src/bitmask_algorithms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 494ae411addb2..a8d669147700a 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -66,7 +66,7 @@ bool BitmapAny(const struct ArrowBitmap *bitmap) { } } - const size_t bits_remaining = nbits % 8; + const size_t bits_remaining = nbits - ((size_bytes - 1) * 8); for (size_t i = 0; i < bits_remaining; i++) { if (ArrowBitGet(buf, nbits - i - 
1)) { return true; @@ -91,7 +91,7 @@ bool BitmapAll(const struct ArrowBitmap *bitmap) { } } - const size_t bits_remaining = nbits % 8; + const size_t bits_remaining = nbits - ((size_bytes - 1) * 8); for (size_t i = 0; i < bits_remaining; i++) { if (ArrowBitGet(buf, nbits - i - 1) == 0) { return false; From dc54ca0131e7c78ebdeadd3431629e6a3253d956 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 15:16:21 -0400 Subject: [PATCH 083/126] make mypy happy --- pandas/_libs/arrays.pyi | 4 ++-- pandas/_libs/arrays.pyx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index f3414659b53ec..55ff7273685df 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -54,8 +54,8 @@ class BitMaskArray: def __getstate__(self) -> dict: ... def __setstate__(self, other: dict) -> None: ... def __iter__(self): ... - @staticmethod - def concatenate(objs: list[Self], axis: int) -> Self: ... + @classmethod + def concatenate(cls, objs: list[Self], axis: int) -> Self: ... @property def size(self) -> int: ... 
@property diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index e4b7d9907fa48..274061f6c1929 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -353,8 +353,8 @@ cdef class BitMaskArray: return bma - @staticmethod - def concatenate(objs, axis): + @classmethod + def concatenate(cls, objs, axis): if axis != 0: raise NotImplementedError( "BitMaskArray.concatenate only implemented for axis=0" From 3794ec57fce20a6509f919caf8a3441fb11fffce Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 16:15:20 -0400 Subject: [PATCH 084/126] fix bug moving cursor when crossing byte boundary --- pandas/_libs/src/bitmask_algorithms.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index a8d669147700a..4ff63044aca86 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -17,7 +17,6 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, for (size_t i = 0; i < nbitmaps; i++) { const struct ArrowBitmap *bitmap = bitmaps[i]; const int64_t nbytes = bitmap->buffer.size_bytes; - const size_t trailing_nbits = bitmap->size_bits % 8; // As we loop through each array, any time we end up starting // on a word boundary we can simply use memcpy. 
If we are not @@ -34,13 +33,12 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, } } - start_bit_pos = (start_bit_pos + trailing_nbits) % 8; - if (start_bit_pos == 0) { - out_cursor += nbytes; - } else { - out_cursor += nbytes - 1; + out_cursor += nbytes - 1; + const int64_t new_bit_position = start_bit_pos + bitmap->size_bits; + if (new_bit_position >= 8) { + out_cursor += 1; } - + start_bit_pos = new_bit_position % 8; bits_processed += bitmap->size_bits; } From 274a7b53c24df6d5c49a35bfc5bd7c75836dc59f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 19:04:12 -0400 Subject: [PATCH 085/126] pedantic cleanups --- pandas/_libs/src/bitmask_algorithms.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 4ff63044aca86..b6b75f05e7847 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -24,7 +24,7 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, if (start_bit_pos == 0) { memcpy(out_cursor, bitmap->buffer.data, nbytes); } else { - for (size_t j = 0; j < nbytes; j++) { + for (int64_t j = 0; j < nbytes; j++) { const uint8_t lshifted = bitmap->buffer.data[j] << start_bit_pos; out_cursor[j] = (out_cursor[j] & clear_mask[start_bit_pos]) | lshifted; @@ -107,7 +107,7 @@ int BitmapOr(const struct ArrowBitmap *bitmap1, return -1; } - for (size_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + for (int64_t i = 0; i < bitmap1->buffer.size_bytes; i++) { out->buffer.data[i] = bitmap1->buffer.data[i] | bitmap2->buffer.data[i]; } @@ -125,7 +125,7 @@ int BitmapAnd(const struct ArrowBitmap *bitmap1, return -1; } - for (size_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + for (int64_t i = 0; i < bitmap1->buffer.size_bytes; i++) { out->buffer.data[i] = bitmap1->buffer.data[i] & bitmap2->buffer.data[i]; } @@ -143,7 +143,7 @@ int BitmapXor(const struct 
ArrowBitmap *bitmap1, return -1; } - for (size_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + for (int64_t i = 0; i < bitmap1->buffer.size_bytes; i++) { out->buffer.data[i] = bitmap1->buffer.data[i] ^ bitmap2->buffer.data[i]; } @@ -158,7 +158,7 @@ int BitmapInvert(const struct ArrowBitmap *bitmap, struct ArrowBitmap *out) { return -1; } - for (size_t i = 0; i < bitmap->buffer.size_bytes; i++) { + for (int64_t i = 0; i < bitmap->buffer.size_bytes; i++) { out->buffer.data[i] = ~bitmap->buffer.data[i]; } From 4b0603847f2887996c054ec8890520b66a610327 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 19:28:36 -0400 Subject: [PATCH 086/126] off by one fix --- pandas/_libs/src/bitmask_algorithms.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index b6b75f05e7847..c659832fa02bc 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -17,6 +17,9 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, for (size_t i = 0; i < nbitmaps; i++) { const struct ArrowBitmap *bitmap = bitmaps[i]; const int64_t nbytes = bitmap->buffer.size_bytes; + if (nbytes == 0) { + continue; + } // As we loop through each array, any time we end up starting // on a word boundary we can simply use memcpy. If we are not From 62657840f62d3a80ed80931386dbcd042939cba3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Aug 2023 19:47:06 -0400 Subject: [PATCH 087/126] Revert "fix bug moving cursor when crossing byte boundary" This reverts commit 3794ec57fce20a6509f919caf8a3441fb11fffce. 
--- pandas/_libs/src/bitmask_algorithms.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index c659832fa02bc..0a92be9fca253 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -20,6 +20,7 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, if (nbytes == 0) { continue; } + const size_t trailing_nbits = bitmap->size_bits % 8; // As we loop through each array, any time we end up starting // on a word boundary we can simply use memcpy. If we are not @@ -36,12 +37,13 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, } } - out_cursor += nbytes - 1; - const int64_t new_bit_position = start_bit_pos + bitmap->size_bits; - if (new_bit_position >= 8) { - out_cursor += 1; + start_bit_pos = (start_bit_pos + trailing_nbits) % 8; + if (start_bit_pos == 0) { + out_cursor += nbytes; + } else { + out_cursor += nbytes - 1; } - start_bit_pos = new_bit_position % 8; + bits_processed += bitmap->size_bits; } From cea82a500dc2e00d617b349625e1b6fbe1f4315b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 24 Aug 2023 15:24:40 -0400 Subject: [PATCH 088/126] concatenate bug fix --- pandas/_libs/src/bitmask_algorithms.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 0a92be9fca253..ca286da4ebd17 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -37,13 +37,13 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, } } - start_bit_pos = (start_bit_pos + trailing_nbits) % 8; - if (start_bit_pos == 0) { - out_cursor += nbytes; - } else { - out_cursor += nbytes - 1; + out_cursor += nbytes; + const int64_t next_bit_pos = start_bit_pos + trailing_nbits; + if ((next_bit_pos > 0) && (next_bit_pos < 
8)) { + out_cursor--; } + start_bit_pos = next_bit_pos % 8; bits_processed += bitmap->size_bits; } From 0d529e86a54773eee7735ee41ef8ee85dcc69639 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 24 Aug 2023 22:12:40 -0400 Subject: [PATCH 089/126] fixed bounds issues --- pandas/_libs/src/bitmask_algorithms.c | 32 +++++++++++++++------------ 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index ca286da4ebd17..3ce52d54bc6ba 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -20,31 +20,35 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, if (nbytes == 0) { continue; } - const size_t trailing_nbits = bitmap->size_bits % 8; + const size_t bitmap_rem = bitmap->size_bits % 8; // As we loop through each array, any time we end up starting // on a word boundary we can simply use memcpy. If we are not // so lucky we fall back to bit shifting each element if (start_bit_pos == 0) { - memcpy(out_cursor, bitmap->buffer.data, nbytes); + const size_t index = bits_processed / 8; + memcpy(&out_cursor[index], bitmap->buffer.data, nbytes); + bits_processed += bitmap->size_bits; } else { for (int64_t j = 0; j < nbytes; j++) { const uint8_t lshifted = bitmap->buffer.data[j] << start_bit_pos; - out_cursor[j] = (out_cursor[j] & clear_mask[start_bit_pos]) | lshifted; - - const uint8_t rshifted = bitmap->buffer.data[j] >> (8 - start_bit_pos); - out_cursor[j + 1] = rshifted; + const size_t index = bits_processed / 8; + out_cursor[index] = (out_cursor[index] & clear_mask[start_bit_pos]) | lshifted; + + if (index < out->buffer.capacity_bytes - 1) { + const uint8_t rshifted = bitmap->buffer.data[j] >> (8 - start_bit_pos); + out_cursor[index + 1] = rshifted; + } + + if ((j == nbytes - 1) && (bitmap_rem > 0)){ + bits_processed += bitmap_rem; + } else { + bits_processed += 8; + } } } - out_cursor += nbytes; - const int64_t 
next_bit_pos = start_bit_pos + trailing_nbits; - if ((next_bit_pos > 0) && (next_bit_pos < 8)) { - out_cursor--; - } - - start_bit_pos = next_bit_pos % 8; - bits_processed += bitmap->size_bits; + start_bit_pos = (start_bit_pos + bitmap_rem) % 8; } out->size_bits = bits_processed; From e80e70913f58df4993ecea53f5ecb7de4a175da1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 24 Aug 2023 22:18:42 -0400 Subject: [PATCH 090/126] Revert "fixed bounds issues" This reverts commit 0d529e86a54773eee7735ee41ef8ee85dcc69639. --- pandas/_libs/src/bitmask_algorithms.c | 32 ++++++++++++--------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 3ce52d54bc6ba..ca286da4ebd17 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -20,35 +20,31 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, if (nbytes == 0) { continue; } - const size_t bitmap_rem = bitmap->size_bits % 8; + const size_t trailing_nbits = bitmap->size_bits % 8; // As we loop through each array, any time we end up starting // on a word boundary we can simply use memcpy. 
If we are not // so lucky we fall back to bit shifting each element if (start_bit_pos == 0) { - const size_t index = bits_processed / 8; - memcpy(&out_cursor[index], bitmap->buffer.data, nbytes); - bits_processed += bitmap->size_bits; + memcpy(out_cursor, bitmap->buffer.data, nbytes); } else { for (int64_t j = 0; j < nbytes; j++) { const uint8_t lshifted = bitmap->buffer.data[j] << start_bit_pos; - const size_t index = bits_processed / 8; - out_cursor[index] = (out_cursor[index] & clear_mask[start_bit_pos]) | lshifted; - - if (index < out->buffer.capacity_bytes - 1) { - const uint8_t rshifted = bitmap->buffer.data[j] >> (8 - start_bit_pos); - out_cursor[index + 1] = rshifted; - } - - if ((j == nbytes - 1) && (bitmap_rem > 0)){ - bits_processed += bitmap_rem; - } else { - bits_processed += 8; - } + out_cursor[j] = (out_cursor[j] & clear_mask[start_bit_pos]) | lshifted; + + const uint8_t rshifted = bitmap->buffer.data[j] >> (8 - start_bit_pos); + out_cursor[j + 1] = rshifted; } } - start_bit_pos = (start_bit_pos + bitmap_rem) % 8; + out_cursor += nbytes; + const int64_t next_bit_pos = start_bit_pos + trailing_nbits; + if ((next_bit_pos > 0) && (next_bit_pos < 8)) { + out_cursor--; + } + + start_bit_pos = next_bit_pos % 8; + bits_processed += bitmap->size_bits; } out->size_bits = bits_processed; From 8689c993e1b4cc51b1c86853eb119de8a2912946 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 24 Aug 2023 22:24:33 -0400 Subject: [PATCH 091/126] faster impl --- pandas/_libs/src/bitmask_algorithms.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index ca286da4ebd17..8c15cf823ded6 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -32,8 +32,10 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, const uint8_t lshifted = bitmap->buffer.data[j] << start_bit_pos; out_cursor[j] = (out_cursor[j] & 
clear_mask[start_bit_pos]) | lshifted; - const uint8_t rshifted = bitmap->buffer.data[j] >> (8 - start_bit_pos); - out_cursor[j + 1] = rshifted; + if (out_cursor - out->buffer.data < out->buffer.capacity_bytes - 1) { + const uint8_t rshifted = bitmap->buffer.data[j] >> (8 - start_bit_pos); + out_cursor[j + 1] = rshifted; + } } } From 4ed187598e8dc577e22f150e7daa72f399806606 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 24 Aug 2023 22:42:58 -0400 Subject: [PATCH 092/126] move condition out of loop --- pandas/_libs/src/bitmask_algorithms.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 8c15cf823ded6..e74f7a2aa6ab3 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -28,14 +28,24 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, if (start_bit_pos == 0) { memcpy(out_cursor, bitmap->buffer.data, nbytes); } else { - for (int64_t j = 0; j < nbytes; j++) { + for (int64_t j = 0; j < nbytes - 1; j++) { const uint8_t lshifted = bitmap->buffer.data[j] << start_bit_pos; out_cursor[j] = (out_cursor[j] & clear_mask[start_bit_pos]) | lshifted; - if (out_cursor - out->buffer.data < out->buffer.capacity_bytes - 1) { - const uint8_t rshifted = bitmap->buffer.data[j] >> (8 - start_bit_pos); - out_cursor[j + 1] = rshifted; - } + const uint8_t rshifted = bitmap->buffer.data[j] >> (8 - start_bit_pos); + out_cursor[j + 1] = rshifted; + } + + // last byte can overrun - check outside loop for performance + const size_t index = nbytes - 1; + const uint8_t lshifted = bitmap->buffer.data[index] << start_bit_pos; + out_cursor[index] = + (out_cursor[index] & clear_mask[start_bit_pos]) | lshifted; + + if (out_cursor - out->buffer.data < out->buffer.capacity_bytes - 1) { + const uint8_t rshifted = + bitmap->buffer.data[index] >> (8 - start_bit_pos); + out_cursor[index + 1] = rshifted; } } 
From 24c381457dd0180518b17888e3c53afed4253d61 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 Aug 2023 08:40:08 -0400 Subject: [PATCH 093/126] memory benchmark --- asv_bench/benchmarks/array.py | 11 +++++++++++ pandas/_libs/arrays.pyx | 16 ++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py index 09c4acc0ab309..e545fcab513f0 100644 --- a/asv_bench/benchmarks/array.py +++ b/asv_bench/benchmarks/array.py @@ -28,6 +28,17 @@ def time_from_float_array(self): pd.array(self.values_float, dtype="boolean") +class BooleanArrayMem: + def setup_cache(self): + N = 250_000 + data = np.array([True] * N) + mask = np.array([False] * N) + return [pd.arrays.BooleanArray(data, mask)] * 500 + + def peakmem_array(self, arrays): + return [~x for x in arrays] + + class IntegerArray: def setup(self): N = 250_000 diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 274061f6c1929..35e1c59d92631 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -6,10 +6,10 @@ cimport cython import numpy as np cimport numpy as cnp -from cpython cimport PyErr_Clear -from libc.stdlib cimport ( - free, - malloc, +from cpython cimport ( + PyErr_Clear, + PyMem_Free, + PyMem_Malloc, ) from libc.string cimport memcpy from numpy cimport ( @@ -293,7 +293,7 @@ cdef class BitMaskArray: # TODO: this leaks a bit into the internals of the nanoarrow bitmap # We may want to upstream a BitmapCopy function instead ArrowBitmapInit(&bitmap) - buf = malloc(old_bma.bitmap.buffer.size_bytes) + buf = PyMem_Malloc(old_bma.bitmap.buffer.size_bytes) memcpy(buf, old_bma.bitmap.buffer.data, old_bma.bitmap.buffer.size_bytes) bitmap.buffer.size_bytes = old_bma.bitmap.buffer.size_bytes bitmap.buffer.capacity_bytes = old_bma.bitmap.buffer.capacity_bytes @@ -326,7 +326,7 @@ cdef class BitMaskArray: "BitMaskArray.concatenate does not support broadcasting" ) - cdef ArrowBitmap** bitmaps = malloc( + cdef 
ArrowBitmap** bitmaps = PyMem_Malloc( sizeof(ArrowBitmap*) * nbitmaps ) for i in range(nbitmaps): @@ -341,7 +341,7 @@ cdef class BitMaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, total_bits) ConcatenateBitmapData(bitmaps, nbitmaps, &bitmap) - free(bitmaps) + PyMem_Free(bitmaps) bma.bitmap = bitmap if second_dim != 0: @@ -560,7 +560,7 @@ cdef class BitMaskArray: else: ArrowBitmapInit(&bitmap) - buf = malloc(nbytes) + buf = PyMem_Malloc(nbytes) data = state["bitmap_data"] for i in range(nbytes): buf[i] = data[i] From 5ad89648de26418d7c02728ca5015ef4f6d09107 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 Aug 2023 09:34:31 -0400 Subject: [PATCH 094/126] use c standard malloc/free --- pandas/_libs/arrays.pyx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 35e1c59d92631..274061f6c1929 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -6,10 +6,10 @@ cimport cython import numpy as np cimport numpy as cnp -from cpython cimport ( - PyErr_Clear, - PyMem_Free, - PyMem_Malloc, +from cpython cimport PyErr_Clear +from libc.stdlib cimport ( + free, + malloc, ) from libc.string cimport memcpy from numpy cimport ( @@ -293,7 +293,7 @@ cdef class BitMaskArray: # TODO: this leaks a bit into the internals of the nanoarrow bitmap # We may want to upstream a BitmapCopy function instead ArrowBitmapInit(&bitmap) - buf = PyMem_Malloc(old_bma.bitmap.buffer.size_bytes) + buf = malloc(old_bma.bitmap.buffer.size_bytes) memcpy(buf, old_bma.bitmap.buffer.data, old_bma.bitmap.buffer.size_bytes) bitmap.buffer.size_bytes = old_bma.bitmap.buffer.size_bytes bitmap.buffer.capacity_bytes = old_bma.bitmap.buffer.capacity_bytes @@ -326,7 +326,7 @@ cdef class BitMaskArray: "BitMaskArray.concatenate does not support broadcasting" ) - cdef ArrowBitmap** bitmaps = PyMem_Malloc( + cdef ArrowBitmap** bitmaps = malloc( sizeof(ArrowBitmap*) * nbitmaps ) for i in range(nbitmaps): @@ 
-341,7 +341,7 @@ cdef class BitMaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, total_bits) ConcatenateBitmapData(bitmaps, nbitmaps, &bitmap) - PyMem_Free(bitmaps) + free(bitmaps) bma.bitmap = bitmap if second_dim != 0: @@ -560,7 +560,7 @@ cdef class BitMaskArray: else: ArrowBitmapInit(&bitmap) - buf = PyMem_Malloc(nbytes) + buf = malloc(nbytes) data = state["bitmap_data"] for i in range(nbytes): buf[i] = data[i] From b64ba05d76ccac18643d14399988eaedfda42ab7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 Aug 2023 09:54:30 -0400 Subject: [PATCH 095/126] added repr for bitmaskarray --- pandas/_libs/arrays.pyx | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 274061f6c1929..9ea8a28aea9f5 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -308,6 +308,24 @@ cdef class BitMaskArray: def __len__(self): return self.bitmap.size_bits + def __repr__(self): + cdef Py_ssize_t i, nbytes = self.bitmap.buffer.size_bytes + arr_bytes = bytearray(nbytes) + for i in range(nbytes): + arr_bytes[i] = self.bitmap.buffer.data[i] + + if self.parent: + par = object.__repr__(self.parent) + else: + par = None + + shape = self.array_shape + data = repr(arr_bytes) + + return ( + f"{object.__repr__(self)}\nparent: {par}\nshape: {shape}\ndata: {data}\n" + ) + @cython.wraparound(False) @cython.boundscheck(False) @staticmethod From a51dfe9ba6b8cb27babf1c84c0d9dca7ac052dbc Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 Aug 2023 12:31:36 -0400 Subject: [PATCH 096/126] more tests and better repr --- pandas/_libs/arrays.pyi | 2 + pandas/_libs/arrays.pyx | 23 +- pandas/tests/arrays/masked/test_bitmask.py | 421 +++++++++++++++++++++ 3 files changed, 440 insertions(+), 6 deletions(-) create mode 100644 pandas/tests/arrays/masked/test_bitmask.py diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 55ff7273685df..643324bde838e 100644 --- a/pandas/_libs/arrays.pyi +++ 
b/pandas/_libs/arrays.pyi @@ -61,6 +61,8 @@ class BitMaskArray: @property def nbytes(self) -> int: ... @property + def bytes(self) -> bytes: ... + @property def shape(self) -> tuple[int, ...]: ... @property def dtype(self) -> type_t[bool]: ... diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 9ea8a28aea9f5..cd348bdebee2d 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -309,18 +309,13 @@ cdef class BitMaskArray: return self.bitmap.size_bits def __repr__(self): - cdef Py_ssize_t i, nbytes = self.bitmap.buffer.size_bytes - arr_bytes = bytearray(nbytes) - for i in range(nbytes): - arr_bytes[i] = self.bitmap.buffer.data[i] - if self.parent: par = object.__repr__(self.parent) else: par = None shape = self.array_shape - data = repr(arr_bytes) + data = self.bytes return ( f"{object.__repr__(self)}\nparent: {par}\nshape: {shape}\ndata: {data}\n" @@ -437,6 +432,10 @@ cdef class BitMaskArray: return self.to_numpy()[key] def __invert__(self): + # note that this inverts the entire byte, even if the + # bitmap only uses a few of the bits within that byte + # the remaining bits of the byte are of undefined value + # so be sure to only check bytes we need cdef BitMaskArray self_ = self cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) cdef ArrowBitmap bitmap @@ -459,6 +458,7 @@ cdef class BitMaskArray: cdef ArrowBitmap bitmap if isinstance(other, type(self)): + # TODO: maybe should return Self here instead of ndarray other_bma = other if self_.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) @@ -487,6 +487,7 @@ cdef class BitMaskArray: cdef ArrowBitmap bitmap if isinstance(other, type(self)): + # TODO: maybe should return Self here instead of ndarray other_bma = other if self_.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) @@ -515,6 +516,7 @@ cdef class BitMaskArray: cdef ArrowBitmap bitmap if isinstance(other, type(self)): + # TODO: maybe should return Self here instead of 
ndarray other_bma = other if self_.bitmap.size_bits == 0: return np.empty(dtype=bool).reshape(self.array_shape) @@ -605,6 +607,15 @@ cdef class BitMaskArray: def nbytes(self) -> int: return self.bitmap.buffer.size_bytes + @property + def bytes(self): + cdef Py_ssize_t i, nbytes = self.bitmap.buffer.size_bytes + arr_bytes = bytearray(nbytes) + for i in range(nbytes): + arr_bytes[i] = self.bitmap.buffer.data[i] + + return bytes(arr_bytes) + @property def shape(self): """Strictly for NumPy compat in mask_ops""" diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py new file mode 100644 index 0000000000000..1e1258984edaf --- /dev/null +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -0,0 +1,421 @@ +import itertools +import pickle + +import numpy as np +import pytest + +from pandas._libs.arrays import BitMaskArray + +import pandas._testing as tm + + +@pytest.mark.parametrize( + "array,expected", + [ + (np.array([False, False]), bytes([0x0])), + (np.array([True, False]), bytes([0x1])), + (np.array([False, True]), bytes([0x2])), + (np.array([True, True]), bytes([0x3])), + (np.array([True, False] * 8), bytes([0x55, 0x55])), + ], +) +def test_constructor_ndarray(array, expected): + bma = BitMaskArray(array) + assert bma.bytes == expected + assert not bma.parent + assert bma.array_shape == array.shape + + +@pytest.mark.parametrize( + "parent,expected", + [ + (BitMaskArray(np.array([False, False])), bytes([0x0])), + (BitMaskArray(np.array([True, False])), bytes([0x1])), + (BitMaskArray(np.array([False, True])), bytes([0x2])), + (BitMaskArray(np.array([True, True])), bytes([0x3])), + (BitMaskArray(np.array([True, False] * 8)), bytes([0x55, 0x55])), + ], +) +def test_constructor_bitmap(parent, expected): + bma = BitMaskArray(parent) + assert bma.bytes == expected + assert bma.parent is parent + assert bma.array_shape == parent.shape + + +def test_len(): + bma = BitMaskArray(np.array([True, False, False])) + assert len(bma) == 3 + 
+ +def test_repr_no_parent(): + bma = BitMaskArray(np.array([True, False, False])) + result = repr(bma) + assert "parent: None" in result + assert "shape: (3,)" in result + assert "data: b'\\x01'" in result + + +def test_repr_parent(): + parent = BitMaskArray(np.array([False, False, True])) + bma = BitMaskArray(parent) + result = repr(bma) + parent_id = hex(id(parent)) + assert f"parent: > 1) & 1 == 0 + assert (result.bytes[0] >> 2) & 1 == 1 + + +@pytest.mark.parametrize( + "indexer,expected", + [ + ([0, 1], np.array([True, False])), + (np.array([2, 1]), np.array([True, False])), + (slice(1, 2), np.array([False])), + ], +) +def test_getitem_numpy_fallback(indexer, expected): + bma = BitMaskArray(np.array([True, False, True])) + result = bma[indexer] + + tm.assert_numpy_array_equal(result, expected) + + +def test_setitem_scalar(): + bma = BitMaskArray(np.array([True, False, True])) + + bma[0] = False + assert not bma[0] + + bma[:] = True + assert bma[0] and bma[1] and bma[2] + + bma[np.array([False, False, True])] = False + assert bma[0] and bma[1] and not bma[2] + + bma[[False, True, False]] = False + assert bma[0] and not bma[1] and not bma[2] + + +def test_setitem_array(): + bma = BitMaskArray(np.array([True, False, True])) + + bma[:] = [False, True, False] + assert not bma[0] and bma[1] and not bma[2] + + bma[:] = np.array([True, False, True]) + assert bma[0] and not bma[1] and bma[2] + + +def test_invert(): + result1 = ~BitMaskArray(np.array([True, False])) + assert (result1.bytes[0] & 0x1) == 0 + assert ((result1.bytes[0] >> 1) & 0x1) == 1 + + result2 = ~BitMaskArray(np.array([False, True])) + assert (result2.bytes[0] & 0x1) == 1 + assert ((result2.bytes[0] >> 1) & 0x1) == 0 + + +@pytest.mark.parametrize("rhs_as_bitmask", [True, False]) +@pytest.mark.parametrize( + "lhs,rhs,expected", + [ + ([True], [True], [True]), + ([True], [False], [False]), + ([False], [False], [False]), + ([True] * 10, [True] * 10, [True] * 10), + ([False] * 10, [True] * 10, [False] * 
10), + ], +) +def test_and(rhs_as_bitmask, lhs, rhs, expected): + bma1 = BitMaskArray(np.array(lhs)) + + if rhs_as_bitmask: + bma2 = BitMaskArray(np.array(rhs)) + else: + bma2 = np.array(rhs) + + expected = np.array(expected) + result = bma1 & bma2 + assert (result == expected).all() + + +@pytest.mark.parametrize("rhs_as_bitmask", [True, False]) +@pytest.mark.parametrize( + "lhs,rhs,expected", + [ + ([True], [True], [True]), + ([True], [False], [True]), + ([False], [False], [False]), + ([True] * 10, [True] * 10, [True] * 10), + ([False] * 10, [True] * 10, [True] * 10), + ], +) +def test_or(rhs_as_bitmask, lhs, rhs, expected): + bma1 = BitMaskArray(np.array(lhs)) + + if rhs_as_bitmask: + bma2 = BitMaskArray(np.array(rhs)) + else: + bma2 = np.array(rhs) + + expected = np.array(expected) + result = bma1 | bma2 + assert (result == expected).all() + + +@pytest.mark.parametrize("rhs_as_bitmask", [True, False]) +@pytest.mark.parametrize( + "lhs,rhs,expected", + [ + ([True], [True], [False]), + ([True], [False], [True]), + ([False], [False], [False]), + ([True] * 10, [True] * 10, [False] * 10), + ([False] * 10, [True] * 10, [True] * 10), + ], +) +def test_xor(rhs_as_bitmask, lhs, rhs, expected): + bma1 = BitMaskArray(np.array(lhs)) + + if rhs_as_bitmask: + bma2 = BitMaskArray(np.array(rhs)) + else: + bma2 = np.array(rhs) + + expected = np.array(expected) + result = bma1 ^ bma2 + assert (result == expected).all() + + +def test_pickle(): + parent = BitMaskArray(np.array([True, False, True])) + child = BitMaskArray(parent) + + result_child = pickle.loads(pickle.dumps(child)) + + assert result_child.shape == child.shape + assert result_child.bytes == child.bytes + + assert result_child.parent.shape == parent.shape + assert result_child.parent.bytes == parent.bytes + assert not result_child.parent.parent + + +def test_iter(): + bma = BitMaskArray(np.array([True, False, True])) + itr = iter(bma) + + assert next(itr) is True + assert next(itr) is False + assert next(itr) is True 
+ + with pytest.raises(StopIteration, match=""): + next(itr) + + +@pytest.mark.parametrize( + "data,expected", + [ + (np.array([], dtype=bool), 0), + (np.array([True, False, True]), 3), + (np.array([True] * 8), 8), + (np.array([True] * 8 + [False]), 9), + ], +) +def test_size(data, expected): + bma = BitMaskArray(data) + result = bma.size + assert result == expected + + +@pytest.mark.parametrize( + "data,expected", + [ + (np.array([], dtype=bool), 0), + (np.array([True, False, True]), 1), + (np.array([True] * 8), 1), + (np.array([True] * 8 + [False]), 2), + ], +) +def test_nbytes(data, expected): + bma = BitMaskArray(data) + result = bma.nbytes + assert result == expected + + +@pytest.mark.parametrize( + "data", + [ + np.array([True, False]), + np.array([True, False]).reshape(2, -1), + np.array([True, False]).reshape(-1, 2), + ], +) +def test_shape(data): + bma = BitMaskArray(data) + assert bma.array_shape == data.shape + + +@pytest.mark.parametrize( + "data,expected", + [ + (np.array([], dtype=bool), False), + (np.array([True]), True), + (np.array([False]), False), + (np.array([True] * 8 + [False]), True), + ], +) +def test_any(data, expected): + bma = BitMaskArray(data) + assert bma.any() == expected + + +@pytest.mark.parametrize( + "data,expected", + [ + (np.array([], dtype=bool), True), + (np.array([True]), True), + (np.array([False]), False), + (np.array([True] * 8 + [False]), False), + ], +) +def test_all(data, expected): + bma = BitMaskArray(data) + assert bma.all() == expected + + +@pytest.mark.parametrize( + "data,expected", + [ + (np.array([], dtype=bool), 0), + (np.array([True]), 1), + (np.array([False]), 0), + (np.array([True] * 8 + [False]), 8), + ], +) +def test_sum(data, expected): + bma = BitMaskArray(data) + assert bma.sum() == expected + + +def test_take1d(): + bma = BitMaskArray(np.array([True, False, True, False])) + + result1 = bma.take_1d(np.array([0]), axis=0) + assert (result1.bytes[0] & 0x1) == 1 + + result2 = bma.take_1d(np.array([1]), 
axis=0) + assert (result2.bytes[0] & 0x1) == 0 + + result3 = bma.take_1d(np.array([0, 1]), axis=0) + assert (result3.bytes[0] & 0x1) == 1 + assert ((result3.bytes[0] >> 1) & 0x1) == 0 + + result4 = bma.take_1d(np.array([0, 0]), axis=0) + assert (result4.bytes[0] & 0x1) == 1 + assert ((result4.bytes[0] >> 1) & 0x1) == 1 + + result5 = bma.take_1d(np.array([3, 2, 1, 0]), axis=0) + assert (result5.bytes[0] & 0x1) == 0 + assert ((result5.bytes[0] >> 1) & 0x1) == 1 + assert ((result5.bytes[0] >> 2) & 0x1) == 0 + assert ((result5.bytes[0] >> 3) & 0x1) == 1 + + +def test_take1d_raises_not_axis0(): + bma = BitMaskArray(np.array([True, False, True])) + with pytest.raises(NotImplementedError, match="only implemented for axis=0"): + bma.take_1d(np.array([1]), axis=1) + + +def test_take_1d_raises_empty_indices(): + bma = BitMaskArray(np.array([True, False, True])) + with pytest.raises(NotImplementedError, match="does not support empty takes"): + bma.take_1d(np.array([], dtype="int64"), axis=0) + + +def test_take_1d_raises_negative_indices(): + bma = BitMaskArray(np.array([True, False, True])) + with pytest.raises(NotImplementedError, match="does not support negative indexing"): + bma.take_1d(np.array([-1], dtype="int64"), axis=0) + + +def test_copy(): + old_bma = BitMaskArray(np.array([True, False, True, False])) + bma = old_bma.copy() + + assert bma.bytes == old_bma.bytes + assert bma.shape == old_bma.shape + assert not bma.parent + + +@pytest.mark.parametrize( + "data", + [ + np.array([], dtype=bool), + np.array([True] * 100, dtype=bool), + np.array([[True, False], [True, False], [True, True], [False, False]]), + np.array([[True, False, True, False], [True, True, False, False]]), + ], +) +def test_to_numpy(data): + bma = BitMaskArray(data) + + result = bma.to_numpy() + tm.assert_numpy_array_equal(result, data) From 34d4ffcf3798930f49757891b41886b8dbe6b3d3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 Aug 2023 12:34:31 -0400 Subject: [PATCH 097/126] BitMask -> bitmask 
--- pandas/_libs/arrays.pyi | 2 +- pandas/_libs/arrays.pyx | 62 ++++++------- pandas/core/algorithms.py | 2 +- pandas/core/array_algos/masked_reductions.py | 22 ++--- pandas/core/arrays/boolean.py | 6 +- pandas/core/arrays/masked.py | 24 ++--- pandas/core/nanops.py | 4 +- pandas/core/ops/mask_ops.py | 26 +++--- .../arrays/floating/test_construction.py | 2 +- .../tests/arrays/integer/test_construction.py | 2 +- pandas/tests/arrays/masked/test_bitmask.py | 90 +++++++++---------- 11 files changed, 121 insertions(+), 121 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index 643324bde838e..d890a59218235 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -42,7 +42,7 @@ class NDArrayBacked: cls, to_concat: Sequence[Self], axis: AxisInt = ... ) -> Self: ... -class BitMaskArray: +class BitmaskArray: parent: Self def __init__(self, data: np.ndarray | Self) -> None: ... def __setitem__(self, key: PositionalIndexer, value: ArrayLike | bool) -> None: ... 
diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index cd348bdebee2d..9c0186e4c221e 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -236,7 +236,7 @@ cdef class NDArrayBacked: return to_concat[0]._from_backing_data(new_arr) -cdef class BitMaskArray: +cdef class BitmaskArray: cdef: ArrowBitmap bitmap bint buffer_owner # set when parent is None, but gives C-level access @@ -257,7 +257,7 @@ cdef class BitMaskArray: self.buffer_owner = True self.bitmap = bitmap - cdef void init_from_bitmaskarray(self, BitMaskArray bma): + cdef void init_from_bitmaskarray(self, BitmaskArray bma): self.buffer_owner = False self.bitmap = bma.bitmap @@ -274,20 +274,20 @@ cdef class BitMaskArray: self.array_shape = data.array_shape self.parent = data else: - raise TypeError("Unsupported argument to BitMaskArray constructor") + raise TypeError("Unsupported argument to BitmaskArray constructor") def __dealloc__(self): if self.buffer_owner: ArrowBitmapReset(&self.bitmap) @staticmethod - cdef BitMaskArray copy_from_bitmaskarray(BitMaskArray old_bma): + cdef BitmaskArray copy_from_bitmaskarray(BitmaskArray old_bma): """ - Constructs a new BitMaskArray from a bitmap pointer. Copies data + Constructs a new BitmaskArray from a bitmap pointer. Copies data and manages the subsequenty lifecycle of the bitmap. 
""" # Bypass __init__ calls - cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) + cdef BitmaskArray bma = BitmaskArray.__new__(BitmaskArray) cdef uint8_t* buf cdef ArrowBitmap bitmap # TODO: this leaks a bit into the internals of the nanoarrow bitmap @@ -324,10 +324,10 @@ cdef class BitMaskArray: @cython.wraparound(False) @cython.boundscheck(False) @staticmethod - cdef BitMaskArray c_concatenate(list objs): + cdef BitmaskArray c_concatenate(list objs): cdef Py_ssize_t i cdef int64_t total_bits = 0 - cdef BitMaskArray current_bma + cdef BitmaskArray current_bma cdef Py_ssize_t nbitmaps = len(objs) cdef Py_ssize_t second_dim = 0 @@ -336,19 +336,19 @@ cdef class BitMaskArray: for obj in objs: if not obj.array_shape[1] == second_dim: raise NotImplementedError( - "BitMaskArray.concatenate does not support broadcasting" + "BitmaskArray.concatenate does not support broadcasting" ) cdef ArrowBitmap** bitmaps = malloc( sizeof(ArrowBitmap*) * nbitmaps ) for i in range(nbitmaps): - current_bma = objs[i] + current_bma = objs[i] total_bits += current_bma.bitmap.size_bits bitmaps[i] = ¤t_bma.bitmap # Bypass __init__ calls - cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) + cdef BitmaskArray bma = BitmaskArray.__new__(BitmaskArray) cdef ArrowBitmap bitmap ArrowBitmapInit(&bitmap) @@ -370,10 +370,10 @@ cdef class BitMaskArray: def concatenate(cls, objs, axis): if axis != 0: raise NotImplementedError( - "BitMaskArray.concatenate only implemented for axis=0" + "BitmaskArray.concatenate only implemented for axis=0" ) - return BitMaskArray.c_concatenate(objs) + return BitmaskArray.c_concatenate(objs) def __setitem__(self, key, value): cdef const uint8_t[:] keymask @@ -381,7 +381,7 @@ cdef class BitMaskArray: cdef Py_ssize_t i = 0 cdef Py_ssize_t ckey cdef bint cvalue - cdef BitMaskArray self_ = self + cdef BitmaskArray self_ = self if isinstance(key, int): ckey = key @@ -411,7 +411,7 @@ cdef class BitMaskArray: keymask.shape[0], value ) != 0: - raise 
ValueError("BitMaskArray.__setitem__ failed!") + raise ValueError("BitmaskArray.__setitem__ failed!") else: arr = self.to_numpy() arr[key] = value @@ -436,8 +436,8 @@ cdef class BitMaskArray: # bitmap only uses a few of the bits within that byte # the remaining bits of the byte are of undefined value # so be sure to only check bytes we need - cdef BitMaskArray self_ = self - cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) + cdef BitmaskArray self_ = self + cdef BitmaskArray bma = BitmaskArray.__new__(BitmaskArray) cdef ArrowBitmap bitmap ArrowBitmapInit(&bitmap) @@ -454,7 +454,7 @@ cdef class BitMaskArray: def __and__(self, other): cdef ndarray[uint8_t] result - cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 + cdef BitmaskArray other_bma, self_ = self # self_ required for Cython < 3 cdef ArrowBitmap bitmap if isinstance(other, type(self)): @@ -471,7 +471,7 @@ cdef class BitMaskArray: BitmapAnd(&self_.bitmap, &other_bma.bitmap, &bitmap) result = np.empty(self_.bitmap.size_bits, dtype=bool) - BitMaskArray.buffer_to_array_1d( + BitmaskArray.buffer_to_array_1d( result, bitmap.buffer.data, bitmap.size_bits @@ -483,7 +483,7 @@ cdef class BitMaskArray: def __or__(self, other): cdef ndarray[uint8_t] result - cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 + cdef BitmaskArray other_bma, self_ = self # self_ required for Cython < 3 cdef ArrowBitmap bitmap if isinstance(other, type(self)): @@ -500,7 +500,7 @@ cdef class BitMaskArray: BitmapOr(&self_.bitmap, &other_bma.bitmap, &bitmap) result = np.empty(self_.bitmap.size_bits, dtype=bool) - BitMaskArray.buffer_to_array_1d( + BitmaskArray.buffer_to_array_1d( result, bitmap.buffer.data, bitmap.size_bits @@ -512,7 +512,7 @@ cdef class BitMaskArray: def __xor__(self, other): cdef ndarray[uint8_t] result - cdef BitMaskArray other_bma, self_ = self # self_ required for Cython < 3 + cdef BitmaskArray other_bma, self_ = self # self_ required for Cython < 3 cdef 
ArrowBitmap bitmap if isinstance(other, type(self)): @@ -529,7 +529,7 @@ cdef class BitMaskArray: BitmapXor(&self_.bitmap, &other_bma.bitmap, &bitmap) result = np.empty(self_.bitmap.size_bits, dtype=bool) - BitMaskArray.buffer_to_array_1d( + BitmaskArray.buffer_to_array_1d( result, bitmap.buffer.data, bitmap.size_bits @@ -540,7 +540,7 @@ cdef class BitMaskArray: return self.to_numpy() ^ other def __getstate__(self): - cdef BitMaskArray self_ = self + cdef BitmaskArray self_ = self state = { "parent": self.parent, "array_shape": self.array_shape, @@ -563,7 +563,7 @@ cdef class BitMaskArray: def __setstate__(self, state): cdef ArrowBitmap bitmap - cdef BitMaskArray self_ = self, other + cdef BitmaskArray self_ = self, other self.parent = state["parent"] self.array_shape = state["array_shape"] self_.buffer_owner = state["buffer_owner"] @@ -595,7 +595,7 @@ cdef class BitMaskArray: @cython.wraparound(False) def __iter__(self): cdef Py_ssize_t i - cdef BitMaskArray self_ = self # self_ required for Cython < 3 + cdef BitmaskArray self_ = self # self_ required for Cython < 3 for i in range(self_.bitmap.size_bits): yield bool(ArrowBitGet(self_.bitmap.buffer.data, i)) @@ -640,11 +640,11 @@ cdef class BitMaskArray: const int64_t[:] indices, const int axis=0, ): - cdef BitMaskArray self_ = self + cdef BitmaskArray self_ = self cdef Py_ssize_t nindices = len(indices) if axis != 0: raise NotImplementedError( - "BitMaskArray.take_1d only implemented for axis=0" + "BitmaskArray.take_1d only implemented for axis=0" ) if nindices <= 0: @@ -653,7 +653,7 @@ cdef class BitMaskArray: ) cdef ArrowBitmap bitmap - cdef BitMaskArray bma = BitMaskArray.__new__(BitMaskArray) + cdef BitmaskArray bma = BitmaskArray.__new__(BitmaskArray) ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, nindices) @@ -668,7 +668,7 @@ cdef class BitMaskArray: return bma def copy(self): - return BitMaskArray.copy_from_bitmaskarray(self) + return BitmaskArray.copy_from_bitmaskarray(self) 
@cython.boundscheck(False) # TODO: Removing this causes an IndexError? Zero size? @cython.wraparound(False) @@ -678,7 +678,7 @@ cdef class BitMaskArray: def to_numpy(self) -> ndarray: cdef ndarray[uint8_t] result = np.empty(self.bitmap.size_bits, dtype=bool) - BitMaskArray.buffer_to_array_1d( + BitmaskArray.buffer_to_array_1d( result, self.bitmap.buffer.data, self.bitmap.size_bits diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 058e62e47d063..b4c592af9ff5f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1298,7 +1298,7 @@ def take( warnings.warn( "pd.api.extensions.take accepting non-standard inputs is deprecated " "and will raise in a future version. Pass either a numpy.ndarray, " - "ExtensionArray, Index, Series, or BitMaskArray instead.", + "ExtensionArray, Index, Series, or BitmaskArray instead.", FutureWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index f426e57da4380..e7c39144fad35 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -13,7 +13,7 @@ import numpy as np from pandas._libs import missing as libmissing -from pandas._libs.arrays import BitMaskArray +from pandas._libs.arrays import BitmaskArray from pandas.core.nanops import check_below_min_count @@ -24,7 +24,7 @@ def _reductions( func: Callable, values: np.ndarray, - mask: BitMaskArray, + mask: BitmaskArray, *, skipna: bool = True, min_count: int = 0, @@ -65,7 +65,7 @@ def _reductions( def sum( values: np.ndarray, - mask: BitMaskArray, + mask: BitmaskArray, *, skipna: bool = True, min_count: int = 0, @@ -78,7 +78,7 @@ def sum( def prod( values: np.ndarray, - mask: BitMaskArray, + mask: BitmaskArray, *, skipna: bool = True, min_count: int = 0, @@ -92,7 +92,7 @@ def prod( def _minmax( func: Callable, values: np.ndarray, - mask: np.ndarray | BitMaskArray, + mask: np.ndarray | BitmaskArray, *, 
skipna: bool = True, axis: AxisInt | None = None, @@ -119,7 +119,7 @@ def _minmax( else: return func(values, axis=axis) else: - if isinstance(mask, BitMaskArray): + if isinstance(mask, BitmaskArray): subset = values[(~mask).to_numpy()] else: subset = values[~mask] @@ -132,7 +132,7 @@ def _minmax( def min( values: np.ndarray, - mask: np.ndarray | BitMaskArray, + mask: np.ndarray | BitmaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -142,7 +142,7 @@ def min( def max( values: np.ndarray, - mask: np.ndarray | BitMaskArray, + mask: np.ndarray | BitmaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -152,7 +152,7 @@ def max( def mean( values: np.ndarray, - mask: BitMaskArray, + mask: BitmaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -164,7 +164,7 @@ def mean( def var( values: np.ndarray, - mask: BitMaskArray, + mask: BitmaskArray, *, skipna: bool = True, axis: AxisInt | None = None, @@ -182,7 +182,7 @@ def var( def std( values: np.ndarray, - mask: BitMaskArray, + mask: BitmaskArray, *, skipna: bool = True, axis: AxisInt | None = None, diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index b8e8a99847383..9d0376169fe0d 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -28,7 +28,7 @@ if TYPE_CHECKING: import pyarrow - from pandas._libs.arrays import BitMaskArray + from pandas._libs.arrays import BitmaskArray from pandas._typing import ( Dtype, DtypeObj, @@ -301,14 +301,14 @@ class BooleanArray(BaseMaskedArray): @classmethod def _simple_new( - cls, values: np.ndarray, mask: npt.NDArray[np.bool_] | BitMaskArray + cls, values: np.ndarray, mask: npt.NDArray[np.bool_] | BitmaskArray ) -> Self: result = super()._simple_new(values, mask) result._dtype = BooleanDtype() return result def __init__( - self, values: np.ndarray, mask: np.ndarray | BitMaskArray, copy: bool = False + self, values: np.ndarray, mask: np.ndarray | BitmaskArray, copy: bool = False ) -> None: if not 
(isinstance(values, np.ndarray) and values.dtype == np.bool_): raise TypeError( diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 89b3df8aa89f9..ae04f4a1174b6 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -15,7 +15,7 @@ lib, missing as libmissing, ) -from pandas._libs.arrays import BitMaskArray +from pandas._libs.arrays import BitmaskArray from pandas._libs.tslibs import ( get_unit_from_dtype, is_supported_unit, @@ -113,7 +113,7 @@ class BaseMaskedArray(OpsMixin, ExtensionArray): _internal_fill_value: Scalar # our underlying data and mask are each ndarrays _data: np.ndarray - _mask: BitMaskArray + _mask: BitmaskArray # Fill values used for any/all _truthy_value = Scalar # bool(_truthy_value) = True @@ -121,26 +121,26 @@ class BaseMaskedArray(OpsMixin, ExtensionArray): @classmethod def _simple_new( - cls, values: np.ndarray, mask: npt.NDArray[np.bool_] | BitMaskArray + cls, values: np.ndarray, mask: npt.NDArray[np.bool_] | BitmaskArray ) -> Self: result = BaseMaskedArray.__new__(cls) result._data = values - result._mask = BitMaskArray(mask) + result._mask = BitmaskArray(mask) return result def __init__( self, values: np.ndarray, - mask: npt.NDArray[np.bool_] | BitMaskArray, + mask: npt.NDArray[np.bool_] | BitmaskArray, copy: bool = False, ) -> None: # values is supposed to already be validated in the subclass if not ( - isinstance(mask, BitMaskArray) + isinstance(mask, BitmaskArray) or (isinstance(mask, np.ndarray) and mask.dtype == np.bool_) ): raise TypeError( - "mask should be boolean numpy array or BitMaskArray. " + "mask should be boolean numpy array or BitmaskArray. 
" "Use the 'pd.array' function instead" ) if isinstance(mask, np.ndarray): @@ -152,7 +152,7 @@ def __init__( mask = mask.copy() self._data = values - self._mask = BitMaskArray(mask) + self._mask = BitmaskArray(mask) @classmethod def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: @@ -320,7 +320,7 @@ def __setitem__(self, key, value) -> None: value, mask = self._coerce_to_array(value, dtype=self.dtype) self._data[key] = value - if isinstance(mask, BitMaskArray): + if isinstance(mask, BitmaskArray): mask = mask.to_numpy() self._mask[key] = mask @@ -329,7 +329,7 @@ def __contains__(self, key) -> bool: if isna(key) and key is not self.dtype.na_value: # GH#52840 if self._data.dtype.kind == "f" and lib.is_float(key): - # TODO: implement low level invert operator on BitMaskArray + # TODO: implement low level invert operator on BitmaskArray return bool((np.isnan(self._data) & ~self._mask).any()) return bool(super().__contains__(key)) @@ -696,7 +696,7 @@ def _hasna(self) -> bool: return self._mask.any() def _propagate_mask( - self, mask: npt.NDArray[np.bool_] | BitMaskArray | None, other + self, mask: npt.NDArray[np.bool_] | BitmaskArray | None, other ) -> npt.NDArray[np.bool_]: if mask is None: mask = ( @@ -916,7 +916,7 @@ def _concat_same_type( ) -> Self: data = np.concatenate([x._data for x in to_concat], axis=axis) try: - mask = BitMaskArray.concatenate([x._mask for x in to_concat], axis=axis) + mask = BitmaskArray.concatenate([x._mask for x in to_concat], axis=axis) except NotImplementedError: mask = np.concatenate([x._mask.to_numpy() for x in to_concat], axis=axis) return cls(data, mask) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index babbb757c8a61..b27392ba39155 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -51,7 +51,7 @@ ) if TYPE_CHECKING: - from pandas._libs.arrays import BitMaskArray + from pandas._libs.arrays import BitmaskArray bn = import_optional_dependency("bottleneck", errors="warn") @@ -1543,7 
+1543,7 @@ def _maybe_null_out( def check_below_min_count( shape: tuple[int, ...], - mask: npt.NDArray[np.bool_] | BitMaskArray | None, + mask: npt.NDArray[np.bool_] | BitmaskArray | None, min_count: int, ) -> bool: """ diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index 8136354659d6b..dfe64a37bbd30 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -9,14 +9,14 @@ lib, missing as libmissing, ) -from pandas._libs.arrays import BitMaskArray +from pandas._libs.arrays import BitmaskArray def kleene_or( left: bool | np.ndarray | libmissing.NAType, right: bool | np.ndarray | libmissing.NAType, - left_mask: np.ndarray | BitMaskArray | None, - right_mask: np.ndarray | BitMaskArray | None, + left_mask: np.ndarray | BitmaskArray | None, + right_mask: np.ndarray | BitmaskArray | None, ): """ Boolean ``or`` using Kleene logic. @@ -54,9 +54,9 @@ def kleene_or( result = left | right if right_mask is not None: - if isinstance(left_mask, BitMaskArray): + if isinstance(left_mask, BitmaskArray): left_mask = left_mask.to_numpy() - if isinstance(right_mask, BitMaskArray): + if isinstance(right_mask, BitmaskArray): right_mask = right_mask.to_numpy() # output is unknown where (False & NA), (NA & False), (NA & NA) left_false = ~(left | left_mask) @@ -70,7 +70,7 @@ def kleene_or( if right is True: mask = np.zeros(left_mask.shape, left_mask.dtype) else: - if isinstance(left_mask, BitMaskArray): + if isinstance(left_mask, BitmaskArray): left_mask = left_mask.to_numpy() if right is libmissing.NA: mask = (~left & ~left_mask) | left_mask @@ -83,8 +83,8 @@ def kleene_or( def kleene_xor( left: bool | np.ndarray | libmissing.NAType, right: bool | np.ndarray | libmissing.NAType, - left_mask: np.ndarray | BitMaskArray | None, - right_mask: np.ndarray | BitMaskArray | None, + left_mask: np.ndarray | BitmaskArray | None, + right_mask: np.ndarray | BitmaskArray | None, ): """ Boolean ``xor`` using Kleene logic. 
@@ -126,7 +126,7 @@ def kleene_xor( if right is libmissing.NA: mask = np.ones(left_mask.shape, left_mask.dtype) else: - if isinstance(left_mask, BitMaskArray): + if isinstance(left_mask, BitmaskArray): mask = left_mask.to_numpy() else: mask = left_mask.copy() @@ -139,8 +139,8 @@ def kleene_xor( def kleene_and( left: bool | libmissing.NAType | np.ndarray, right: bool | libmissing.NAType | np.ndarray, - left_mask: np.ndarray | BitMaskArray | None, - right_mask: np.ndarray | BitMaskArray | None, + left_mask: np.ndarray | BitmaskArray | None, + right_mask: np.ndarray | BitmaskArray | None, ): """ Boolean ``and`` using Kleene logic. @@ -190,9 +190,9 @@ def kleene_and( else: # Since we must compare to left / right it helps perf to convert # to numpy up front, rather than deferring multiple times - if isinstance(left_mask, BitMaskArray): + if isinstance(left_mask, BitmaskArray): left_mask = left_mask.to_numpy() - if isinstance(right_mask, BitMaskArray): + if isinstance(right_mask, BitmaskArray): right_mask = right_mask.to_numpy() # unmask where either left or right is False diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 3e9b669913749..b9d640620655f 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -26,7 +26,7 @@ def test_floating_array_constructor(): tm.assert_numpy_array_equal(result._mask.to_numpy(), mask) msg = ( - r".* should be .* numpy array( or BitMaskArray)?. " + r".* should be .* numpy array( or BitmaskArray)?. 
" r"Use the 'pd.array' function instead" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index 6cc240cd52aca..9e8f941794d28 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -81,7 +81,7 @@ def test_integer_array_constructor(): tm.assert_extension_array_equal(result, expected) msg = ( - r".* should be .* numpy array( or BitMaskArray)?. " + r".* should be .* numpy array( or BitmaskArray)?. " r"Use the 'pd.array' function instead" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 1e1258984edaf..153078de7c32d 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from pandas._libs.arrays import BitMaskArray +from pandas._libs.arrays import BitmaskArray import pandas._testing as tm @@ -20,7 +20,7 @@ ], ) def test_constructor_ndarray(array, expected): - bma = BitMaskArray(array) + bma = BitmaskArray(array) assert bma.bytes == expected assert not bma.parent assert bma.array_shape == array.shape @@ -29,27 +29,27 @@ def test_constructor_ndarray(array, expected): @pytest.mark.parametrize( "parent,expected", [ - (BitMaskArray(np.array([False, False])), bytes([0x0])), - (BitMaskArray(np.array([True, False])), bytes([0x1])), - (BitMaskArray(np.array([False, True])), bytes([0x2])), - (BitMaskArray(np.array([True, True])), bytes([0x3])), - (BitMaskArray(np.array([True, False] * 8)), bytes([0x55, 0x55])), + (BitmaskArray(np.array([False, False])), bytes([0x0])), + (BitmaskArray(np.array([True, False])), bytes([0x1])), + (BitmaskArray(np.array([False, True])), bytes([0x2])), + (BitmaskArray(np.array([True, True])), bytes([0x3])), + (BitmaskArray(np.array([True, False] * 8)), bytes([0x55, 0x55])), ], ) def 
test_constructor_bitmap(parent, expected): - bma = BitMaskArray(parent) + bma = BitmaskArray(parent) assert bma.bytes == expected assert bma.parent is parent assert bma.array_shape == parent.shape def test_len(): - bma = BitMaskArray(np.array([True, False, False])) + bma = BitmaskArray(np.array([True, False, False])) assert len(bma) == 3 def test_repr_no_parent(): - bma = BitMaskArray(np.array([True, False, False])) + bma = BitmaskArray(np.array([True, False, False])) result = repr(bma) assert "parent: None" in result assert "shape: (3,)" in result @@ -57,11 +57,11 @@ def test_repr_no_parent(): def test_repr_parent(): - parent = BitMaskArray(np.array([False, False, True])) - bma = BitMaskArray(parent) + parent = BitmaskArray(np.array([False, False, True])) + bma = BitmaskArray(parent) result = repr(bma) parent_id = hex(id(parent)) - assert f"parent: > 1) & 0x1) == 1 - result2 = ~BitMaskArray(np.array([False, True])) + result2 = ~BitmaskArray(np.array([False, True])) assert (result2.bytes[0] & 0x1) == 1 assert ((result2.bytes[0] >> 1) & 0x1) == 0 @@ -183,10 +183,10 @@ def test_invert(): ], ) def test_and(rhs_as_bitmask, lhs, rhs, expected): - bma1 = BitMaskArray(np.array(lhs)) + bma1 = BitmaskArray(np.array(lhs)) if rhs_as_bitmask: - bma2 = BitMaskArray(np.array(rhs)) + bma2 = BitmaskArray(np.array(rhs)) else: bma2 = np.array(rhs) @@ -207,10 +207,10 @@ def test_and(rhs_as_bitmask, lhs, rhs, expected): ], ) def test_or(rhs_as_bitmask, lhs, rhs, expected): - bma1 = BitMaskArray(np.array(lhs)) + bma1 = BitmaskArray(np.array(lhs)) if rhs_as_bitmask: - bma2 = BitMaskArray(np.array(rhs)) + bma2 = BitmaskArray(np.array(rhs)) else: bma2 = np.array(rhs) @@ -231,10 +231,10 @@ def test_or(rhs_as_bitmask, lhs, rhs, expected): ], ) def test_xor(rhs_as_bitmask, lhs, rhs, expected): - bma1 = BitMaskArray(np.array(lhs)) + bma1 = BitmaskArray(np.array(lhs)) if rhs_as_bitmask: - bma2 = BitMaskArray(np.array(rhs)) + bma2 = BitmaskArray(np.array(rhs)) else: bma2 = np.array(rhs) @@ 
-244,8 +244,8 @@ def test_xor(rhs_as_bitmask, lhs, rhs, expected): def test_pickle(): - parent = BitMaskArray(np.array([True, False, True])) - child = BitMaskArray(parent) + parent = BitmaskArray(np.array([True, False, True])) + child = BitmaskArray(parent) result_child = pickle.loads(pickle.dumps(child)) @@ -258,7 +258,7 @@ def test_pickle(): def test_iter(): - bma = BitMaskArray(np.array([True, False, True])) + bma = BitmaskArray(np.array([True, False, True])) itr = iter(bma) assert next(itr) is True @@ -279,7 +279,7 @@ def test_iter(): ], ) def test_size(data, expected): - bma = BitMaskArray(data) + bma = BitmaskArray(data) result = bma.size assert result == expected @@ -294,7 +294,7 @@ def test_size(data, expected): ], ) def test_nbytes(data, expected): - bma = BitMaskArray(data) + bma = BitmaskArray(data) result = bma.nbytes assert result == expected @@ -308,7 +308,7 @@ def test_nbytes(data, expected): ], ) def test_shape(data): - bma = BitMaskArray(data) + bma = BitmaskArray(data) assert bma.array_shape == data.shape @@ -322,7 +322,7 @@ def test_shape(data): ], ) def test_any(data, expected): - bma = BitMaskArray(data) + bma = BitmaskArray(data) assert bma.any() == expected @@ -336,7 +336,7 @@ def test_any(data, expected): ], ) def test_all(data, expected): - bma = BitMaskArray(data) + bma = BitmaskArray(data) assert bma.all() == expected @@ -350,12 +350,12 @@ def test_all(data, expected): ], ) def test_sum(data, expected): - bma = BitMaskArray(data) + bma = BitmaskArray(data) assert bma.sum() == expected def test_take1d(): - bma = BitMaskArray(np.array([True, False, True, False])) + bma = BitmaskArray(np.array([True, False, True, False])) result1 = bma.take_1d(np.array([0]), axis=0) assert (result1.bytes[0] & 0x1) == 1 @@ -379,25 +379,25 @@ def test_take1d(): def test_take1d_raises_not_axis0(): - bma = BitMaskArray(np.array([True, False, True])) + bma = BitmaskArray(np.array([True, False, True])) with pytest.raises(NotImplementedError, match="only 
implemented for axis=0"): bma.take_1d(np.array([1]), axis=1) def test_take_1d_raises_empty_indices(): - bma = BitMaskArray(np.array([True, False, True])) + bma = BitmaskArray(np.array([True, False, True])) with pytest.raises(NotImplementedError, match="does not support empty takes"): bma.take_1d(np.array([], dtype="int64"), axis=0) def test_take_1d_raises_negative_indices(): - bma = BitMaskArray(np.array([True, False, True])) + bma = BitmaskArray(np.array([True, False, True])) with pytest.raises(NotImplementedError, match="does not support negative indexing"): bma.take_1d(np.array([-1], dtype="int64"), axis=0) def test_copy(): - old_bma = BitMaskArray(np.array([True, False, True, False])) + old_bma = BitmaskArray(np.array([True, False, True, False])) bma = old_bma.copy() assert bma.bytes == old_bma.bytes @@ -415,7 +415,7 @@ def test_copy(): ], ) def test_to_numpy(data): - bma = BitMaskArray(data) + bma = BitmaskArray(data) result = bma.to_numpy() tm.assert_numpy_array_equal(result, data) From 0d78ac39b848295337cfca923f0a19407e7b656b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 Aug 2023 12:35:47 -0400 Subject: [PATCH 098/126] fix error type --- pandas/tests/arrays/masked/test_bitmask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 153078de7c32d..746a64c626aef 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -392,7 +392,7 @@ def test_take_1d_raises_empty_indices(): def test_take_1d_raises_negative_indices(): bma = BitmaskArray(np.array([True, False, True])) - with pytest.raises(NotImplementedError, match="does not support negative indexing"): + with pytest.raises(ValueError, match="does not support negative indexing"): bma.take_1d(np.array([-1], dtype="int64"), axis=0) From d40a1d8c16a22a74cc8057247a558140dd2a76df Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 Aug 2023 12:50:46 
-0400 Subject: [PATCH 099/126] less to_numpy --- pandas/core/array_algos/masked_reductions.py | 2 +- pandas/core/arrays/boolean.py | 2 +- pandas/core/arrays/numeric.py | 2 +- pandas/tests/arrays/boolean/test_construction.py | 3 +-- pandas/tests/arrays/floating/test_arithmetic.py | 4 ++-- pandas/tests/arrays/floating/test_construction.py | 1 - pandas/tests/arrays/integer/test_construction.py | 1 - pandas/tests/arrays/integer/test_function.py | 2 +- pandas/tests/arrays/masked_shared.py | 2 +- 9 files changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index e7c39144fad35..3e34fb03d657e 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -60,7 +60,7 @@ def _reductions( ): return libmissing.NA - return func(values, where=~mask.to_numpy(), axis=axis, **kwargs) + return func(values, where=(~mask).to_numpy(), axis=axis, **kwargs) def sum( diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 9d0376169fe0d..ab0c81aa68c34 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -397,7 +397,7 @@ def _accumulate( self, name: str, *, skipna: bool = True, **kwargs ) -> BaseMaskedArray: data = self._data - mask = self._mask.to_numpy() + mask = self._mask if name in ("cummin", "cummax"): op = getattr(masked_accumulations, name) data, mask = op(data, mask, skipna=skipna, **kwargs) diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 897ae8a89c73c..864fc2c975c86 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -146,7 +146,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype cls = dtype_cls.construct_array_type() if isinstance(values, cls): - values, mask = values._data, values._mask.to_numpy() + values, mask = values._data, values._mask if dtype is not None: values = 
values.astype(dtype.numpy_dtype, copy=False) diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index 37745f589e26d..12378cf719065 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -40,7 +40,6 @@ def test_boolean_array_constructor_copy(): result = BooleanArray(values, mask) assert result._data is values - # assert result._mask is mask result = BooleanArray(values, mask, copy=True) assert result._data is not values @@ -159,7 +158,7 @@ def test_coerce_to_array(): expected = BooleanArray(values, mask) tm.assert_extension_array_equal(result, expected) assert result._data is values - # assert result._mask is mask + result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True)) expected = BooleanArray(values, mask) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index f7fd08361f5e1..052f38dfce5af 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -67,7 +67,7 @@ def test_pow_scalar(dtype): # TODO np.nan should be converted to pd.NA / missing before operation? 
expected = FloatingArray( np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype), - mask=a._mask.to_numpy(), + mask=a._mask, ) tm.assert_extension_array_equal(result, expected) @@ -89,7 +89,7 @@ def test_pow_scalar(dtype): result = np.nan**a expected = FloatingArray( np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), - mask=a._mask.to_numpy(), + mask=a._mask, ) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index b9d640620655f..5a58125a1c126 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -65,7 +65,6 @@ def test_floating_array_constructor_copy(): result = FloatingArray(values, mask) assert result._data is values - # assert result._mask is mask result = FloatingArray(values, mask, copy=True) assert result._data is not values diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index 9e8f941794d28..d442a26c9c4dc 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -103,7 +103,6 @@ def test_integer_array_constructor_copy(): result = IntegerArray(values, mask) assert result._data is values - # assert result._mask is mask result = IntegerArray(values, mask, copy=True) assert result._data is not values diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 40c9dcc697f46..d48b636a98feb 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -26,7 +26,7 @@ def test_ufuncs_single_float(ufunc): a = pd.array([1, 2, -3, np.nan]) with np.errstate(invalid="ignore"): result = ufunc(a) - expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask.to_numpy()) + expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask) 
tm.assert_extension_array_equal(result, expected) s = pd.Series(a) diff --git a/pandas/tests/arrays/masked_shared.py b/pandas/tests/arrays/masked_shared.py index 22caeb94a13a1..78726b2a90471 100644 --- a/pandas/tests/arrays/masked_shared.py +++ b/pandas/tests/arrays/masked_shared.py @@ -43,7 +43,7 @@ def test_scalar(self, other, comparison_op, dtype): expected = pd.array([None, None, None], dtype="boolean") else: values = op(left._data, other) - expected = pd.arrays.BooleanArray(values, left._mask.to_numpy(), copy=True) + expected = pd.arrays.BooleanArray(values, left._mask, copy=True) tm.assert_extension_array_equal(result, expected) # ensure we haven't mutated anything inplace From 35da3f6469e20191e17c42bbe0a38ae852408ec3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 Aug 2023 14:42:35 -0400 Subject: [PATCH 100/126] licenses --- pandas/_libs/include/pandas/bitmask_algorithms.h | 2 ++ pandas/_libs/src/bitmask_algorithms.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pandas/_libs/include/pandas/bitmask_algorithms.h b/pandas/_libs/include/pandas/bitmask_algorithms.h index 3ca086acf77a6..f29fddf6551ed 100644 --- a/pandas/_libs/include/pandas/bitmask_algorithms.h +++ b/pandas/_libs/include/pandas/bitmask_algorithms.h @@ -1,3 +1,5 @@ +// The full license is in the LICENSE file, distributed with this software. + #pragma once #include diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index e74f7a2aa6ab3..d78d8b973a789 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -1,3 +1,5 @@ +// The full license is in the LICENSE file, distributed with this software. 
+ #include #include "pandas/bitmask_algorithms.h" From 5b7d0c2d49b7ba7018b5fde087f0d97535f7cff1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 Aug 2023 14:46:54 -0400 Subject: [PATCH 101/126] typing fixes --- pandas/core/arrays/numeric.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 864fc2c975c86..38cb1ccc14b7f 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -32,6 +32,7 @@ import pyarrow + from pandas._libs.arrays import BitmaskArray from pandas._typing import ( Dtype, DtypeObj, @@ -232,7 +233,7 @@ class NumericArray(BaseMaskedArray): def __init__( self, values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitmaskArray, copy: bool = False, ) -> None: checker = self._dtype_cls._checker From e08a6471708c1d7e8ae21a0b86962d091f293aa0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 26 Aug 2023 17:24:45 -0400 Subject: [PATCH 102/126] buffer protocol implementation for BitmaskArray --- pandas/_libs/arrays.pyi | 7 +- pandas/_libs/arrays.pyx | 205 ++++++++++++++++--- pandas/_libs/hashtable.pyi | 3 +- pandas/_libs/hashtable_class_helper.pxi.in | 2 +- pandas/core/algorithms.py | 20 +- pandas/core/array_algos/masked_reductions.py | 9 +- pandas/core/array_algos/quantile.py | 7 +- pandas/core/arrays/masked.py | 70 +++---- pandas/core/groupby/groupby.py | 5 +- pandas/core/missing.py | 4 +- pandas/core/reshape/merge.py | 4 +- pandas/tests/arrays/boolean/test_logical.py | 4 +- pandas/tests/arrays/masked/test_bitmask.py | 57 ++++-- pandas/tests/arrays/masked_shared.py | 2 +- 14 files changed, 284 insertions(+), 115 deletions(-) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi index d890a59218235..7a6d16c8cefd7 100644 --- a/pandas/_libs/arrays.pyi +++ b/pandas/_libs/arrays.pyi @@ -45,12 +45,13 @@ class NDArrayBacked: class BitmaskArray: parent: Self def __init__(self, data: np.ndarray | Self) -> None: ... 
+ def __len__(self) -> int: ... def __setitem__(self, key: PositionalIndexer, value: ArrayLike | bool) -> None: ... def __getitem__(self, key: PositionalIndexer) -> bool: ... def __invert__(self) -> Self: ... - def __and__(self, other: np.ndarray | Self) -> np.ndarray: ... - def __or__(self, other: np.ndarray | Self) -> np.ndarray: ... - def __xor__(self, other: np.ndarray | Self) -> np.ndarray: ... + def __and__(self, other: np.ndarray | Self | bool) -> np.ndarray: ... + def __or__(self, other: np.ndarray | Self | bool) -> np.ndarray: ... + def __xor__(self, other: np.ndarray | Self | bool) -> np.ndarray: ... def __getstate__(self) -> dict: ... def __setstate__(self, other: dict) -> None: ... def __iter__(self): ... diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 9c0186e4c221e..df1de3cfb035f 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -240,8 +240,14 @@ cdef class BitmaskArray: cdef: ArrowBitmap bitmap bint buffer_owner # set when parent is None, but gives C-level access + # NumPy compatibility + cdef Py_ssize_t ndim + cdef Py_ssize_t[2] shape + cdef Py_ssize_t[2] strides + # Buffer protocol support + int n_consumers + uint8_t* memview_buf cdef public: - object array_shape object parent # assignments gives RC to ensure proper buffer lifecycle @cython.boundscheck(False) @@ -254,24 +260,41 @@ cdef class BitmaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, nobs) ArrowBitmapAppendInt8Unsafe(&bitmap, &arr[0], nobs) - self.buffer_owner = True self.bitmap = bitmap + self.buffer_owner = True cdef void init_from_bitmaskarray(self, BitmaskArray bma): - self.buffer_owner = False self.bitmap = bma.bitmap + self.buffer_owner = False + self.ndim = bma.ndim + self.shape[0] = bma.shape[0] + self.strides[0] = bma.strides[0] + if self.ndim == 2: + self.shape[1] = bma.shape[1] + self.strides[1] = bma.strides[1] def __cinit__(self): + cdef BitmaskArray self_ = self self.parent = False + self_.n_consumers = 0 + 
self_.memview_buf = NULL def __init__(self, data): + cdef BitmaskArray self_ = self if isinstance(data, np.ndarray): + if not data.flags["C_CONTIGUOUS"]: + data = np.ascontiguousarray(data) + self.init_from_ndarray(data.ravel()) - self.array_shape = data.shape self.parent = None + self_.ndim = data.ndim + self_.shape[0] = data.shape[0] + self_.strides[0] = data.strides[0] + if (data.ndim == 2): + self_.shape[1] = data.shape[1] + self_.strides[1] = data.strides[1] elif isinstance(data, type(self)): self.init_from_bitmaskarray(data) - self.array_shape = data.array_shape self.parent = data else: raise TypeError("Unsupported argument to BitmaskArray constructor") @@ -301,8 +324,12 @@ cdef class BitmaskArray: bitmap.buffer.data = buf bma.bitmap = bitmap - bma.array_shape = old_bma.array_shape bma.buffer_owner = True + bma.ndim = old_bma.ndim + bma.shape = old_bma.shape + bma.strides = old_bma.strides + bma.parent = False + return bma def __len__(self): @@ -314,11 +341,10 @@ cdef class BitmaskArray: else: par = None - shape = self.array_shape data = self.bytes return ( - f"{object.__repr__(self)}\nparent: {par}\nshape: {shape}\ndata: {data}\n" + f"{object.__repr__(self)}\nparent: {par}\ndata: {data}\n" ) @cython.wraparound(False) @@ -330,22 +356,39 @@ cdef class BitmaskArray: cdef BitmaskArray current_bma cdef Py_ssize_t nbitmaps = len(objs) - cdef Py_ssize_t second_dim = 0 - if any(len(x.array_shape) > 1 for x in objs): - second_dim = objs[0].array_shape[1] - for obj in objs: - if not obj.array_shape[1] == second_dim: - raise NotImplementedError( - "BitmaskArray.concatenate does not support broadcasting" - ) + cdef BitmaskArray first_bma = objs[0] + cdef int expected_ndim = first_bma.ndim + cdef Py_ssize_t expected_stride0 = first_bma.strides[0] + cdef Py_ssize_t expected_shape1, expected_stride1 + if expected_ndim == 2: + expected_stride1 = first_bma.strides[1] + expected_shape1 = first_bma.shape[1] + + cdef Py_ssize_t dim0shape = 0 cdef ArrowBitmap** bitmaps = malloc( 
sizeof(ArrowBitmap*) * nbitmaps ) + for i in range(nbitmaps): current_bma = objs[i] + if ( + current_bma.ndim != expected_ndim + or current_bma.strides[0] != expected_stride0 + or ( + expected_ndim == 2 and ( + current_bma.shape[1] != expected_shape1 + or current_bma.strides[1] != expected_stride1 + ) + ) + ): + free(bitmaps) + raise NotImplementedError( + "BitmaskArray.concatenate does not support broadcasting" + ) total_bits += current_bma.bitmap.size_bits bitmaps[i] = ¤t_bma.bitmap + dim0shape += current_bma.shape[0] # Bypass __init__ calls cdef BitmaskArray bma = BitmaskArray.__new__(BitmaskArray) @@ -357,11 +400,15 @@ cdef class BitmaskArray: free(bitmaps) bma.bitmap = bitmap - if second_dim != 0: - bma.array_shape = tuple((total_bits // second_dim, second_dim)) - else: - bma.array_shape = tuple((total_bits,)) bma.buffer_owner = True + + bma.ndim = expected_ndim + bma.shape[0] = dim0shape # only allowed because of axis=0 assumption + bma.strides[0] = expected_stride0 + if expected_ndim == 2: + bma.shape[1] = expected_shape1 + bma.strides[1] = expected_stride1 + bma.parent = None return bma @@ -446,8 +493,10 @@ cdef class BitmaskArray: BitmapInvert(&self_.bitmap, &bitmap) bma.bitmap = bitmap - bma.array_shape = self.array_shape bma.buffer_owner = True + bma.ndim = self_.ndim + bma.shape = self_.shape + bma.strides = self_.strides bma.parent = None return bma @@ -461,7 +510,10 @@ cdef class BitmaskArray: # TODO: maybe should return Self here instead of ndarray other_bma = other if self_.bitmap.size_bits == 0: - return np.empty(dtype=bool).reshape(self.array_shape) + result = np.empty([], dtype=bool) + if self_.ndim == 2: + return result.reshape(self_.shape[0], self_.shape[1]) + return result if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") @@ -477,7 +529,10 @@ cdef class BitmaskArray: bitmap.size_bits ) ArrowBitmapReset(&bitmap) - return result.reshape(self.array_shape) + + if self_.ndim == 2: + return 
result.reshape(self_.shape[0], self_.shape[1]) + return result return self.to_numpy() & other @@ -490,7 +545,10 @@ cdef class BitmaskArray: # TODO: maybe should return Self here instead of ndarray other_bma = other if self_.bitmap.size_bits == 0: - return np.empty(dtype=bool).reshape(self.array_shape) + result = np.empty([], dtype=bool) + if self_.ndim == 2: + return result.reshape(self_.shape[0], self_.shape[1]) + return result if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") @@ -506,7 +564,10 @@ cdef class BitmaskArray: bitmap.size_bits ) ArrowBitmapReset(&bitmap) - return result.reshape(self.array_shape) + + if self_.ndim == 2: + return result.reshape(self_.shape[0], self_.shape[1]) + return result return self.to_numpy() | other @@ -519,7 +580,10 @@ cdef class BitmaskArray: # TODO: maybe should return Self here instead of ndarray other_bma = other if self_.bitmap.size_bits == 0: - return np.empty(dtype=bool).reshape(self.array_shape) + result = np.empty([], dtype=bool) + if self_.ndim == 2: + return result.reshape(self_.shape[0], self_.shape[1]) + return result if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") @@ -535,7 +599,9 @@ cdef class BitmaskArray: bitmap.size_bits ) ArrowBitmapReset(&bitmap) - return result.reshape(self.array_shape) + if self_.ndim == 2: + return result.reshape(self_.shape[0], self_.shape[1]) + return result return self.to_numpy() ^ other @@ -543,7 +609,10 @@ cdef class BitmaskArray: cdef BitmaskArray self_ = self state = { "parent": self.parent, - "array_shape": self.array_shape, + "ndim": self_.ndim, + "shape0": self_.shape[0], + "stride0": self_.strides[0], + "n_consumers": self_.n_consumers, "buffer_owner": self_.buffer_owner, # Private ArrowBitmap attributes below "bitmap.buffer.size_bytes": self_.bitmap.buffer.size_bytes, @@ -551,6 +620,18 @@ cdef class BitmaskArray: "bitmap.size_bits": self_.bitmap.size_bits } + if 
self_.ndim == 2: + state["shape1"] = self_.shape[1] + state["stride1"] = self_.strides[1] + + # memview should only exist when n_consumers > 0 + if self_.n_consumers > 0: + memview_buf_data = bytearray(len(self)) + for i in range(len(self)): + memview_buf_data[i] = self_.memview_buf[i] + + state["memview_buf_data"] = memview_buf_data + # Only parents own data if self_.buffer_owner: bitmap_data = bytearray(self_.bitmap.buffer.size_bytes) @@ -565,12 +646,26 @@ cdef class BitmaskArray: cdef ArrowBitmap bitmap cdef BitmaskArray self_ = self, other self.parent = state["parent"] - self.array_shape = state["array_shape"] + self_.ndim = state["ndim"] + self_.shape[0] = state["shape0"] + self_.strides[0] = state["stride0"] + self_.n_consumers = state["n_consumers"] self_.buffer_owner = state["buffer_owner"] nbytes = state["bitmap.buffer.size_bytes"] capacity_bytes = state["bitmap.buffer.capacity_bytes"] nbits = state["bitmap.size_bits"] + + if self_.ndim == 2: + self_.shape[1] = state["shape1"] + self_.strides[1] = state["stride1"] + + if self_.n_consumers > 0: + self_.memview_buf = malloc(nbits) + memview_buf_data = state["memview_buf_data"] + for i in range(nbits): + self_.memview_buf[i] = memview_buf_data[i] + if not self_.buffer_owner: other = self.parent self_.bitmap = other.bitmap @@ -599,6 +694,38 @@ cdef class BitmaskArray: for i in range(self_.bitmap.size_bits): yield bool(ArrowBitGet(self_.bitmap.buffer.data, i)) + def __getbuffer__(self, Py_buffer *buffer, int flags): + cdef BitmaskArray self_ = self + + if self_.n_consumers == 0: + self_.memview_buf = malloc(self_.bitmap.size_bits) + ArrowBitsUnpackInt8( + self_.bitmap.buffer.data, + 0, + self_.bitmap.size_bits, + self_.memview_buf + ) + + buffer.buf = self_.memview_buf + buffer.format = "?" 
+ buffer.internal = NULL + buffer.itemsize = 1 + buffer.len = self_.bitmap.size_bits + buffer.ndim = self_.ndim + buffer.obj = self + buffer.readonly = 1 + buffer.shape = self_.shape + buffer.strides = self_.strides + buffer.suboffsets = NULL + + self_.n_consumers += 1 + + def __releasebuffer__(self, Py_buffer *buffer): + cdef BitmaskArray self_ = self + self_.n_consumers -= 1 + if self_.n_consumers == 0: + free(self_.memview_buf) + @property def size(self) -> int: return self.bitmap.size_bits @@ -619,12 +746,15 @@ cdef class BitmaskArray: @property def shape(self): """Strictly for NumPy compat in mask_ops""" - return self.array_shape + cdef BitmaskArray self_ = self + if self_.ndim == 1: + return tuple((self_.shape[0],)) + return tuple((self_.shape[0], self_.shape[1])) @property def dtype(self): """Strictly for NumPy compat in mask_ops""" - return bool + return np.dtype("bool") def any(self) -> bool: return BitmapAny(&self.bitmap) @@ -663,8 +793,14 @@ cdef class BitmaskArray: raise ValueError("take_1d does not support negative indexing") bma.bitmap = bitmap - bma.array_shape = tuple((indices.shape[0],)) bma.buffer_owner = True + + bma.ndim = self_.ndim + bma.shape[0] = indices.shape[0] + bma.strides = self_.strides + + bma.parent = None + return bma def copy(self): @@ -677,6 +813,7 @@ cdef class BitmaskArray: ArrowBitsUnpackInt8(buf, 0, size, &out[0]) def to_numpy(self) -> ndarray: + cdef BitmaskArray self_ = self cdef ndarray[uint8_t] result = np.empty(self.bitmap.size_bits, dtype=bool) BitmaskArray.buffer_to_array_1d( result, @@ -684,4 +821,6 @@ cdef class BitmaskArray: self.bitmap.size_bits ) - return result.reshape(self.array_shape) + if self_.ndim == 2: + return result.reshape(self_.shape[0], self_.shape[1]) + return result diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi index 2bc6d74fe6aee..cd51875eb3df1 100644 --- a/pandas/_libs/hashtable.pyi +++ b/pandas/_libs/hashtable.pyi @@ -6,6 +6,7 @@ from typing import ( import numpy as np +from 
pandas._libs.arrays import BitmaskArray from pandas._typing import npt def unique_label_indices( @@ -239,7 +240,7 @@ def mode( def value_count( values: np.ndarray, dropna: bool, - mask: npt.NDArray[np.bool_] | None = ..., + mask: npt.NDArray[np.bool_] | BitmaskArray | None = ..., ) -> tuple[np.ndarray, npt.NDArray[np.int64]]: ... # np.ndarray[same-as-values] # arr and values should have same dtype diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 1cf5d734705af..4a94e5c256eb3 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -665,7 +665,7 @@ cdef class {{name}}HashTable(HashTable): rmd = result_mask.data if use_mask: - mask_values = mask.view("uint8") + mask_values = mask if use_na_value: # We need this na_value2 because we want to allow users diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b4c592af9ff5f..53a75754a49c2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -21,6 +21,7 @@ iNaT, lib, ) +from pandas._libs.arrays import BitmaskArray from pandas._typing import ( AnyArrayLike, ArrayLike, @@ -424,7 +425,7 @@ def nunique_ints(values: ArrayLike) -> int: return result -def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None): +def unique_with_mask(values, mask: npt.NDArray[np.bool_] | BitmaskArray | None = None): """See algorithms.unique for docs. 
Takes a mask for masked arrays.""" values = _ensure_arraylike(values, func_name="unique") @@ -442,10 +443,9 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None): return uniques else: - uniques, mask = table.unique(values, mask=mask) + uniques, np_mask = table.unique(values, mask=mask) uniques = _reconstruct_data(uniques, original.dtype, original) - assert mask is not None # for mypy - return uniques, mask.astype("bool") + return uniques, np_mask.astype("bool") unique1d = unique @@ -550,7 +550,7 @@ def factorize_array( use_na_sentinel: bool = True, size_hint: int | None = None, na_value: object = None, - mask: npt.NDArray[np.bool_] | None = None, + mask: npt.NDArray[np.bool_] | BitmaskArray | None = None, ) -> tuple[npt.NDArray[np.intp], np.ndarray]: """ Factorize a numpy array to codes and uniques. @@ -946,7 +946,9 @@ def value_counts_internal( # Called once from SparseArray, otherwise could be private def value_counts_arraylike( - values: np.ndarray, dropna: bool, mask: npt.NDArray[np.bool_] | None = None + values: np.ndarray, + dropna: bool, + mask: npt.NDArray[np.bool_] | BitmaskArray | None = None, ) -> tuple[ArrayLike, npt.NDArray[np.int64]]: """ Parameters @@ -970,7 +972,7 @@ def value_counts_arraylike( if dropna: mask = keys != iNaT - keys, counts = keys[mask], counts[mask] + keys, counts = keys[mask], counts[mask] # type: ignore[index] res_keys = _reconstruct_data(keys, original.dtype, original) return res_keys, counts @@ -1293,7 +1295,9 @@ def take( ... 
fill_value=-10) array([ 10, 10, -10]) """ - if not isinstance(arr, (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries)): + if not isinstance( + arr, (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries, BitmaskArray) + ): # GH#52981 warnings.warn( "pd.api.extensions.take accepting non-standard inputs is deprecated " diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 3e34fb03d657e..2149d168cf898 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -13,11 +13,11 @@ import numpy as np from pandas._libs import missing as libmissing -from pandas._libs.arrays import BitmaskArray from pandas.core.nanops import check_below_min_count if TYPE_CHECKING: + from pandas._libs.arrays import BitmaskArray from pandas._typing import AxisInt @@ -60,7 +60,7 @@ def _reductions( ): return libmissing.NA - return func(values, where=(~mask).to_numpy(), axis=axis, **kwargs) + return func(values, where=~mask, axis=axis, **kwargs) def sum( @@ -119,10 +119,7 @@ def _minmax( else: return func(values, axis=axis) else: - if isinstance(mask, BitmaskArray): - subset = values[(~mask).to_numpy()] - else: - subset = values[~mask] + subset = values[~mask] # type: ignore[index] if subset.size: return func(subset, axis=axis) else: diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py index ee6f00b219a15..f6ea0b3fff3d6 100644 --- a/pandas/core/array_algos/quantile.py +++ b/pandas/core/array_algos/quantile.py @@ -10,6 +10,7 @@ ) if TYPE_CHECKING: + from pandas._libs.arrays import BitmaskArray from pandas._typing import ( ArrayLike, Scalar, @@ -43,7 +44,7 @@ def quantile_compat( def quantile_with_mask( values: np.ndarray, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitmaskArray, fill_value, qs: npt.NDArray[np.float64], interpolation: str, @@ -80,7 +81,7 @@ def quantile_with_mask( if values.ndim == 1: # unsqueeze, operate, re-squeeze values 
= np.atleast_2d(values) - mask = np.atleast_2d(mask) + mask = np.atleast_2d(mask) # type: ignore[arg-type] res_values = quantile_with_mask(values, mask, fill_value, qs, interpolation) return res_values[0] @@ -157,7 +158,7 @@ def _nanpercentile( qs: npt.NDArray[np.float64], *, na_value, - mask: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_] | BitmaskArray, interpolation: str, ): """ diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index ae04f4a1174b6..9e5180b3cad4e 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -216,13 +216,13 @@ def pad_or_backfill( limit_area: Literal["inside", "outside"] | None = None, copy: bool = True, ) -> Self: - mask = self._mask.to_numpy() + mask = self._mask if mask.any(): func = missing.get_fill_func(method, ndim=self.ndim) npvalues = self._data.T - new_mask = mask.T + new_mask = mask.to_numpy().T if copy: npvalues = npvalues.copy() new_mask = new_mask.copy() @@ -244,7 +244,7 @@ def fillna( ) -> Self: value, method = validate_fillna_kwargs(value, method) - mask = self._mask.to_numpy() + mask = self._mask value = missing.check_value_size(value, mask, len(self)) @@ -252,7 +252,7 @@ def fillna( if method is not None: func = missing.get_fill_func(method, ndim=self.ndim) npvalues = self._data.T - new_mask = mask.T + new_mask = mask.to_numpy().T if copy: npvalues = npvalues.copy() new_mask = new_mask.copy() @@ -368,7 +368,7 @@ def swapaxes(self, axis1, axis2) -> Self: def delete(self, loc, axis: AxisInt = 0) -> Self: data = np.delete(self._data, loc, axis=axis) - mask = np.delete(self._mask.to_numpy(), loc, axis=axis) + mask = np.delete(self._mask, loc, axis=axis) # type: ignore[call-overload] return self._simple_new(data, mask) def reshape(self, *args, **kwargs) -> Self: @@ -414,7 +414,7 @@ def round(self, decimals: int = 0, *args, **kwargs): values = np.round(self._data, decimals=decimals, **kwargs) # Usually we'll get same type as self, but ndarray[bool] casts to float - return 
self._maybe_mask_result(values, self._mask.to_numpy()) + return self._maybe_mask_result(values, self._mask) # ------------------------------------------------------------------ # Unary Methods @@ -520,7 +520,7 @@ def to_numpy( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) data = self._data.astype(dtype) - data[self._mask.to_numpy()] = na_value + data[self._mask] = na_value else: with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) @@ -563,9 +563,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: data = self._data.astype(dtype.numpy_dtype, copy=copy) # mask is copied depending on whether the data was copied, and # not directly depending on the `copy` keyword - mask = self._mask if data is self._data else self._mask.to_numpy() + mask = self._mask if data is self._data else self._mask.copy() cls = dtype.construct_array_type() - return cls(data, mask, copy=False) # type: ignore[arg-type] + return cls(data, mask, copy=False) if isinstance(dtype, ExtensionDtype): eacls = dtype.construct_array_type() @@ -637,7 +637,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): inputs2 = [] for x in inputs: if isinstance(x, BaseMaskedArray): - mask |= x._mask.to_numpy() + mask |= x._mask inputs2.append(x._data) else: inputs2.append(x) @@ -699,9 +699,7 @@ def _propagate_mask( self, mask: npt.NDArray[np.bool_] | BitmaskArray | None, other ) -> npt.NDArray[np.bool_]: if mask is None: - mask = ( - self._mask.to_numpy() - ) # TODO: need test for BooleanArray needing a copy + mask = self._mask.copy() # TODO: need test for BooleanArray needing a copy if other is libmissing.NA: # GH#45421 don't alter inplace mask = mask | True @@ -788,21 +786,21 @@ def _arith_method(self, other, op): if op_name == "pow": # 1 ** x is 1. 
- mask = np.where((self._data == 1) & (~self._mask).to_numpy(), False, mask) + mask = np.where((self._data == 1) & ~self._mask, False, mask) # x ** 0 is 1. if omask is not None: - mask = np.where((other == 0) & (~omask).to_numpy(), False, mask) + mask = np.where((other == 0) & ~omask, False, mask) elif other is not libmissing.NA: mask = np.where(other == 0, False, mask) elif op_name == "rpow": # 1 ** x is 1. if omask is not None: - mask = np.where((other == 1) & (~omask).to_numpy(), False, mask) + mask = np.where((other == 1) & ~omask, False, mask) elif other is not libmissing.NA: mask = np.where(other == 1, False, mask) # x ** 0 is 1. - mask = np.where((self._data == 0) & (~self._mask).to_numpy(), False, mask) + mask = np.where((self._data == 0) & ~self._mask, False, mask) return self._maybe_mask_result(result, mask) @@ -814,7 +812,7 @@ def _cmp_method(self, other, op) -> BooleanArray: mask = None if isinstance(other, BaseMaskedArray): - other, mask = other._data, other._mask.to_numpy() + other, mask = other._data, other._mask elif is_list_like(other): other = np.asarray(other) @@ -849,7 +847,9 @@ def _cmp_method(self, other, op) -> BooleanArray: return BooleanArray(result, mask, copy=False) def _maybe_mask_result( - self, result: np.ndarray | tuple[np.ndarray, np.ndarray], mask: np.ndarray + self, + result: np.ndarray | tuple[np.ndarray, np.ndarray], + mask: np.ndarray | BitmaskArray, ): """ Parameters @@ -979,7 +979,7 @@ def isin(self, values) -> BooleanArray: # type: ignore[override] # For now, NA does not propagate so set result according to presence of NA, # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion - result[self._mask.to_numpy()] = values_have_NA + result[self._mask] = values_have_NA mask = np.zeros(self._data.shape, dtype=bool) return BooleanArray(result, mask, copy=False) @@ -997,7 +997,7 @@ def unique(self) -> Self: ------- uniques : BaseMaskedArray """ - uniques, mask = algos.unique_with_mask(self._data, 
self._mask.to_numpy()) + uniques, mask = algos.unique_with_mask(self._data, self._mask) return self._simple_new(uniques, mask) @doc(ExtensionArray.searchsorted) @@ -1023,7 +1023,7 @@ def factorize( use_na_sentinel: bool = True, ) -> tuple[np.ndarray, ExtensionArray]: arr = self._data - mask = self._mask.to_numpy() + mask = self._mask # Use a sentinel for na; recode and add NA to uniques if necessary below codes, uniques = factorize_array(arr, use_na_sentinel=True, mask=mask) @@ -1039,7 +1039,7 @@ def factorize( size = len(uniques) + 1 uniques_mask = np.zeros(size, dtype=bool) if not use_na_sentinel and has_na: - na_index = mask.argmax() + na_index = mask.to_numpy().argmax() # Insert na with the proper code if na_index == 0: na_code = np.intp(0) @@ -1082,7 +1082,7 @@ def value_counts(self, dropna: bool = True) -> Series: from pandas.arrays import IntegerArray keys, value_counts = algos.value_counts_arraylike( - self._data, dropna=True, mask=self._mask.to_numpy() + self._data, dropna=True, mask=self._mask ) if dropna: @@ -1116,8 +1116,8 @@ def equals(self, other) -> bool: if not np.array_equal(self._mask.to_numpy(), other._mask.to_numpy()): return False - left = self._data[(~self._mask).to_numpy()] - right = other._data[(~other._mask).to_numpy()] + left = self._data[~self._mask] # type: ignore[call-overload] + right = other._data[~other._mask] return array_equivalent(left, right, strict_nan=True, dtype_equal=True) def _quantile( @@ -1133,7 +1133,7 @@ def _quantile( """ res = quantile_with_mask( self._data, - mask=self._mask.to_numpy(), + mask=self._mask, # TODO(GH#40932): na_value_for_dtype(self.dtype.numpy_dtype) # instead of np.nan fill_value=np.nan, @@ -1172,7 +1172,7 @@ def _reduce( else: # median, skew, kurt, sem data = self._data - mask = self._mask.to_numpy() + mask = self._mask op = getattr(nanops, f"nan{name}") axis = kwargs.pop("axis", None) result = op(data, axis=axis, skipna=skipna, mask=mask, **kwargs) @@ -1182,8 +1182,8 @@ def _reduce( return 
self._wrap_na_result(name=name, axis=0, mask_size=(1,)) else: result = result.reshape(1) - mask = np.zeros(1, dtype=bool) - return self._maybe_mask_result(result, mask) + np_mask = np.zeros(1, dtype=bool) + return self._maybe_mask_result(result, np_mask) if isna(result): return libmissing.NA @@ -1406,7 +1406,7 @@ def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" np.putmask( values, - self._mask.to_numpy(), + self._mask, # type: ignore[arg-type] self._falsey_value, # type: ignore[arg-type] ) else: @@ -1501,7 +1501,7 @@ def all(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" np.putmask( values, - self._mask.to_numpy(), + self._mask, # type: ignore[arg-type] self._truthy_value, # type: ignore[arg-type] ) else: @@ -1547,9 +1547,9 @@ def _groupby_op( op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na) # libgroupby functions are responsible for NOT altering mask - mask = self._mask.to_numpy() + mask = self._mask if op.kind != "aggregate": - result_mask = mask.copy() + result_mask = mask.to_numpy() else: result_mask = np.zeros(ngroups, dtype=bool) @@ -1558,7 +1558,7 @@ def _groupby_op( min_count=min_count, ngroups=ngroups, comp_ids=ids, - mask=mask, + mask=mask.to_numpy(), result_mask=result_mask, **kwargs, ) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 8c1d5075480b6..2692ae29fe9d1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -143,6 +143,8 @@ class providing the base-class of operations. 
if TYPE_CHECKING: from typing import Any + from pandas._libs.arrays import BitmaskArray + from pandas.core.window import ( ExpandingGroupby, ExponentialMovingWindowGroupby, @@ -4387,8 +4389,9 @@ def post_processor( def blk_func(values: ArrayLike) -> ArrayLike: orig_vals = values + mask: np.ndarray | BitmaskArray if isinstance(values, BaseMaskedArray): - mask = values._mask.to_numpy() + mask = values._mask result_mask = np.zeros((ngroups, nqs), dtype=np.bool_) else: mask = isna(values) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 58b0e2907b8ce..c3b258481a2f1 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -46,10 +46,12 @@ ) if TYPE_CHECKING: + from pandas._libs.arrays import BitmaskArray + from pandas import Index -def check_value_size(value, mask: npt.NDArray[np.bool_], length: int): +def check_value_size(value, mask: npt.NDArray[np.bool_] | BitmaskArray, length: int): """ Validate the size of the values passed to ExtensionArray.fillna. """ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 78ba95e959042..140a3024a8684 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2456,8 +2456,8 @@ def _factorize_keys( if isinstance(lk, BaseMaskedArray): assert isinstance(rk, BaseMaskedArray) - llab = rizer.factorize(lk._data, mask=lk._mask.to_numpy()) - rlab = rizer.factorize(rk._data, mask=rk._mask.to_numpy()) + llab = rizer.factorize(lk._data, mask=lk._mask) + rlab = rizer.factorize(rk._data, mask=rk._mask) elif isinstance(lk, ArrowExtensionArray): assert isinstance(rk, ArrowExtensionArray) # we can only get here with numeric dtypes diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py index 4cdaf3a90b21d..66c117ea3fc66 100644 --- a/pandas/tests/arrays/boolean/test_logical.py +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -238,8 +238,8 @@ def test_no_masked_assumptions(self, other, all_logical_operators): 
tm.assert_extension_array_equal(result, expected) if isinstance(other, BooleanArray): - other._data[other._mask.to_numpy()] = True - a._data[a._mask.to_numpy()] = False + other._data[other._mask] = True + a._data[a._mask] = False result = getattr(a, all_logical_operators)(other) expected = getattr(b, all_logical_operators)(other) diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 746a64c626aef..a7b80c9a2c0c5 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -12,18 +12,22 @@ @pytest.mark.parametrize( "array,expected", [ - (np.array([False, False]), bytes([0x0])), - (np.array([True, False]), bytes([0x1])), - (np.array([False, True]), bytes([0x2])), - (np.array([True, True]), bytes([0x3])), - (np.array([True, False] * 8), bytes([0x55, 0x55])), + pytest.param(np.array([False, False]), bytes([0x0]), id="all_false"), + pytest.param(np.array([True, False]), bytes([0x1]), id="first_true"), + pytest.param(np.array([False, True]), bytes([0x2]), id="second_true"), + pytest.param(np.array([True, True]), bytes([0x3]), id="all_true"), + pytest.param(np.array([True, False] * 8), bytes([0x55, 0x55]), id="multibyte"), + pytest.param( + np.array([[False, False], [True, True], [False, False]])[:, 0], + [False, True, False], + id="non-contiguous", + ), ], ) def test_constructor_ndarray(array, expected): bma = BitmaskArray(array) assert bma.bytes == expected assert not bma.parent - assert bma.array_shape == array.shape @pytest.mark.parametrize( @@ -40,7 +44,6 @@ def test_constructor_bitmap(parent, expected): bma = BitmaskArray(parent) assert bma.bytes == expected assert bma.parent is parent - assert bma.array_shape == parent.shape def test_len(): @@ -52,7 +55,6 @@ def test_repr_no_parent(): bma = BitmaskArray(np.array([True, False, False])) result = repr(bma) assert "parent: None" in result - assert "shape: (3,)" in result assert "data: b'\\x01'" in result @@ -60,9 +62,8 @@ 
def test_repr_parent(): parent = BitmaskArray(np.array([False, False, True])) bma = BitmaskArray(parent) result = repr(bma) - parent_id = hex(id(parent)) - assert f"parent: > 1) & 0x1) == 0 - result4 = bma.take_1d(np.array([0, 0]), axis=0) + result4 = bma.take_1d(np.array([0, 0], dtype=np.int64), axis=0) assert (result4.bytes[0] & 0x1) == 1 assert ((result4.bytes[0] >> 1) & 0x1) == 1 - result5 = bma.take_1d(np.array([3, 2, 1, 0]), axis=0) + result5 = bma.take_1d(np.array([3, 2, 1, 0], dtype=np.int64), axis=0) assert (result5.bytes[0] & 0x1) == 0 assert ((result5.bytes[0] >> 1) & 0x1) == 1 assert ((result5.bytes[0] >> 2) & 0x1) == 0 @@ -419,3 +419,24 @@ def test_to_numpy(data): result = bma.to_numpy() tm.assert_numpy_array_equal(result, data) + + +@pytest.mark.parametrize( + "array,expected", + [ + pytest.param(np.array([False, False]), [False, False], id="all_false"), + pytest.param(np.array([True, False]), [True, False], id="first_true"), + pytest.param(np.array([False, True]), [False, True], id="second_true"), + pytest.param(np.array([True, True]), [True, True], id="all_true"), + pytest.param(np.array([True, False] * 8), [True, False] * 8, id="multibyte"), + pytest.param( + np.array([[False, False], [True, True], [False, False]])[:, 0], + [False, True, False], + id="non-contiguous", + ), + ], +) +def test_memoryview(array, expected): + bma = BitmaskArray(array) + vw = memoryview(bma) + assert vw.tolist() == expected diff --git a/pandas/tests/arrays/masked_shared.py b/pandas/tests/arrays/masked_shared.py index 78726b2a90471..2f025d039389d 100644 --- a/pandas/tests/arrays/masked_shared.py +++ b/pandas/tests/arrays/masked_shared.py @@ -16,7 +16,7 @@ def _compare_other(self, data, op, other): expected = pd.Series(op(data._data, other), dtype="boolean") # fill the nan locations - expected[data._mask.to_numpy()] = pd.NA + expected[data._mask.to_numpy()] = pd.NA # TODO: have series accept memview tm.assert_series_equal(result, expected) From 
a0d538a4a8ee835adbd26455b9ee3443aaff2c63 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 28 Aug 2023 09:57:50 -0400 Subject: [PATCH 103/126] fixups --- pandas/_libs/arrays.pyx | 2 +- pandas/core/arrays/masked.py | 10 +--------- pandas/tests/arrays/masked/test_bitmask.py | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index df1de3cfb035f..010629d257547 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -469,7 +469,7 @@ cdef class BitmaskArray: def __getitem__(self, key): cdef Py_ssize_t ckey # to_numpy can be expensive, so try to avoid for simple cases - if isinstance(key, int): + if isinstance(key, int) and self.ndim == 1: ckey = key if ckey >= 0 and ckey < self.bitmap.size_bits: return bool(ArrowBitGet(self.bitmap.buffer.data, ckey)) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 9e5180b3cad4e..445986e642828 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -191,21 +191,13 @@ def __getitem__(self, item: SequenceIndexer) -> Self: def __getitem__(self, item: PositionalIndexer) -> Self | Any: item = check_array_indexer(self, item) - # TODO: some of the numpy semantics for handling 2D indexing - # are not implemented in the bitmaskarray, hence the to_numpy() - # requirement, though that slows things down - np_mask = self._mask.to_numpy() - newmask = np_mask[item] + newmask = self._mask[item] if is_bool(newmask): # This is a scalar indexing if newmask: return self.dtype.na_value return self._data[item] - # sending self._mask avoids copy of buffer - if np.array_equal(newmask, np_mask): - return self._simple_new(self._data[item], self._mask) - return self._simple_new(self._data[item], newmask) def pad_or_backfill( diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index a7b80c9a2c0c5..42b471508864f 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ 
b/pandas/tests/arrays/masked/test_bitmask.py @@ -19,7 +19,7 @@ pytest.param(np.array([True, False] * 8), bytes([0x55, 0x55]), id="multibyte"), pytest.param( np.array([[False, False], [True, True], [False, False]])[:, 0], - [False, True, False], + bytes([0x2]), id="non-contiguous", ), ], From 9a97677f329a7a76e602867eb1f07950d435fa5c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 28 Aug 2023 10:52:12 -0400 Subject: [PATCH 104/126] getitem fastpath for slice --- pandas/_libs/arrays.pyx | 28 ++++++++++++++++++++++ pandas/tests/arrays/masked/test_bitmask.py | 12 ++++++++++ 2 files changed, 40 insertions(+) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 010629d257547..41363cbe666fc 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -7,6 +7,7 @@ import numpy as np cimport numpy as cnp from cpython cimport PyErr_Clear +from cpython.slice cimport PySlice_Unpack from libc.stdlib cimport ( free, malloc, @@ -468,6 +469,11 @@ cdef class BitmaskArray: def __getitem__(self, key): cdef Py_ssize_t ckey + cdef Py_ssize_t start, stop, step + cdef BitmaskArray bma + cdef ArrowBitmap bitmap + cdef int64_t nbytes + cdef BitmaskArray self_ = self # to_numpy can be expensive, so try to avoid for simple cases if isinstance(key, int) and self.ndim == 1: ckey = key @@ -475,6 +481,28 @@ cdef class BitmaskArray: return bool(ArrowBitGet(self.bitmap.buffer.data, ckey)) elif is_null_slice(key): return self.copy() + elif isinstance(key, slice): + # fastpath for slices that start at 0 and step 1 at a time + # towards a positive number. 
+ # TODO: upstream generic ArrowBitsGet function in nanoarrow + PySlice_Unpack(key, &start, &stop, &step) + if start == 0 and stop > 0 and step == 1: + bma = BitmaskArray.__new__(BitmaskArray) + ArrowBitmapInit(&bitmap) + nbytes = (stop + 7) // 8 + ArrowBitmapReserve(&bitmap, nbytes) + memcpy(bitmap.buffer.data, self_.bitmap.buffer.data, nbytes) + bitmap.buffer.size_bytes = nbytes + bitmap.size_bits = stop + + bma.bitmap = bitmap + bma.buffer_owner = True + bma.ndim = self_.ndim + bma.shape = self_.shape + bma.strides = self_.strides + bma.parent = False + + return bma return self.to_numpy()[key] diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 42b471508864f..351cae23384ae 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -114,12 +114,24 @@ def test_getitem_null_slice(): result = bma[:] assert not result.parent + assert len(result) == 3 assert result.bytes[0] & 1 == 1 assert (result.bytes[0] >> 1) & 1 == 0 assert (result.bytes[0] >> 2) & 1 == 1 +def test_getitem_monotonic_slice(): + bma = BitmaskArray(np.array([True, False, True])) + result = bma[slice(2)] + + assert not result.parent + assert len(result) == 2 + + assert result.bytes[0] & 1 == 1 + assert (result.bytes[0] >> 1) & 1 == 0 + + @pytest.mark.parametrize( "indexer,expected", [ From 96f080d94f24b4ec5931b91823fa7525f146a5e7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 28 Aug 2023 10:59:05 -0400 Subject: [PATCH 105/126] mypy fix --- pandas/core/arrays/masked.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 445986e642828..a4343970d50dc 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -198,6 +198,7 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self.dtype.na_value return self._data[item] + assert not isinstance(newmask, bool) # for mypy return 
self._simple_new(self._data[item], newmask) def pad_or_backfill( From e35b769900eef034b8586cd3ae43c9b6bda414cc Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 28 Aug 2023 12:30:08 -0400 Subject: [PATCH 106/126] fix OOB memcpy --- pandas/_libs/arrays.pyx | 5 ++++- pandas/tests/arrays/masked/test_bitmask.py | 18 +++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 41363cbe666fc..aa65148d5f7cc 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -487,9 +487,12 @@ cdef class BitmaskArray: # TODO: upstream generic ArrowBitsGet function in nanoarrow PySlice_Unpack(key, &start, &stop, &step) if start == 0 and stop > 0 and step == 1: + nbytes = (stop + 7) // 8 + if nbytes > self_.bitmap.size_bits: + nbytes = self_.bitmap.size_bits + bma = BitmaskArray.__new__(BitmaskArray) ArrowBitmapInit(&bitmap) - nbytes = (stop + 7) // 8 ArrowBitmapReserve(&bitmap, nbytes) memcpy(bitmap.buffer.data, self_.bitmap.buffer.data, nbytes) bitmap.buffer.size_bytes = nbytes diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 351cae23384ae..ddbb9d87a9a2e 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -121,15 +121,23 @@ def test_getitem_null_slice(): assert (result.bytes[0] >> 2) & 1 == 1 -def test_getitem_monotonic_slice(): +@pytest.mark.parametrize( + "indexer,mask,expected", + [ + pytest.param(slice(2), bytes([0x3]), bytes([0x1]), id="basic_slice"), + pytest.param( + slice(1000), bytes([0x7]), bytes([0x05]), id="slice_exceeding_bounds" + ), + ], +) +def test_getitem_monotonic_slice(indexer, mask, expected): bma = BitmaskArray(np.array([True, False, True])) - result = bma[slice(2)] + result = bma[indexer] assert not result.parent - assert len(result) == 2 - assert result.bytes[0] & 1 == 1 - assert (result.bytes[0] >> 1) & 1 == 0 + # the bits past the length of result are 
undefined, so explicitly mask them out + assert (result.bytes[0] & mask[0]) == expected[0] @pytest.mark.parametrize( From 8149e03400438266f2266b235d578f095e8fb169 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 28 Aug 2023 14:33:06 -0400 Subject: [PATCH 107/126] fix slicing issue with memview --- pandas/_libs/arrays.pyx | 9 +++++---- pandas/tests/arrays/masked/test_bitmask.py | 7 +++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index aa65148d5f7cc..15ebecf55be6f 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -481,15 +481,16 @@ cdef class BitmaskArray: return bool(ArrowBitGet(self.bitmap.buffer.data, ckey)) elif is_null_slice(key): return self.copy() - elif isinstance(key, slice): + elif isinstance(key, slice) and self.ndim == 1: # fastpath for slices that start at 0 and step 1 at a time # towards a positive number. # TODO: upstream generic ArrowBitsGet function in nanoarrow PySlice_Unpack(key, &start, &stop, &step) if start == 0 and stop > 0 and step == 1: + if stop > self_.bitmap.size_bits: + stop = self_.bitmap.size_bits + nbytes = (stop + 7) // 8 - if nbytes > self_.bitmap.size_bits: - nbytes = self_.bitmap.size_bits bma = BitmaskArray.__new__(BitmaskArray) ArrowBitmapInit(&bitmap) @@ -501,7 +502,7 @@ cdef class BitmaskArray: bma.bitmap = bitmap bma.buffer_owner = True bma.ndim = self_.ndim - bma.shape = self_.shape + bma.shape[0] = stop bma.strides = self_.strides bma.parent = False diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index ddbb9d87a9a2e..95d8f1d233e1d 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -460,3 +460,10 @@ def test_memoryview(array, expected): bma = BitmaskArray(array) vw = memoryview(bma) assert vw.tolist() == expected + + +def test_bitmask_array_shape_from_sliced_bitmask(): + orig_bma = BitmaskArray([True] * 100) + bma = 
BitmaskArray(orig_bma[:10]) + + assert bma.shape == (10,) From 202de07da3a34c146523165834192ff252f89872 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 28 Aug 2023 15:03:21 -0400 Subject: [PATCH 108/126] fixups --- pandas/_libs/arrays.pyx | 10 ++++++---- pandas/tests/arrays/masked/test_bitmask.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 15ebecf55be6f..dcf0dceaf13f1 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -472,7 +472,7 @@ cdef class BitmaskArray: cdef Py_ssize_t start, stop, step cdef BitmaskArray bma cdef ArrowBitmap bitmap - cdef int64_t nbytes + cdef int64_t nbytes, nbits cdef BitmaskArray self_ = self # to_numpy can be expensive, so try to avoid for simple cases if isinstance(key, int) and self.ndim == 1: @@ -488,13 +488,15 @@ cdef class BitmaskArray: PySlice_Unpack(key, &start, &stop, &step) if start == 0 and stop > 0 and step == 1: if stop > self_.bitmap.size_bits: - stop = self_.bitmap.size_bits + nbits = self_.bitmap.size_bits + else: + nbits = stop nbytes = (stop + 7) // 8 bma = BitmaskArray.__new__(BitmaskArray) ArrowBitmapInit(&bitmap) - ArrowBitmapReserve(&bitmap, nbytes) + ArrowBitmapReserve(&bitmap, nbits) memcpy(bitmap.buffer.data, self_.bitmap.buffer.data, nbytes) bitmap.buffer.size_bytes = nbytes bitmap.size_bits = stop @@ -502,7 +504,7 @@ cdef class BitmaskArray: bma.bitmap = bitmap bma.buffer_owner = True bma.ndim = self_.ndim - bma.shape[0] = stop + bma.shape[0] = nbits bma.strides = self_.strides bma.parent = False diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 95d8f1d233e1d..66e31eca67d53 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -463,7 +463,7 @@ def test_memoryview(array, expected): def test_bitmask_array_shape_from_sliced_bitmask(): - orig_bma = BitmaskArray([True] * 100) + orig_bma = 
BitmaskArray(np.array([True] * 100)) bma = BitmaskArray(orig_bma[:10]) assert bma.shape == (10,) From 73f438c7094bed3312e4f29fb3d8ff19b79867e7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 28 Aug 2023 17:11:34 -0400 Subject: [PATCH 109/126] fixed memory issues with getitem fastpath --- pandas/_libs/arrays.pyx | 46 ++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index dcf0dceaf13f1..10f73cd7a3fa0 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -397,6 +397,7 @@ cdef class BitmaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, total_bits) + ConcatenateBitmapData(bitmaps, nbitmaps, &bitmap) free(bitmaps) @@ -492,14 +493,14 @@ cdef class BitmaskArray: else: nbits = stop - nbytes = (stop + 7) // 8 + nbytes = (nbits + 7) // 8 bma = BitmaskArray.__new__(BitmaskArray) ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, nbits) memcpy(bitmap.buffer.data, self_.bitmap.buffer.data, nbytes) bitmap.buffer.size_bytes = nbytes - bitmap.size_bits = stop + bitmap.size_bits = nbits bma.bitmap = bitmap bma.buffer_owner = True @@ -557,10 +558,11 @@ cdef class BitmaskArray: BitmapAnd(&self_.bitmap, &other_bma.bitmap, &bitmap) result = np.empty(self_.bitmap.size_bits, dtype=bool) - BitmaskArray.buffer_to_array_1d( - result, + ArrowBitsUnpackInt8( bitmap.buffer.data, - bitmap.size_bits + 0, + bitmap.size_bits, + &result[0] ) ArrowBitmapReset(&bitmap) @@ -592,10 +594,11 @@ cdef class BitmaskArray: BitmapOr(&self_.bitmap, &other_bma.bitmap, &bitmap) result = np.empty(self_.bitmap.size_bits, dtype=bool) - BitmaskArray.buffer_to_array_1d( - result, + ArrowBitsUnpackInt8( bitmap.buffer.data, - bitmap.size_bits + 0, + bitmap.size_bits, + &result[0] ) ArrowBitmapReset(&bitmap) @@ -627,10 +630,11 @@ cdef class BitmaskArray: BitmapXor(&self_.bitmap, &other_bma.bitmap, &bitmap) result = np.empty(self_.bitmap.size_bits, dtype=bool) - 
BitmaskArray.buffer_to_array_1d( - result, + ArrowBitsUnpackInt8( bitmap.buffer.data, - bitmap.size_bits + 0, + bitmap.size_bits, + &result[0] ) ArrowBitmapReset(&bitmap) if self_.ndim == 2: @@ -737,7 +741,7 @@ cdef class BitmaskArray: self_.bitmap.buffer.data, 0, self_.bitmap.size_bits, - self_.memview_buf + self_.memview_buf ) buffer.buf = self_.memview_buf @@ -840,19 +844,15 @@ cdef class BitmaskArray: def copy(self): return BitmaskArray.copy_from_bitmaskarray(self) - @cython.boundscheck(False) # TODO: Removing this causes an IndexError? Zero size? - @cython.wraparound(False) - @staticmethod - cdef void buffer_to_array_1d(uint8_t[:] out, const uint8_t* buf, Py_ssize_t size): - ArrowBitsUnpackInt8(buf, 0, size, &out[0]) - def to_numpy(self) -> ndarray: cdef BitmaskArray self_ = self - cdef ndarray[uint8_t] result = np.empty(self.bitmap.size_bits, dtype=bool) - BitmaskArray.buffer_to_array_1d( - result, - self.bitmap.buffer.data, - self.bitmap.size_bits + cdef ndarray[uint8_t] result = np.empty(self_.bitmap.size_bits, dtype=bool) + + ArrowBitsUnpackInt8( + self_.bitmap.buffer.data, + 0, + self_.bitmap.size_bits, + cnp.PyArray_BYTES(result), ) if self_.ndim == 2: From e09743f1fc00d7865aecafcf996a234ad649acea Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 28 Aug 2023 18:14:09 -0400 Subject: [PATCH 110/126] fix copy --- pandas/_libs/arrays.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 10f73cd7a3fa0..ddc708af2411f 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -481,7 +481,7 @@ cdef class BitmaskArray: if ckey >= 0 and ckey < self.bitmap.size_bits: return bool(ArrowBitGet(self.bitmap.buffer.data, ckey)) elif is_null_slice(key): - return self.copy() + return self elif isinstance(key, slice) and self.ndim == 1: # fastpath for slices that start at 0 and step 1 at a time # towards a positive number. 
From 3303be760bcb31dce9f92355fc705a4c77f4bbb4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 29 Aug 2023 14:07:53 -0400 Subject: [PATCH 111/126] win/32bit support --- pandas/tests/arrays/masked/test_bitmask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 66e31eca67d53..704c22b73e182 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -401,7 +401,7 @@ def test_take1d(): def test_take1d_raises_not_axis0(): bma = BitmaskArray(np.array([True, False, True])) with pytest.raises(NotImplementedError, match="only implemented for axis=0"): - bma.take_1d(np.array([1]), axis=1) + bma.take_1d(np.array([1], dtype=np.int64), axis=1) def test_take_1d_raises_empty_indices(): From 29873e442cc11d1a37a652036ef452a86c52df97 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 29 Aug 2023 17:25:03 -0400 Subject: [PATCH 112/126] NumPy compat --- pandas/tests/frame/indexing/test_where.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 3d3df2d714ca4..3cc0f48613f08 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas.compat.numpy import np_version_gte1p24 + from pandas.core.dtypes.common import is_scalar import pandas as pd @@ -702,6 +704,11 @@ def test_where_categorical_filtering(self): tm.assert_equal(result, expected) + @pytest.mark.xfail( + not np_version_gte1p24, + reason="Changed NumPy behavior for >1D non-tuple sequence indexing", + strict=False, + ) def test_where_ea_other(self): # GH#38729/GH#38742 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) From 173b4cbe53ec787ee31408d5132623e93a1527a0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 29 Aug 2023 18:42:14 -0400 Subject: [PATCH 113/126] test 
restructure --- pandas/tests/arrays/masked/test_bitmask.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 704c22b73e182..9141587d27171 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -10,7 +10,7 @@ @pytest.mark.parametrize( - "array,expected", + "data,expected", [ pytest.param(np.array([False, False]), bytes([0x0]), id="all_false"), pytest.param(np.array([True, False]), bytes([0x1]), id="first_true"), @@ -24,23 +24,24 @@ ), ], ) -def test_constructor_ndarray(array, expected): - bma = BitmaskArray(array) +def test_constructor_ndarray(data, expected): + bma = BitmaskArray(data) assert bma.bytes == expected assert not bma.parent @pytest.mark.parametrize( - "parent,expected", + "data,expected", [ - (BitmaskArray(np.array([False, False])), bytes([0x0])), - (BitmaskArray(np.array([True, False])), bytes([0x1])), - (BitmaskArray(np.array([False, True])), bytes([0x2])), - (BitmaskArray(np.array([True, True])), bytes([0x3])), - (BitmaskArray(np.array([True, False] * 8)), bytes([0x55, 0x55])), + (np.array([False, False]), bytes([0x0])), + (np.array([True, False]), bytes([0x1])), + (np.array([False, True]), bytes([0x2])), + (np.array([True, True]), bytes([0x3])), + (np.array([True, False] * 8), bytes([0x55, 0x55])), ], ) -def test_constructor_bitmap(parent, expected): +def test_constructor_bitmap(data, expected): + parent = BitmaskArray(data) bma = BitmaskArray(parent) assert bma.bytes == expected assert bma.parent is parent From a1278a94cada4aca1dab6fdc165532ce515d5c5a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 Aug 2023 12:22:06 -0400 Subject: [PATCH 114/126] more performance --- pandas/_libs/arrays.pyx | 181 ++++++++++-------- .../_libs/include/pandas/bitmask_algorithms.h | 12 ++ pandas/_libs/src/bitmask_algorithms.c | 178 +++++++++++++++-- 
pandas/tests/arrays/masked/test_bitmask.py | 161 +++++++++++++--- 4 files changed, 406 insertions(+), 126 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index ddc708af2411f..ad421e06b1864 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -53,6 +53,9 @@ cdef extern from "pandas/bitmask_algorithms.h": bint BitmapOr(const ArrowBitmap*, const ArrowBitmap*, ArrowBitmap*) bint BitmapXor(const ArrowBitmap*, const ArrowBitmap*, ArrowBitmap*) bint BitmapAnd(const ArrowBitmap*, const ArrowBitmap*, ArrowBitmap*) + bint BitmapOrBool(const ArrowBitmap*, bint, ArrowBitmap*) + bint BitmapXorBool(const ArrowBitmap*, bint, ArrowBitmap*) + bint BitmapAndBool(const ArrowBitmap*, bint, ArrowBitmap*) bint BitmapInvert(const ArrowBitmap*, ArrowBitmap*) bint BitmapTake(const ArrowBitmap*, const int64_t*, size_t, ArrowBitmap*) bint BitmapPutFromBufferMask(ArrowBitmap*, const uint8_t*, size_t, uint8_t) @@ -253,7 +256,7 @@ cdef class BitmaskArray: @cython.boundscheck(False) @cython.wraparound(False) - cdef void init_from_ndarray(self, const uint8_t[::1] arr): + cdef void init_from_ndarray(self, const uint8_t[::1] arr) noexcept: cdef ArrowBitmap bitmap # As long as we have a 1D arr argument we can use .shape[0] to avoid # a call to Python via .size @@ -264,7 +267,7 @@ cdef class BitmaskArray: self.bitmap = bitmap self.buffer_owner = True - cdef void init_from_bitmaskarray(self, BitmaskArray bma): + cdef void init_from_bitmaskarray(self, BitmaskArray bma) noexcept: self.bitmap = bma.bitmap self.buffer_owner = False self.ndim = bma.ndim @@ -274,27 +277,22 @@ cdef class BitmaskArray: self.shape[1] = bma.shape[1] self.strides[1] = bma.strides[1] - def __cinit__(self): - cdef BitmaskArray self_ = self - self.parent = False - self_.n_consumers = 0 - self_.memview_buf = NULL - def __init__(self, data): - cdef BitmaskArray self_ = self - if isinstance(data, np.ndarray): - if not data.flags["C_CONTIGUOUS"]: - data = np.ascontiguousarray(data) - - 
self.init_from_ndarray(data.ravel()) + cdef ndarray arr + if cnp.PyArray_Check(data): + arr = data + if not cnp.PyArray_IS_C_CONTIGUOUS(arr): + arr = cnp.PyArray_GETCONTIGUOUS(arr) + + self.init_from_ndarray(arr.ravel()) + self.ndim = arr.ndim + self.shape[0] = arr.shape[0] + self.strides[0] = arr.strides[0] + if self.ndim == 2: + self.shape[1] = arr.shape[1] + self.strides[1] = arr.strides[1] self.parent = None - self_.ndim = data.ndim - self_.shape[0] = data.shape[0] - self_.strides[0] = data.strides[0] - if (data.ndim == 2): - self_.shape[1] = data.shape[1] - self_.strides[1] = data.strides[1] - elif isinstance(data, type(self)): + elif isinstance(data, BitmaskArray): self.init_from_bitmaskarray(data) self.parent = data else: @@ -475,11 +473,13 @@ cdef class BitmaskArray: cdef ArrowBitmap bitmap cdef int64_t nbytes, nbits cdef BitmaskArray self_ = self + cdef bint result # to_numpy can be expensive, so try to avoid for simple cases if isinstance(key, int) and self.ndim == 1: ckey = key if ckey >= 0 and ckey < self.bitmap.size_bits: - return bool(ArrowBitGet(self.bitmap.buffer.data, ckey)) + result = ArrowBitGet(self.bitmap.buffer.data, ckey) + return result elif is_null_slice(key): return self elif isinstance(key, slice) and self.ndim == 1: @@ -537,19 +537,14 @@ cdef class BitmaskArray: return bma def __and__(self, other): - cdef ndarray[uint8_t] result cdef BitmaskArray other_bma, self_ = self # self_ required for Cython < 3 + cdef BitmaskArray bma cdef ArrowBitmap bitmap + cdef bint bval - if isinstance(other, type(self)): + if isinstance(other, BitmaskArray): # TODO: maybe should return Self here instead of ndarray - other_bma = other - if self_.bitmap.size_bits == 0: - result = np.empty([], dtype=bool) - if self_.ndim == 2: - return result.reshape(self_.shape[0], self_.shape[1]) - return result - + other_bma = other if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") @@ -557,29 +552,40 @@ cdef class 
BitmaskArray: ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) BitmapAnd(&self_.bitmap, &other_bma.bitmap, &bitmap) - result = np.empty(self_.bitmap.size_bits, dtype=bool) - ArrowBitsUnpackInt8( - bitmap.buffer.data, - 0, - bitmap.size_bits, - &result[0] - ) - ArrowBitmapReset(&bitmap) + bma = BitmaskArray.__new__(BitmaskArray) + bma.bitmap = bitmap + bma.buffer_owner = True + bma.ndim = self_.ndim + bma.shape = self_.shape + bma.strides = self_.strides + + return bma + elif isinstance(other, bool): + bval = other + ArrowBitmapInit(&bitmap) + ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) + BitmapAndBool(&self_.bitmap, bval, &bitmap) + + bma = BitmaskArray.__new__(BitmaskArray) + bma.bitmap = bitmap + bma.buffer_owner = True + bma.ndim = self_.ndim + bma.shape = self_.shape + bma.strides = self_.strides - if self_.ndim == 2: - return result.reshape(self_.shape[0], self_.shape[1]) - return result + return bma return self.to_numpy() & other def __or__(self, other): cdef ndarray[uint8_t] result cdef BitmaskArray other_bma, self_ = self # self_ required for Cython < 3 + cdef BitmaskArray bma cdef ArrowBitmap bitmap + cdef bint bval - if isinstance(other, type(self)): - # TODO: maybe should return Self here instead of ndarray - other_bma = other + if isinstance(other, BitmaskArray): + other_bma = other if self_.bitmap.size_bits == 0: result = np.empty([], dtype=bool) if self_.ndim == 2: @@ -593,35 +599,40 @@ cdef class BitmaskArray: ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) BitmapOr(&self_.bitmap, &other_bma.bitmap, &bitmap) - result = np.empty(self_.bitmap.size_bits, dtype=bool) - ArrowBitsUnpackInt8( - bitmap.buffer.data, - 0, - bitmap.size_bits, - &result[0] - ) - ArrowBitmapReset(&bitmap) + bma = BitmaskArray.__new__(BitmaskArray) + bma.bitmap = bitmap + bma.buffer_owner = True + bma.ndim = self_.ndim + bma.shape = self_.shape + bma.strides = self_.strides + + return bma + elif isinstance(other, bool): + bval = other + ArrowBitmapInit(&bitmap) + 
ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) + BitmapOrBool(&self_.bitmap, bval, &bitmap) + + bma = BitmaskArray.__new__(BitmaskArray) + bma.bitmap = bitmap + bma.buffer_owner = True + bma.ndim = self_.ndim + bma.shape = self_.shape + bma.strides = self_.strides - if self_.ndim == 2: - return result.reshape(self_.shape[0], self_.shape[1]) - return result + return bma return self.to_numpy() | other def __xor__(self, other): - cdef ndarray[uint8_t] result cdef BitmaskArray other_bma, self_ = self # self_ required for Cython < 3 + cdef BitmaskArray bma cdef ArrowBitmap bitmap + cdef bint bval - if isinstance(other, type(self)): + if isinstance(other, BitmaskArray): # TODO: maybe should return Self here instead of ndarray - other_bma = other - if self_.bitmap.size_bits == 0: - result = np.empty([], dtype=bool) - if self_.ndim == 2: - return result.reshape(self_.shape[0], self_.shape[1]) - return result - + other_bma = other if self_.bitmap.size_bits != other_bma.bitmap.size_bits: raise ValueError("bitmaps are not equal size") @@ -629,17 +640,28 @@ cdef class BitmaskArray: ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) BitmapXor(&self_.bitmap, &other_bma.bitmap, &bitmap) - result = np.empty(self_.bitmap.size_bits, dtype=bool) - ArrowBitsUnpackInt8( - bitmap.buffer.data, - 0, - bitmap.size_bits, - &result[0] - ) - ArrowBitmapReset(&bitmap) - if self_.ndim == 2: - return result.reshape(self_.shape[0], self_.shape[1]) - return result + bma = BitmaskArray.__new__(BitmaskArray) + bma.bitmap = bitmap + bma.buffer_owner = True + bma.ndim = self_.ndim + bma.shape = self_.shape + bma.strides = self_.strides + + return bma + elif isinstance(other, bool): + bval = other + ArrowBitmapInit(&bitmap) + ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) + BitmapXorBool(&self_.bitmap, bval, &bitmap) + + bma = BitmaskArray.__new__(BitmaskArray) + bma.bitmap = bitmap + bma.buffer_owner = True + bma.ndim = self_.ndim + bma.shape = self_.shape + bma.strides = self_.strides + + 
return bma return self.to_numpy() ^ other @@ -729,8 +751,10 @@ cdef class BitmaskArray: def __iter__(self): cdef Py_ssize_t i cdef BitmaskArray self_ = self # self_ required for Cython < 3 + cdef bint result for i in range(self_.bitmap.size_bits): - yield bool(ArrowBitGet(self_.bitmap.buffer.data, i)) + result = ArrowBitGet(self_.bitmap.buffer.data, i) + yield result def __getbuffer__(self, Py_buffer *buffer, int flags): cdef BitmaskArray self_ = self @@ -795,7 +819,8 @@ cdef class BitmaskArray: return np.dtype("bool") def any(self) -> bool: - return BitmapAny(&self.bitmap) + cdef bint result = BitmapAny(&self.bitmap) + return result def all(self) -> bool: return BitmapAll(&self.bitmap) diff --git a/pandas/_libs/include/pandas/bitmask_algorithms.h b/pandas/_libs/include/pandas/bitmask_algorithms.h index f29fddf6551ed..8598b5ebdd568 100644 --- a/pandas/_libs/include/pandas/bitmask_algorithms.h +++ b/pandas/_libs/include/pandas/bitmask_algorithms.h @@ -23,14 +23,26 @@ bool BitmapAll(const struct ArrowBitmap *bitmap); int BitmapOr(const struct ArrowBitmap *bitmap1, const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out); +/* Returns -1 on failure. On success returns 0 and writes to out */ +int BitmapOrBool(const struct ArrowBitmap *bitmap1, bool, + struct ArrowBitmap *out); + /* Returns -1 on failure. On success returns 0 and writes to out */ int BitmapXor(const struct ArrowBitmap *bitmap1, const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out); +/* Returns -1 on failure. On success returns 0 and writes to out */ +int BitmapXorBool(const struct ArrowBitmap *bitmap1, bool, + struct ArrowBitmap *out); + /* Returns -1 on failure. On success returns 0 and writes to out */ int BitmapAnd(const struct ArrowBitmap *bitmap1, const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out); +/* Returns -1 on failure. On success returns 0 and writes to out */ +int BitmapAndBool(const struct ArrowBitmap *bitmap1, bool, + struct ArrowBitmap *out); + /* Returns -1 on failure. 
On success returns 0 and writes to out */ int BitmapInvert(const struct ArrowBitmap *bitmap, struct ArrowBitmap *out); diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index d78d8b973a789..14d27b206838b 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -69,23 +69,22 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, } bool BitmapAny(const struct ArrowBitmap *bitmap) { - const size_t nbits = bitmap->size_bits; - const size_t size_bytes = bitmap->buffer.size_bytes; - const uint8_t *buf = bitmap->buffer.data; - - if (nbits < 1) { + if (bitmap->size_bits < 1) { return false; } - for (size_t i = 0; i < size_bytes - 1; i++) { - if (buf[i] > 0) { + size_t i = 0; + for (; i + sizeof(size_t) - 1 < bitmap->buffer.size_bytes; + i += sizeof(size_t)) { + size_t value; + memcpy(&value, &bitmap->buffer.data[i], sizeof(size_t)); + if (value != 0x0) { return true; } } - const size_t bits_remaining = nbits - ((size_bytes - 1) * 8); - for (size_t i = 0; i < bits_remaining; i++) { - if (ArrowBitGet(buf, nbits - i - 1)) { + for (; i < bitmap->buffer.size_bytes; i++) { + if (bitmap->buffer.data[i] != 0x0) { return true; } } @@ -96,21 +95,29 @@ bool BitmapAny(const struct ArrowBitmap *bitmap) { bool BitmapAll(const struct ArrowBitmap *bitmap) { const size_t nbits = bitmap->size_bits; const size_t size_bytes = bitmap->buffer.size_bytes; - const uint8_t *buf = bitmap->buffer.data; - if (nbits < 1) { return true; } - for (size_t i = 0; i < size_bytes - 1; i++) { - if (buf[i] != 0xff) { + size_t i = 0; + for (; i + sizeof(size_t) - 1 < bitmap->buffer.size_bytes; + i += sizeof(size_t)) { + size_t value; + memcpy(&value, &bitmap->buffer.data[i], sizeof(size_t)); + if (value != SIZE_MAX) { + return false; + } + } + + for (; i < bitmap->buffer.size_bytes - 1; i++) { + if (bitmap->buffer.data[i] != 0xff) { return false; } } const size_t bits_remaining = nbits - ((size_bytes - 
1) * 8); for (size_t i = 0; i < bits_remaining; i++) { - if (ArrowBitGet(buf, nbits - i - 1) == 0) { + if (ArrowBitGet(bitmap->buffer.data, nbits - i - 1) == 0) { return false; } } @@ -126,7 +133,19 @@ int BitmapOr(const struct ArrowBitmap *bitmap1, return -1; } - for (int64_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + size_t i = 0; + for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; + i += sizeof(size_t)) { + size_t value1; + size_t value2; + size_t result; + memcpy(&value1, &bitmap1->buffer.data[i], sizeof(size_t)); + memcpy(&value2, &bitmap2->buffer.data[i], sizeof(size_t)); + result = value1 | value2; + memcpy(&out->buffer.data[i], &result, sizeof(size_t)); + } + + for (; i < bitmap1->buffer.size_bytes; i++) { out->buffer.data[i] = bitmap1->buffer.data[i] | bitmap2->buffer.data[i]; } @@ -136,6 +155,35 @@ int BitmapOr(const struct ArrowBitmap *bitmap1, return 0; } +int BitmapOrBool(const struct ArrowBitmap *bitmap1, bool other, + struct ArrowBitmap *out) { + if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + return -1; + } + + const size_t mask = other ? SIZE_MAX : 0; + const uint8_t umask = other ? 
UINT8_MAX : 0; + + size_t i = 0; + for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; + i += sizeof(size_t)) { + size_t value1; + size_t result; + memcpy(&value1, &bitmap1->buffer.data[i], sizeof(size_t)); + result = value1 | mask; + memcpy(&out->buffer.data[i], &result, sizeof(size_t)); + } + + for (; i < bitmap1->buffer.size_bytes; i++) { + out->buffer.data[i] = bitmap1->buffer.data[i] | umask; + } + + out->size_bits = bitmap1->size_bits; + out->buffer.size_bytes = bitmap1->buffer.size_bytes; + + return 0; +} + int BitmapAnd(const struct ArrowBitmap *bitmap1, const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out) { if (bitmap1->size_bits != bitmap2->size_bits) { @@ -144,7 +192,19 @@ int BitmapAnd(const struct ArrowBitmap *bitmap1, return -1; } - for (int64_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + size_t i = 0; + for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; + i += sizeof(size_t)) { + size_t value1; + size_t value2; + size_t result; + memcpy(&value1, &bitmap1->buffer.data[i], sizeof(size_t)); + memcpy(&value2, &bitmap2->buffer.data[i], sizeof(size_t)); + result = value1 & value2; + memcpy(&out->buffer.data[i], &result, sizeof(size_t)); + } + + for (; i < bitmap1->buffer.size_bytes; i++) { out->buffer.data[i] = bitmap1->buffer.data[i] & bitmap2->buffer.data[i]; } @@ -154,6 +214,35 @@ int BitmapAnd(const struct ArrowBitmap *bitmap1, return 0; } +int BitmapAndBool(const struct ArrowBitmap *bitmap1, bool other, + struct ArrowBitmap *out) { + if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + return -1; + } + + const size_t mask = other ? SIZE_MAX : 0; + const uint8_t umask = other ? 
UINT8_MAX : 0; + + size_t i = 0; + for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; + i += sizeof(size_t)) { + size_t value1; + size_t result; + memcpy(&value1, &bitmap1->buffer.data[i], sizeof(size_t)); + result = value1 & mask; + memcpy(&out->buffer.data[i], &result, sizeof(size_t)); + } + + for (; i < bitmap1->buffer.size_bytes; i++) { + out->buffer.data[i] = bitmap1->buffer.data[i] & umask; + } + + out->size_bits = bitmap1->size_bits; + out->buffer.size_bytes = bitmap1->buffer.size_bytes; + + return 0; +} + int BitmapXor(const struct ArrowBitmap *bitmap1, const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out) { if (bitmap1->size_bits != bitmap2->size_bits) { @@ -162,7 +251,19 @@ int BitmapXor(const struct ArrowBitmap *bitmap1, return -1; } - for (int64_t i = 0; i < bitmap1->buffer.size_bytes; i++) { + size_t i = 0; + for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; + i += sizeof(size_t)) { + size_t value1; + size_t value2; + size_t result; + memcpy(&value1, &bitmap1->buffer.data[i], sizeof(size_t)); + memcpy(&value2, &bitmap2->buffer.data[i], sizeof(size_t)); + result = value1 ^ value2; + memcpy(&out->buffer.data[i], &result, sizeof(size_t)); + } + + for (; i < bitmap1->buffer.size_bytes; i++) { out->buffer.data[i] = bitmap1->buffer.data[i] ^ bitmap2->buffer.data[i]; } @@ -172,12 +273,51 @@ int BitmapXor(const struct ArrowBitmap *bitmap1, return 0; } +int BitmapXorBool(const struct ArrowBitmap *bitmap1, bool other, + struct ArrowBitmap *out) { + if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + return -1; + } + + const size_t mask = other ? SIZE_MAX : 0; + const uint8_t umask = other ? 
UINT8_MAX : 0; + + size_t i = 0; + for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; + i += sizeof(size_t)) { + size_t value1; + size_t result; + memcpy(&value1, &bitmap1->buffer.data[i], sizeof(size_t)); + result = value1 ^ mask; + memcpy(&out->buffer.data[i], &result, sizeof(size_t)); + } + + for (; i < bitmap1->buffer.size_bytes; i++) { + out->buffer.data[i] = bitmap1->buffer.data[i] ^ umask; + } + + out->size_bits = bitmap1->size_bits; + out->buffer.size_bytes = bitmap1->buffer.size_bytes; + + return 0; +} + int BitmapInvert(const struct ArrowBitmap *bitmap, struct ArrowBitmap *out) { if (!(out->buffer.capacity_bytes >= bitmap->buffer.size_bytes)) { return -1; } - for (int64_t i = 0; i < bitmap->buffer.size_bytes; i++) { + size_t i = 0; + for (; i + sizeof(size_t) - 1 < bitmap->buffer.size_bytes; + i += sizeof(size_t)) { + size_t value; + size_t result; + memcpy(&value, &bitmap->buffer.data[i], sizeof(size_t)); + result = ~value; + memcpy(&out->buffer.data[i], &result, sizeof(size_t)); + } + + for (; i < bitmap->buffer.size_bytes; i++) { out->buffer.data[i] = ~bitmap->buffer.data[i]; } diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 9141587d27171..0144b8df55f38 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -192,7 +192,22 @@ def test_invert(): assert ((result2.bytes[0] >> 1) & 0x1) == 0 -@pytest.mark.parametrize("rhs_as_bitmask", [True, False]) +@pytest.mark.parametrize( + "lhs,rhs,expected", + [ + ([True], [True], bytes([0x1])), + ([True], [False], bytes([0x0])), + ([False], [False], bytes([0x0])), + ([True] * 10, [True] * 10, bytes([0xFF, 0x3])), + ([False] * 10, [True] * 10, bytes([0x0, 0x0])), + ], +) +def test_and_bitmask(lhs, rhs, expected): + bma1 = BitmaskArray(np.array(lhs)) + result = bma1 & BitmaskArray(np.array(rhs)) + assert result.bytes == expected + + @pytest.mark.parametrize( "lhs,rhs,expected", [ @@ -203,44 +218,115 
@@ def test_invert(): ([False] * 10, [True] * 10, [False] * 10), ], ) -def test_and(rhs_as_bitmask, lhs, rhs, expected): +def test_and_ndarray(lhs, rhs, expected): bma1 = BitmaskArray(np.array(lhs)) - if rhs_as_bitmask: - bma2 = BitmaskArray(np.array(rhs)) - else: - bma2 = np.array(rhs) + result = bma1 & np.array(rhs) + assert (result == np.array(expected)).all() - expected = np.array(expected) - result = bma1 & bma2 - assert (result == expected).all() + +@pytest.mark.parametrize( + "lhs,rhs,expected", + [ + ([True], True, bytes([0x1])), + ([True], False, bytes([0x0])), + ([False], False, bytes([0x0])), + ([True] * 10, True, bytes([0xFF, 0x3])), + ([False] * 10, True, bytes([0x0, 0x0])), + ], +) +def test_and_scalar(lhs, rhs, expected): + bma1 = BitmaskArray(np.array(lhs)) + result = bma1 & rhs + + # We don't really care about the bits that + # exist beyond the length of the bitmask, but + # to make testing easy we assume XOR still operates + # on them. Might be better to implement equality + # on bitmaskarray and test instead of looking at bytes + assert result.bytes == expected + + +@pytest.mark.parametrize( + "lhs,rhs,expected", + [ + ([True], [True], bytes([0x1])), + ([True], [False], bytes([0x1])), + ([False], [False], bytes([0x0])), + ([True] * 10, [True] * 10, bytes([0xFF, 0x3])), + ([False] * 10, [True] * 10, bytes([0xFF, 0x3])), + ], +) +def test_or_bitmask(lhs, rhs, expected): + bma1 = BitmaskArray(np.array(lhs)) + result = bma1 | BitmaskArray(np.array(rhs)) + assert result.bytes == expected -@pytest.mark.parametrize("rhs_as_bitmask", [True, False]) @pytest.mark.parametrize( "lhs,rhs,expected", [ ([True], [True], [True]), - ([True], [False], [True]), - ([False], [False], [False]), + ( + [True], + [False], + [True], + ), + ( + [False], + [False], + [False], + ), ([True] * 10, [True] * 10, [True] * 10), ([False] * 10, [True] * 10, [True] * 10), ], ) -def test_or(rhs_as_bitmask, lhs, rhs, expected): +def test_or_ndarray(lhs, rhs, expected): bma1 = 
BitmaskArray(np.array(lhs)) - if rhs_as_bitmask: - bma2 = BitmaskArray(np.array(rhs)) - else: - bma2 = np.array(rhs) + result = bma1 | np.array(rhs) + assert (result == np.array(expected)).all() - expected = np.array(expected) - result = bma1 | bma2 - assert (result == expected).all() + +@pytest.mark.parametrize( + "lhs,rhs,expected", + [ + ([True], True, bytes([0xFF])), + ([True], False, bytes([0x1])), + ([False], False, bytes([0x0])), + ([True] * 10, True, bytes([0xFF, 0xFF])), + ([False] * 10, True, bytes([0xFF, 0xFF])), + ], +) +def test_or_scalar(lhs, rhs, expected): + bma1 = BitmaskArray(np.array(lhs)) + result = bma1 | rhs + + # We don't really care about the bits that + # exist beyond the length of the bitmask, but + # to make testing easy we assume XOR still operates + # on them. Might be better to implement equality + # on bitmaskarray and test instead of looking at bytes + assert result.bytes == expected + + +@pytest.mark.parametrize( + "lhs,rhs,expected", + [ + ([True], [True], bytes([0x0])), + ([True], [False], bytes([0x1])), + ([False], [False], bytes([0x0])), + ([True] * 10, [True] * 10, bytes([0x0, 0x0])), + ([False] * 10, [True] * 10, bytes([0xFF, 0x3])), + ], +) +def test_xor_bitmask(lhs, rhs, expected): + bma1 = BitmaskArray(np.array(lhs)) + other = BitmaskArray(np.array(rhs)) + result = bma1 ^ other + assert result.bytes == expected -@pytest.mark.parametrize("rhs_as_bitmask", [True, False]) @pytest.mark.parametrize( "lhs,rhs,expected", [ @@ -251,17 +337,34 @@ def test_or(rhs_as_bitmask, lhs, rhs, expected): ([False] * 10, [True] * 10, [True] * 10), ], ) -def test_xor(rhs_as_bitmask, lhs, rhs, expected): +def test_xor_ndarray(lhs, rhs, expected): bma1 = BitmaskArray(np.array(lhs)) + other = np.array(rhs) + result = bma1 ^ other + assert (result == np.array(expected)).all() - if rhs_as_bitmask: - bma2 = BitmaskArray(np.array(rhs)) - else: - bma2 = np.array(rhs) - expected = np.array(expected) - result = bma1 ^ bma2 - assert (result == 
expected).all() +@pytest.mark.parametrize( + "lhs,rhs,expected", + [ + ([True], True, bytes([0xFE])), + ([True], False, bytes([0x1])), + ([False], False, bytes([0x0])), + ([True] * 10, True, bytes([0x0, 0xFC])), + ([False] * 10, True, bytes([0xFF, 0xFF])), + ], +) +def test_xor_scalar(lhs, rhs, expected): + bma1 = BitmaskArray(np.array(lhs)) + other = rhs + result = bma1 ^ other + + # We don't really care about the bits that + # exist beyond the length of the bitmask, but + # to make testing easy we assume XOR still operates + # on them. Might be better to implement equality + # on bitmaskarray and test instead of looking at bytes + assert result.bytes == expected def test_pickle(): From bc772c302a591ab200bc6982b1f80465eaff245e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 Aug 2023 16:45:26 -0400 Subject: [PATCH 115/126] bugfix with all refactor --- pandas/_libs/src/bitmask_algorithms.c | 13 +++++++++++-- pandas/tests/arrays/masked/test_bitmask.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 14d27b206838b..ccb682549d16a 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -69,7 +69,9 @@ void ConcatenateBitmapData(const struct ArrowBitmap **bitmaps, size_t nbitmaps, } bool BitmapAny(const struct ArrowBitmap *bitmap) { - if (bitmap->size_bits < 1) { + const size_t nbits = bitmap->size_bits; + const size_t size_bytes = bitmap->buffer.size_bytes; + if (nbits < 1) { return false; } @@ -83,12 +85,19 @@ bool BitmapAny(const struct ArrowBitmap *bitmap) { } } - for (; i < bitmap->buffer.size_bytes; i++) { + for (; i < bitmap->buffer.size_bytes - 1; i++) { if (bitmap->buffer.data[i] != 0x0) { return true; } } + const size_t bits_remaining = nbits - ((size_bytes - 1) * 8); + for (size_t i = 0; i < bits_remaining; i++) { + if (ArrowBitGet(bitmap->buffer.data, nbits - i - 1) == 1) { + return true; + } 
+ } + return false; } diff --git a/pandas/tests/arrays/masked/test_bitmask.py b/pandas/tests/arrays/masked/test_bitmask.py index 0144b8df55f38..d895618ba3483 100644 --- a/pandas/tests/arrays/masked/test_bitmask.py +++ b/pandas/tests/arrays/masked/test_bitmask.py @@ -450,6 +450,15 @@ def test_any(data, expected): assert bma.any() == expected +def test_any_sliced_bitmask(): + # Need to ensure any doesn't look beyond bounds of slice + bma = BitmaskArray(np.array([False, False, True, True])) + assert bma.any() + + new_bma = bma[:2] + assert not new_bma.any() + + @pytest.mark.parametrize( "data,expected", [ @@ -464,6 +473,15 @@ def test_all(data, expected): assert bma.all() == expected +def test_all_sliced_bitmask(): + # Need to ensure all doesn't look beyond bounds of slice + bma = BitmaskArray(np.array([True, True, False, False])) + assert not bma.all() + + new_bma = bma[:2] + assert new_bma.all() + + @pytest.mark.parametrize( "data,expected", [ From 1c637a16458af2d62cc3b7b4d198b14daa013c4c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 Aug 2023 17:20:45 -0400 Subject: [PATCH 116/126] less to_numpy() --- pandas/core/algorithms.py | 4 +--- pandas/core/arrays/masked.py | 15 ++++----------- pandas/core/arrays/string_.py | 2 +- pandas/core/arrays/string_arrow.py | 4 ++-- pandas/core/arrays/timedeltas.py | 2 +- 5 files changed, 9 insertions(+), 18 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 53a75754a49c2..9769e37829ec8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1005,9 +1005,7 @@ def duplicated( if isinstance(values.dtype, BaseMaskedDtype): values = cast("BaseMaskedArray", values) - return htable.duplicated( - values._data, keep=keep, mask=values._mask.to_numpy() - ) + return htable.duplicated(values._data, keep=keep, mask=values._mask) values = _ensure_data(values) return htable.duplicated(values, keep=keep) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 
da5f501e9b3f7..8883095b56b09 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -308,16 +308,12 @@ def __setitem__(self, key, value) -> None: value, mask = self._coerce_to_array(value, dtype=self.dtype) self._data[key] = value - if isinstance(mask, BitmaskArray): - mask = mask.to_numpy() - self._mask[key] = mask def __contains__(self, key) -> bool: if isna(key) and key is not self.dtype.na_value: # GH#52840 if self._data.dtype.kind == "f" and lib.is_float(key): - # TODO: implement low level invert operator on BitmaskArray return bool((np.isnan(self._data) & ~self._mask).any()) return bool(super().__contains__(key)) @@ -402,7 +398,7 @@ def round(self, decimals: int = 0, *args, **kwargs): values = np.round(self._data, decimals=decimals, **kwargs) # Usually we'll get same type as self, but ndarray[bool] casts to float - return self._maybe_mask_result(values, self._mask) + return self._maybe_mask_result(values, self._mask.copy()) # ------------------------------------------------------------------ # Unary Methods @@ -1378,14 +1374,12 @@ def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): """ nv.validate_any((), kwargs) - # attempt to avoid to_numpy call on mask for best performance is_all_na = self._mask.all() - is_any_na = self._mask.any() if len(self) == 0 or (skipna and is_all_na): return np.bool_(False) + is_any_na = self._mask.any() if is_any_na: - # fallback to numpy - will be slower values = self._data.copy() # error: Argument 3 to "putmask" has incompatible type "object"; # expected "Union[_SupportsArray[dtype[Any]], @@ -1474,12 +1468,11 @@ def all(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): """ nv.validate_all((), kwargs) - # attempt to avoid to_numpy call on mask for best performance is_all_na = self._mask.all() - is_any_na = self._mask.any() if len(self) == 0 or (skipna and is_all_na): return np.bool_(True) + is_any_na = self._mask.any() if is_any_na: values = self._data.copy() # 
error: Argument 3 to "putmask" has incompatible type "object"; @@ -1546,7 +1539,7 @@ def _groupby_op( min_count=min_count, ngroups=ngroups, comp_ids=ids, - mask=mask.to_numpy(), + mask=mask, result_mask=result_mask, **kwargs, ) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 967039c1498fc..72ba95e5fa258 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -372,7 +372,7 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal if isinstance(scalars, BaseMaskedArray): # avoid costly conversion to object dtype - na_values = scalars._mask.to_numpy() + na_values = scalars._mask result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) result[na_values] = libmissing.NA diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 607c549996eb2..f4f64e20a7977 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -155,10 +155,10 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False) if isinstance(scalars, BaseMaskedArray): # avoid costly conversion to object dtype in ensure_string_array and # numerical issues with Float32Dtype - na_values = scalars._mask.to_numpy() + na_values = scalars._mask result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) - return cls(pa.array(result, mask=na_values, type=pa.string())) + return cls(pa.array(result, mask=na_values.to_numpy(), type=pa.string())) elif isinstance(scalars, (pa.Array, pa.ChunkedArray)): return cls(pc.cast(scalars, pa.string())) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a8a6e028b11f9..b7b81b8271106 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -1071,7 +1071,7 @@ def sequence_to_td64ns( # cast the unit, multiply base/frac separately # to avoid precision issues from float -> 
int if isinstance(data.dtype, ExtensionDtype): - mask = data._mask.to_numpy() + mask = data._mask data = data._data else: mask = np.isnan(data) From 3dfe66823c8d9546b2d5bcece9608130e5b60c31 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 Aug 2023 17:37:58 -0400 Subject: [PATCH 117/126] Error message cleanups --- pandas/_libs/arrays.pyx | 32 +++++++++--- pandas/_libs/src/bitmask_algorithms.c | 71 ++++++++++++++++++--------- 2 files changed, 73 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index ad421e06b1864..fd8382c13e6c3 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -521,11 +521,14 @@ cdef class BitmaskArray: cdef BitmaskArray self_ = self cdef BitmaskArray bma = BitmaskArray.__new__(BitmaskArray) cdef ArrowBitmap bitmap + cdef int ret ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) - BitmapInvert(&self_.bitmap, &bitmap) + ret = BitmapInvert(&self_.bitmap, &bitmap) + if ret == -1: + raise RuntimeError("BitmapInvert failed") bma.bitmap = bitmap bma.buffer_owner = True @@ -541,6 +544,7 @@ cdef class BitmaskArray: cdef BitmaskArray bma cdef ArrowBitmap bitmap cdef bint bval + cdef int ret if isinstance(other, BitmaskArray): # TODO: maybe should return Self here instead of ndarray @@ -550,7 +554,9 @@ cdef class BitmaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) - BitmapAnd(&self_.bitmap, &other_bma.bitmap, &bitmap) + ret = BitmapAnd(&self_.bitmap, &other_bma.bitmap, &bitmap) + if ret == -1: + raise RuntimeError("BitmapAnd failed") bma = BitmaskArray.__new__(BitmaskArray) bma.bitmap = bitmap @@ -564,7 +570,9 @@ cdef class BitmaskArray: bval = other ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) - BitmapAndBool(&self_.bitmap, bval, &bitmap) + ret = BitmapAndBool(&self_.bitmap, bval, &bitmap) + if ret == -1: + raise RuntimeError("BitmapAndBool failed") bma = BitmaskArray.__new__(BitmaskArray) 
bma.bitmap = bitmap @@ -583,6 +591,7 @@ cdef class BitmaskArray: cdef BitmaskArray bma cdef ArrowBitmap bitmap cdef bint bval + cdef int ret if isinstance(other, BitmaskArray): other_bma = other @@ -597,7 +606,9 @@ cdef class BitmaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) - BitmapOr(&self_.bitmap, &other_bma.bitmap, &bitmap) + ret = BitmapOr(&self_.bitmap, &other_bma.bitmap, &bitmap) + if ret == -1: + raise RuntimeError("BitmapOr failed") bma = BitmaskArray.__new__(BitmaskArray) bma.bitmap = bitmap @@ -611,7 +622,9 @@ cdef class BitmaskArray: bval = other ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) - BitmapOrBool(&self_.bitmap, bval, &bitmap) + ret = BitmapOrBool(&self_.bitmap, bval, &bitmap) + if ret == -1: + raise RuntimeError("BitmapOrBool failed") bma = BitmaskArray.__new__(BitmaskArray) bma.bitmap = bitmap @@ -629,6 +642,7 @@ cdef class BitmaskArray: cdef BitmaskArray bma cdef ArrowBitmap bitmap cdef bint bval + cdef int ret if isinstance(other, BitmaskArray): # TODO: maybe should return Self here instead of ndarray @@ -638,7 +652,9 @@ cdef class BitmaskArray: ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) - BitmapXor(&self_.bitmap, &other_bma.bitmap, &bitmap) + ret = BitmapXor(&self_.bitmap, &other_bma.bitmap, &bitmap) + if ret == -1: + raise RuntimeError("BitmapXor failed") bma = BitmaskArray.__new__(BitmaskArray) bma.bitmap = bitmap @@ -652,7 +668,9 @@ cdef class BitmaskArray: bval = other ArrowBitmapInit(&bitmap) ArrowBitmapReserve(&bitmap, self_.bitmap.size_bits) - BitmapXorBool(&self_.bitmap, bval, &bitmap) + ret = BitmapXorBool(&self_.bitmap, bval, &bitmap) + if ret == -1: + raise RuntimeError("BitmapXorBool failed") bma = BitmaskArray.__new__(BitmaskArray) bma.bitmap = bitmap diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index ccb682549d16a..399705fcbbdd6 100644 --- 
a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -75,9 +75,11 @@ bool BitmapAny(const struct ArrowBitmap *bitmap) { return false; } + const size_t overflow_limit = SIZE_MAX - sizeof(size_t); + const size_t limit = + size_bytes > overflow_limit ? overflow_limit : size_bytes; size_t i = 0; - for (; i + sizeof(size_t) - 1 < bitmap->buffer.size_bytes; - i += sizeof(size_t)) { + for (; i + sizeof(size_t) - 1 < limit; i += sizeof(size_t)) { size_t value; memcpy(&value, &bitmap->buffer.data[i], sizeof(size_t)); if (value != 0x0) { @@ -108,9 +110,11 @@ bool BitmapAll(const struct ArrowBitmap *bitmap) { return true; } + const size_t overflow_limit = SIZE_MAX - sizeof(size_t); + const size_t limit = + size_bytes > overflow_limit ? overflow_limit : size_bytes; size_t i = 0; - for (; i + sizeof(size_t) - 1 < bitmap->buffer.size_bytes; - i += sizeof(size_t)) { + for (; i + sizeof(size_t) - 1 < limit; i += sizeof(size_t)) { size_t value; memcpy(&value, &bitmap->buffer.data[i], sizeof(size_t)); if (value != SIZE_MAX) { @@ -136,15 +140,18 @@ bool BitmapAll(const struct ArrowBitmap *bitmap) { int BitmapOr(const struct ArrowBitmap *bitmap1, const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out) { + const size_t size_bytes = bitmap1->buffer.size_bytes; if (bitmap1->size_bits != bitmap2->size_bits) { return -1; - } else if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + } else if (!(out->buffer.capacity_bytes >= size_bytes)) { return -1; } + const size_t overflow_limit = SIZE_MAX - sizeof(size_t); + const size_t limit = + size_bytes > overflow_limit ? 
overflow_limit : size_bytes; size_t i = 0; - for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; - i += sizeof(size_t)) { + for (; i + sizeof(size_t) - 1 < limit; i += sizeof(size_t)) { size_t value1; size_t value2; size_t result; @@ -166,16 +173,19 @@ int BitmapOr(const struct ArrowBitmap *bitmap1, int BitmapOrBool(const struct ArrowBitmap *bitmap1, bool other, struct ArrowBitmap *out) { - if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + const size_t size_bytes = bitmap1->buffer.size_bytes; + if (!(out->buffer.capacity_bytes >= size_bytes)) { return -1; } const size_t mask = other ? SIZE_MAX : 0; const uint8_t umask = other ? UINT8_MAX : 0; + const size_t overflow_limit = SIZE_MAX - sizeof(size_t); + const size_t limit = + size_bytes > overflow_limit ? overflow_limit : size_bytes; size_t i = 0; - for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; - i += sizeof(size_t)) { + for (; i + sizeof(size_t) - 1 < limit; i += sizeof(size_t)) { size_t value1; size_t result; memcpy(&value1, &bitmap1->buffer.data[i], sizeof(size_t)); @@ -195,15 +205,18 @@ int BitmapOrBool(const struct ArrowBitmap *bitmap1, bool other, int BitmapAnd(const struct ArrowBitmap *bitmap1, const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out) { + const size_t size_bytes = bitmap1->buffer.size_bytes; if (bitmap1->size_bits != bitmap2->size_bits) { return -1; - } else if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + } else if (!(out->buffer.capacity_bytes >= size_bytes)) { return -1; } + const size_t overflow_limit = SIZE_MAX - sizeof(size_t); + const size_t limit = + size_bytes > overflow_limit ? 
overflow_limit : size_bytes; size_t i = 0; - for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; - i += sizeof(size_t)) { + for (; i + sizeof(size_t) - 1 < limit; i += sizeof(size_t)) { size_t value1; size_t value2; size_t result; @@ -225,6 +238,7 @@ int BitmapAnd(const struct ArrowBitmap *bitmap1, int BitmapAndBool(const struct ArrowBitmap *bitmap1, bool other, struct ArrowBitmap *out) { + const size_t size_bytes = bitmap1->buffer.size_bytes; if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { return -1; } @@ -232,9 +246,11 @@ int BitmapAndBool(const struct ArrowBitmap *bitmap1, bool other, const size_t mask = other ? SIZE_MAX : 0; const uint8_t umask = other ? UINT8_MAX : 0; + const size_t overflow_limit = SIZE_MAX - sizeof(size_t); + const size_t limit = + size_bytes > overflow_limit ? overflow_limit : size_bytes; size_t i = 0; - for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; - i += sizeof(size_t)) { + for (; i + sizeof(size_t) - 1 < limit; i += sizeof(size_t)) { size_t value1; size_t result; memcpy(&value1, &bitmap1->buffer.data[i], sizeof(size_t)); @@ -254,15 +270,18 @@ int BitmapAndBool(const struct ArrowBitmap *bitmap1, bool other, int BitmapXor(const struct ArrowBitmap *bitmap1, const struct ArrowBitmap *bitmap2, struct ArrowBitmap *out) { + const size_t size_bytes = bitmap1->buffer.size_bytes; if (bitmap1->size_bits != bitmap2->size_bits) { return -1; - } else if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { + } else if (!(out->buffer.capacity_bytes >= size_bytes)) { return -1; } + const size_t overflow_limit = SIZE_MAX - sizeof(size_t); + const size_t limit = + size_bytes > overflow_limit ? 
overflow_limit : size_bytes; size_t i = 0; - for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; - i += sizeof(size_t)) { + for (; i + sizeof(size_t) - 1 < limit; i += sizeof(size_t)) { size_t value1; size_t value2; size_t result; @@ -284,6 +303,7 @@ int BitmapXor(const struct ArrowBitmap *bitmap1, int BitmapXorBool(const struct ArrowBitmap *bitmap1, bool other, struct ArrowBitmap *out) { + const size_t size_bytes = bitmap1->buffer.size_bytes; if (!(out->buffer.capacity_bytes >= bitmap1->buffer.size_bytes)) { return -1; } @@ -291,9 +311,11 @@ int BitmapXorBool(const struct ArrowBitmap *bitmap1, bool other, const size_t mask = other ? SIZE_MAX : 0; const uint8_t umask = other ? UINT8_MAX : 0; + const size_t overflow_limit = SIZE_MAX - sizeof(size_t); + const size_t limit = + size_bytes > overflow_limit ? overflow_limit : size_bytes; size_t i = 0; - for (; i + sizeof(size_t) - 1 < bitmap1->buffer.size_bytes; - i += sizeof(size_t)) { + for (; i + sizeof(size_t) - 1 < limit; i += sizeof(size_t)) { size_t value1; size_t result; memcpy(&value1, &bitmap1->buffer.data[i], sizeof(size_t)); @@ -312,13 +334,16 @@ int BitmapXorBool(const struct ArrowBitmap *bitmap1, bool other, } int BitmapInvert(const struct ArrowBitmap *bitmap, struct ArrowBitmap *out) { - if (!(out->buffer.capacity_bytes >= bitmap->buffer.size_bytes)) { + const size_t size_bytes = bitmap->buffer.size_bytes; + if (!(out->buffer.capacity_bytes >= size_bytes)) { return -1; } + const size_t overflow_limit = SIZE_MAX - sizeof(size_t); + const size_t limit = + size_bytes > overflow_limit ? 
overflow_limit : size_bytes; size_t i = 0; - for (; i + sizeof(size_t) - 1 < bitmap->buffer.size_bytes; - i += sizeof(size_t)) { + for (; i + sizeof(size_t) - 1 < limit; i += sizeof(size_t)) { size_t value; size_t result; memcpy(&value, &bitmap->buffer.data[i], sizeof(size_t)); From 5e9f08c83b330b25b9e1e1520eddb4bf1af3a3f9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 Aug 2023 17:42:49 -0400 Subject: [PATCH 118/126] re-enable cpplint --- .pre-commit-config.yaml | 16 ++++++++++++++++ pandas/_libs/include/pandas/bitmask_algorithms.h | 11 ++++++++++- pandas/_libs/src/bitmask_algorithms.c | 12 +++++++++++- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f024245009d71..f73eba7070025 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -73,6 +73,22 @@ repos: - id: fix-encoding-pragma args: [--remove] - id: trailing-whitespace +- repo: https://github.com/cpplint/cpplint + rev: 1.6.1 + hooks: + - id: cpplint + exclude: | + ^pandas/_libs/include/pandas/vendored/klib + |pandas/_libs/include/pandas/vendored/nanoarrow.h + |pandas/_libs/src/vendored/nanoarrow.c + args: [ + --quiet, + '--extensions=c,h', + '--headers=h', + --recursive, + --linelength=88, + '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' + ] - repo: https://github.com/pylint-dev/pylint rev: v3.0.0a6 hooks: diff --git a/pandas/_libs/include/pandas/bitmask_algorithms.h b/pandas/_libs/include/pandas/bitmask_algorithms.h index 8598b5ebdd568..fa70b1a472fc4 100644 --- a/pandas/_libs/include/pandas/bitmask_algorithms.h +++ b/pandas/_libs/include/pandas/bitmask_algorithms.h @@ -1,4 +1,13 @@ -// The full license is in the LICENSE file, distributed with this software. +/* + +Copyright (c) 2023, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+ +*/ #pragma once diff --git a/pandas/_libs/src/bitmask_algorithms.c b/pandas/_libs/src/bitmask_algorithms.c index 399705fcbbdd6..6b944729445d0 100644 --- a/pandas/_libs/src/bitmask_algorithms.c +++ b/pandas/_libs/src/bitmask_algorithms.c @@ -1,4 +1,14 @@ -// The full license is in the LICENSE file, distributed with this software. +/* + +Copyright (c) 2023, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +*/ + #include From 97da641b90b12ef4c32358e25c5dfc23ae4b71c2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 Aug 2023 17:57:37 -0400 Subject: [PATCH 119/126] updated pre-commit --- .pre-commit-config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f73eba7070025..52707d305b886 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -47,6 +47,7 @@ repos: types_or: [python, rst, markdown, cython, c] additional_dependencies: [tomli] exclude: | + (?x) ^pandas/_libs/include/pandas/vendored/nanoarrow.h |pandas/_libs/src/vendored/nanoarrow.c - repo: https://github.com/MarcoGorelli/cython-lint @@ -78,6 +79,7 @@ repos: hooks: - id: cpplint exclude: | + (?x) ^pandas/_libs/include/pandas/vendored/klib |pandas/_libs/include/pandas/vendored/nanoarrow.h |pandas/_libs/src/vendored/nanoarrow.c From a3dca8abd0c1b674e48d6f8f89eed094678e5876 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 Aug 2023 19:17:28 -0400 Subject: [PATCH 120/126] Fix typing issues --- pandas/_libs/hashtable.pyi | 2 +- pandas/core/groupby/ops.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi index cd51875eb3df1..3e219e4974a89 100644 --- a/pandas/_libs/hashtable.pyi +++ b/pandas/_libs/hashtable.pyi @@ -232,7 +232,7 @@ class IntpHashTable(HashTable): ... 
def duplicated( values: np.ndarray, keep: Literal["last", "first", False] = ..., - mask: npt.NDArray[np.bool_] | None = ..., + mask: npt.NDArray[np.bool_] | BitmaskArray | None = ..., ) -> npt.NDArray[np.bool_]: ... def mode( values: np.ndarray, dropna: bool, mask: npt.NDArray[np.bool_] | None = ... diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 71525c8c1a223..2ba913cf1de65 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -22,6 +22,7 @@ NaT, lib, ) +from pandas._libs.arrays import BitmaskArray import pandas._libs.groupby as libgroupby from pandas._typing import ( ArrayLike, @@ -309,11 +310,13 @@ def _cython_op_ndim_compat( min_count: int, ngroups: int, comp_ids: np.ndarray, - mask: npt.NDArray[np.bool_] | None = None, + mask: npt.NDArray[np.bool_] | BitmaskArray | None = None, result_mask: npt.NDArray[np.bool_] | None = None, **kwargs, ) -> np.ndarray: if values.ndim == 1: + if isinstance(mask, BitmaskArray): + mask = mask.to_numpy() # expand to 2d, dispatch, then squeeze if appropriate values2d = values[None, :] if mask is not None: @@ -353,7 +356,7 @@ def _call_cython_op( min_count: int, ngroups: int, comp_ids: np.ndarray, - mask: npt.NDArray[np.bool_] | None, + mask: npt.NDArray[np.bool_] | BitmaskArray | None, result_mask: npt.NDArray[np.bool_] | None, **kwargs, ) -> np.ndarray: # np.ndarray[ndim=2] @@ -387,6 +390,9 @@ def _call_cython_op( values = values.T if mask is not None: + if isinstance(mask, BitmaskArray): + mask = mask.to_numpy() + mask = mask.T if result_mask is not None: result_mask = result_mask.T From 1f77d9a7ed58f01a7e04cd849ac6c50000e46ea7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 Aug 2023 21:50:43 -0400 Subject: [PATCH 121/126] more cleanups --- pandas/core/algorithms.py | 9 +++++++-- pandas/core/arrays/masked.py | 5 +++-- pandas/core/tools/numeric.py | 12 +++++++----- pandas/tests/arrays/categorical/test_astype.py | 9 --------- pandas/tests/arrays/floating/test_arithmetic.py | 3 
+-- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9769e37829ec8..ba3d9fca3fea1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -443,9 +443,14 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | BitmaskArray | None = return uniques else: - uniques, np_mask = table.unique(values, mask=mask) + uniques, mask = table.unique(values, mask=mask) uniques = _reconstruct_data(uniques, original.dtype, original) - return uniques, np_mask.astype("bool") + + if isinstance(mask, BitmaskArray): + mask = mask.to_numpy() + + assert mask is not None # for mypy + return uniques, mask.astype("bool") unique1d = unique diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 8883095b56b09..a74a803d862e5 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1151,6 +1151,7 @@ def _quantile( def _reduce( self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs ): + mask: BitmaskArray | np.ndarray if name in {"any", "all", "min", "max", "sum", "prod", "mean", "var", "std"}: result = getattr(self, name)(skipna=skipna, **kwargs) else: @@ -1166,8 +1167,8 @@ def _reduce( return self._wrap_na_result(name=name, axis=0, mask_size=(1,)) else: result = result.reshape(1) - np_mask = np.zeros(1, dtype=bool) - return self._maybe_mask_result(result, np_mask) + mask = np.zeros(1, dtype=bool) + return self._maybe_mask_result(result, mask) if isna(result): return libmissing.NA diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 23dc964246b91..28612e7aef6ef 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -8,6 +8,7 @@ import numpy as np from pandas._libs import lib +from pandas._libs.arrays import BitmaskArray from pandas.util._validators import check_dtype_backend from pandas.core.dtypes.cast import maybe_downcast_numeric @@ -201,10 +202,10 @@ def to_numeric( # GH33013: for 
IntegerArray & FloatingArray extract non-null values for casting # save mask to reconstruct the full array after casting - mask: npt.NDArray[np.bool_] | None = None + mask: npt.NDArray[np.bool_] | BitmaskArray | None = None if isinstance(values, BaseMaskedArray): - mask = values._mask.to_numpy() - values = values._data[~mask] + mask = values._mask + values = values._data[~mask] # type: ignore[call-overload] values_dtype = getattr(values, "dtype", None) if isinstance(values_dtype, ArrowDtype): @@ -278,8 +279,9 @@ def to_numeric( if mask is None or (new_mask is not None and new_mask.shape == mask.shape): # GH 52588 mask = new_mask - - assert isinstance(mask, np.ndarray) + else: + mask = mask.copy() + assert isinstance(mask, (np.ndarray, BitmaskArray)) data = np.zeros(mask.shape, dtype=values.dtype) data[~mask] = values diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index ace785e6ae5c8..94b095fc0fa91 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -145,12 +145,3 @@ def test_astype_object_timestamp_categories(self): result = cat.astype(object) expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object") tm.assert_numpy_array_equal(result, expected) - - @pytest.mark.skip(reason="Not applicable with bitmask backed arrays") - def test_astype_category_readonly_mask_values(self): - # GH#53658 - arr = array([0, 1, 2], dtype="Int64") - arr._mask.flags["WRITEABLE"] = False - result = arr.astype("category") - expected = array([0, 1, 2], dtype="Int64").astype("category") - tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index 052f38dfce5af..056c22d8c1131 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -88,8 +88,7 @@ def test_pow_scalar(dtype): result = 
np.nan**a expected = FloatingArray( - np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), - mask=a._mask, + np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), mask=a._mask ) tm.assert_extension_array_equal(result, expected) From 6a56ec1884ba861205fbf1f152b7b78d7f229040 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 6 Sep 2023 16:22:30 -0400 Subject: [PATCH 122/126] remove cast --- pandas/core/algorithms.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ac3c6830fa006..565a7f3b36cae 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -446,11 +446,8 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | BitmaskArray | None = uniques, mask = table.unique(values, mask=mask) uniques = _reconstruct_data(uniques, original.dtype, original) - if isinstance(mask, BitmaskArray): - mask = mask.to_numpy() - assert mask is not None # for mypy - return uniques, mask.astype("bool") + return uniques, mask unique1d = unique From 23fb76d427ba6d03e80be0dfff88b06ae2dde16c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 6 Sep 2023 17:11:11 -0400 Subject: [PATCH 123/126] less diff --- pandas/core/algorithms.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 565a7f3b36cae..de9e6a894669b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -445,7 +445,6 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | BitmaskArray | None = else: uniques, mask = table.unique(values, mask=mask) uniques = _reconstruct_data(uniques, original.dtype, original) - assert mask is not None # for mypy return uniques, mask From 3fb26ec41603c2d2b5b713a81536ef91eb9e3b7d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Sep 2023 11:54:32 -0400 Subject: [PATCH 124/126] reverted cythonized is_null_slice --- pandas/_libs/arrays.pyx | 2 +- pandas/_libs/lib.pyi | 1 - pandas/_libs/lib.pyx 
| 19 ------------------- pandas/core/common.py | 7 ++++++- 4 files changed, 7 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index fd8382c13e6c3..8f0f0f58db983 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -20,7 +20,7 @@ from numpy cimport ( uint8_t, ) -from pandas._libs.lib import is_null_slice +from pandas.core.common import is_null_slice cnp.import_array() diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 946cbb7dbf6f1..15bd5a7379105 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -44,7 +44,6 @@ def is_iterator(obj: object) -> bool: ... def is_scalar(val: object) -> bool: ... def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ... def is_pyarrow_array(obj: object) -> bool: ... -def is_null_slice(obj: object) -> bool: ... def is_period(val: object) -> TypeGuard[Period]: ... def is_interval(val: object) -> TypeGuard[Interval]: ... def is_decimal(val: object) -> TypeGuard[Decimal]: ... diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a9e23b95d9b7d..0c0610f72044e 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -29,7 +29,6 @@ from cpython.object cimport ( ) from cpython.ref cimport Py_INCREF from cpython.sequence cimport PySequence_Check -from cpython.slice cimport PySlice_Unpack from cpython.tuple cimport ( PyTuple_New, PyTuple_SET_ITEM, @@ -72,7 +71,6 @@ cdef extern from "Python.h": # Note: importing extern-style allows us to declare these as nogil # functions, whereas `from cpython cimport` does not. 
bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil - cdef Py_ssize_t PY_SSIZE_T_MAX cdef extern from "numpy/arrayobject.h": # cython's numpy.dtype specification is incorrect, which leads to @@ -1252,23 +1250,6 @@ def is_pyarrow_array(obj): return False -def is_null_slice(obj): - """ - Return True if given object - """ - cdef Py_ssize_t start, stop, step - if isinstance(obj, slice): - try: - PySlice_Unpack(obj, &start, &stop, &step) - except TypeError: - return False - - if start == 0 and stop == PY_SSIZE_T_MAX and step == 1: - return True - - return False - - _TYPE_MAP = { "categorical": "categorical", "category": "categorical", diff --git a/pandas/core/common.py b/pandas/core/common.py index 2b243ec21818d..8fd8b10c6fc32 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -307,7 +307,12 @@ def is_null_slice(obj) -> bool: """ We have a null slice. """ - return lib.is_null_slice(obj) + return ( + isinstance(obj, slice) + and obj.start is None + and obj.stop is None + and obj.step is None + ) def is_empty_slice(obj) -> bool: From 541de2e9c1b27445668bf331aa510cb1b4e7dc4b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Sep 2023 11:55:22 -0400 Subject: [PATCH 125/126] remove xfail of test --- pandas/tests/frame/indexing/test_where.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index d08701bf5035c..1eb67671da0b8 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas.compat.numpy import np_version_gte1p24 - from pandas.core.dtypes.common import is_scalar import pandas as pd @@ -707,11 +705,6 @@ def test_where_categorical_filtering(self): tm.assert_equal(result, expected) - @pytest.mark.xfail( - not np_version_gte1p24, - reason="Changed NumPy behavior for >1D non-tuple sequence indexing", - strict=False, - ) def test_where_ea_other(self): 
# GH#38729/GH#38742 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) From 34bc194b199735c842439c4f32ff3f06a3bc4e0d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Sep 2023 11:59:54 -0400 Subject: [PATCH 126/126] change assert to ignore --- pandas/core/arrays/masked.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index a74a803d862e5..1fc1dcb5578aa 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -198,8 +198,7 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self.dtype.na_value return self._data[item] - assert not isinstance(newmask, bool) # for mypy - return self._simple_new(self._data[item], newmask) + return self._simple_new(self._data[item], newmask) # type: ignore[arg-type] def _pad_or_backfill( self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True