Merge pull request #4219 from jepler/bit_transpose

jepler · web-flow · commit 1cd4e4552e1a · 2021-02-18T20:29:05.000-06:00
Add Bit transpose function to support piopixl8
diff --git a/locale/circuitpython.pot b/locale/circuitpython.pot
@@ -1092,6 +1092,11 @@ msgstr ""
 msgid "Initialization failed due to lack of memory"
 msgstr ""
 
+#: shared-bindings/bitops/__init__.c
+#, c-format
+msgid "Input buffer length (%d) must be a multiple of the strand count (%d)"
+msgstr ""
+
 #: ports/atmel-samd/common-hal/pulseio/PulseIn.c
 msgid "Input taking too long"
 msgstr ""
@@ -1659,6 +1664,11 @@ msgstr ""
 msgid "Out of sockets"
 msgstr ""
 
+#: shared-bindings/bitops/__init__.c
+#, c-format
+msgid "Output buffer must be at least %d bytes"
+msgstr ""
+
 #: shared-bindings/audiobusio/PDMIn.c
 msgid "Oversample must be multiple of 8."
 msgstr ""
@@ -4101,6 +4111,11 @@ msgstr ""
 msgid "watchdog timeout must be greater than 0"
 msgstr ""
 
+#: shared-bindings/bitops/__init__.c
+#, c-format
+msgid "width must be from 2 to 8 (inclusive), not %d"
+msgstr ""
+
 #: shared-bindings/rgbmatrix/RGBMatrix.c
 msgid "width must be greater than zero"
 msgstr ""
diff --git a/ports/raspberrypi/mpconfigport.mk b/ports/raspberrypi/mpconfigport.mk
@@ -24,6 +24,7 @@ CIRCUITPY_NEOPIXEL_WRITE = 0
 endif
 
 CIRCUITPY_FULL_BUILD = 1
+CIRCUITPY_BITOPS = 1
 CIRCUITPY_PWMIO = 1
 
 # Things that need to be implemented.
diff --git a/py/circuitpy_defns.mk b/py/circuitpy_defns.mk
@@ -132,6 +132,9 @@ endif
 ifeq ($(CIRCUITPY_AUDIOMP3),1)
 SRC_PATTERNS += audiomp3/%
 endif
+ifeq ($(CIRCUITPY_BITOPS),1)
+SRC_PATTERNS += bitops/%
+endif
 ifeq ($(CIRCUITPY_BITBANGIO),1)
 SRC_PATTERNS += bitbangio/%
 endif
@@ -466,6 +469,7 @@ SRC_SHARED_MODULE_ALL = \
 	bitbangio/OneWire.c \
 	bitbangio/SPI.c \
 	bitbangio/__init__.c \
+	bitops/__init__.c \
 	board/__init__.c \
 	adafruit_bus_device/__init__.c \
 	adafruit_bus_device/I2CDevice.c \
diff --git a/py/circuitpy_mpconfig.h b/py/circuitpy_mpconfig.h
@@ -299,6 +299,14 @@ extern const struct _mp_obj_module_t audiopwmio_module;
 #define BINASCII_MODULE
 #endif
 
+#if CIRCUITPY_BITOPS
+extern const struct _mp_obj_module_t bitops_module;
+#define BITOPS_MODULE        { MP_OBJ_NEW_QSTR(MP_QSTR_bitops),(mp_obj_t)&bitops_module },
+#else
+#define BITOPS_MODULE
+#endif
+
+
 #if CIRCUITPY_BITBANGIO
 #define BITBANGIO_MODULE       { MP_OBJ_NEW_QSTR(MP_QSTR_bitbangio), (mp_obj_t)&bitbangio_module },
 extern const struct _mp_obj_module_t bitbangio_module;
@@ -819,6 +827,7 @@ extern const struct _mp_obj_module_t msgpack_module;
     AUDIOMP3_MODULE \
     AUDIOPWMIO_MODULE \
     BINASCII_MODULE \
+    BITOPS_MODULE \
     BITBANGIO_MODULE \
     BLEIO_MODULE \
     BOARD_MODULE \
diff --git a/py/circuitpy_mpconfig.mk b/py/circuitpy_mpconfig.mk
@@ -89,6 +89,9 @@ CFLAGS += -DCIRCUITPY_AUDIOMP3=$(CIRCUITPY_AUDIOMP3)
 CIRCUITPY_BINASCII ?= $(CIRCUITPY_FULL_BUILD)
 CFLAGS += -DCIRCUITPY_BINASCII=$(CIRCUITPY_BINASCII)
 
+CIRCUITPY_BITOPS ?= 0
+CFLAGS += -DCIRCUITPY_BITOPS=$(CIRCUITPY_BITOPS)
+
 CIRCUITPY_BITBANGIO ?= $(CIRCUITPY_FULL_BUILD)
 CFLAGS += -DCIRCUITPY_BITBANGIO=$(CIRCUITPY_BITBANGIO)
 
diff --git a/shared-bindings/bitops/__init__.c b/shared-bindings/bitops/__init__.c
@@ -0,0 +1,101 @@
+/*
+ * This file is part of the Circuit Python project, https://github.com/adafruit/circuitpython
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021 Jeff Epler for Adafruit Industries
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "py/obj.h"
+#include "py/runtime.h"
+
+#include "shared-bindings/bitops/__init__.h"
+
+//| """Routines for low-level manipulation of binary data"""
+//|
+//|
+
+//| def bit_transpose(input: ReadableBuffer, output: WriteableBuffer, width:int = 8) -> WriteableBuffer:
+//|     """"Transpose" a buffer by assembling each output byte with bits taken from each of ``width`` different input bytes.
+//|
+//|     This can be useful to convert a sequence of pixel values into a single
+//|     stream of bytes suitable for sending via a parallel conversion method.
+//|
+//|     The number of bytes in the input buffer must be a multiple of the width,
+//|     and the width can be any value from 2 to 8.  If the width is fewer than 8,
+//|     then the remaining (more significant) bits of the output are set to zero.
+//|
+//|     Let ``stride = len(input)//width``.  Then the first byte is made out of the
+//|     most significant bits of ``[input[0], input[stride], input[2*stride], ...]``.
+//|     The second byte is made out of the second bits, and so on through the 8th
+//|     output byte, which is made of the least significant bits.  The 9th output byte
+//|     is made of the most significant bits of ``[input[1], input[1+stride], input[1+2*stride], ...]``.
+//|
+//|     The required output buffer size is ``len(input) * 8  // width``.
+//|
+//|     Returns the output buffer."""
+//|     ...
+
+// Binding for bit_transpose(input, output, width=8): checks the arguments, transposes, returns output.
+STATIC mp_obj_t bit_transpose(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    enum { ARG_input, ARG_output, ARG_width };
+    static const mp_arg_t arg_spec[] = {
+        { MP_QSTR_input, MP_ARG_OBJ | MP_ARG_REQUIRED },
+        { MP_QSTR_output, MP_ARG_OBJ | MP_ARG_REQUIRED },
+        { MP_QSTR_width, MP_ARG_INT, { .u_int = 8 } },
+    };
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(arg_spec)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(arg_spec), arg_spec, parsed);
+    // `width` is the strand count; anything outside 2..8 is rejected up front.
+    int strand_count = parsed[ARG_width].u_int;
+    if (!(strand_count >= 2 && strand_count <= 8)) {
+        mp_raise_ValueError_varg(translate("width must be from 2 to 8 (inclusive), not %d"), strand_count);
+    }
+    // The input has to divide evenly into `strand_count` equal strands.
+    mp_buffer_info_t src_info;
+    mp_get_buffer_raise(parsed[ARG_input].u_obj, &src_info, MP_BUFFER_READ);
+    int src_len = src_info.len;
+    if (src_len % strand_count) {
+        mp_raise_ValueError_varg(translate("Input buffer length (%d) must be a multiple of the strand count (%d)"), src_len, strand_count);
+    }
+    // The output needs 8 bytes for each byte position within a strand.
+    mp_buffer_info_t dst_info;
+    mp_get_buffer_raise(parsed[ARG_output].u_obj, &dst_info, MP_BUFFER_WRITE);
+    int needed = 8 * (src_len / strand_count);
+    if (needed > (int)dst_info.len) {
+        mp_raise_ValueError_varg(translate("Output buffer must be at least %d bytes"), needed);
+    }
+    common_hal_bitops_bit_transpose(dst_info.buf, src_info.buf, src_len, strand_count);
+    return parsed[ARG_output].u_obj;
+}
+STATIC MP_DEFINE_CONST_FUN_OBJ_KW(bitops_bit_transpose_obj, 1, bit_transpose);
+// Names exported by the `bitops` module: `__name__` and `bit_transpose`.
+STATIC const mp_rom_map_elem_t bitops_module_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_bitops) },
+    { MP_ROM_QSTR(MP_QSTR_bit_transpose), MP_ROM_PTR(&bitops_bit_transpose_obj) },
+};
+
+STATIC MP_DEFINE_CONST_DICT(bitops_module_globals, bitops_module_globals_table);
+// Module object whose globals are bitops_module_globals.
+const mp_obj_module_t bitops_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&bitops_module_globals,
+};
diff --git a/shared-bindings/bitops/__init__.h b/shared-bindings/bitops/__init__.h
@@ -0,0 +1,32 @@
+/*
+ * This file is part of the Circuit Python project, https://github.com/adafruit/circuitpython
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021 Jeff Epler
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <stdlib.h>
+// Bit-transposes the inlen bytes at src, organized as num_strands strands, into result.
+void common_hal_bitops_bit_transpose(uint8_t *result, const uint8_t *src, size_t inlen, size_t num_strands);
diff --git a/shared-module/bitops/__init__.c b/shared-module/bitops/__init__.c
@@ -0,0 +1,151 @@
+/*
+ * This file is part of the Circuit Python project, https://github.com/adafruit/circuitpython
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021 Jeff Epler for Adafruit Industries
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "shared-bindings/bitops/__init__.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+// Marks an intentional switch fall-through: an attribute where __GNUC__ is defined, a no-op otherwise.
+#ifdef __GNUC__
+#define FALLTHROUGH __attribute__((fallthrough))
+#else
+#define FALLTHROUGH ((void)0) /* FALLTHROUGH */
+#endif
+
+// adapted from "Hacker's Delight" - Figure 7-2 Transposing an 8x8-bit matrix
+// basic idea is:
+// > First, treat the 8x8-bit matrix as 16 2x2-bit matrices, and transpose each
+// > of the 16 2x2-bit matrices. Second, treat the matrix as four 2x2 submatrices
+// > whose elements are 2x2-bit matrices and transpose each of the four 2x2
+// > submatrices. Finally, treat the matrix as a 2x2 matrix whose elements are
+// > 4x4-bit matrices, and transpose the 2x2 matrix. These transformations are
+// > illustrated below.
+// We want a different definition of bit/byte order, deal with strides differently, etc.
+// so the code is heavily re-worked compared to the original.
+// Bit-transposes one column of 2..7 strands.  One byte is read per strand
+// (src[0], src[src_stride], ...), absent strands count as zero, and 8 bytes
+// are stored through result[0..1].  In memory order, output byte i holds bit
+// (7-i) of every strand, with strand k in bit k.  A num_strands outside 2..7
+// matches no case, so nothing is loaded and the output is all zero.
+static void transpose_var(uint32_t *result, const uint8_t *src, int src_stride, int num_strands) {
+    uint32_t x = 0, y = 0, t;
+    src += (num_strands-1) * src_stride;  // begin at the last strand, walk back to strand 0
+
+    // Widen each byte to uint32_t before shifting: uint8_t promotes to signed
+    // int, so a byte >= 0x80 shifted left by 24 would overflow (undefined).
+    switch(num_strands) {
+    case 7:
+        x |= (uint32_t)*src << 16; src -= src_stride;
+        FALLTHROUGH;
+    case 6:
+        x |= (uint32_t)*src << 8; src -= src_stride;
+        FALLTHROUGH;
+    case 5:
+        x |= (uint32_t)*src; src -= src_stride;
+        FALLTHROUGH;
+    case 4:
+        y |= (uint32_t)*src << 24; src -= src_stride;
+        FALLTHROUGH;
+    case 3:
+        y |= (uint32_t)*src << 16; src -= src_stride;
+        FALLTHROUGH;
+    case 2:
+        y |= (uint32_t)*src << 8; src -= src_stride;
+        y |= (uint32_t)*src;
+    }
+    // Swap bits inside each 2x2 block, then swap 2x2 blocks inside each 4x4 block.
+    t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
+    t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
+
+    t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
+    t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
+    // Exchange the 4x4 quadrants between x and y.
+    t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
+    y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
+    x = t;
+    // Swap bytes on little-endian targets so the top byte of x lands at output byte 0.
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    x = __builtin_bswap32(x);
+    y = __builtin_bswap32(y);
+#endif
+    result[0] = x;
+    result[1] = y;
+}
+
+// Bit-transposes one column of exactly 8 strands: reads src[0], src[src_stride],
+// ..., src[7*src_stride] and stores 8 bytes through result[0..1].  In memory
+// order, output byte i holds bit (7-i) of every strand, with strand k in bit k.
+static void transpose_8(uint32_t *result, const uint8_t *src, int src_stride) {
+    uint32_t x, y, t;
+    // Widen before shifting: uint8_t promotes to signed int, so << 24 could overflow.
+    y = *src; src += src_stride;
+    y |= ((uint32_t)*src << 8); src += src_stride;
+    y |= ((uint32_t)*src << 16); src += src_stride;
+    y |= ((uint32_t)*src << 24); src += src_stride;
+    x = *src; src += src_stride;
+    x |= ((uint32_t)*src << 8); src += src_stride;
+    x |= ((uint32_t)*src << 16); src += src_stride;
+    x |= ((uint32_t)*src << 24); src += src_stride;
+
+    t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
+    t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
+    t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
+    t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
+    t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
+    y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
+    x = t;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    x = __builtin_bswap32(x);
+    y = __builtin_bswap32(y);
+#endif
+    result[0] = x;
+    result[1] = y;
+}
+
+// Bit-transposes src into result one column at a time.  The input is
+// num_strands strands of inlen/num_strands bytes laid end to end; column i is
+// the bytes src[i], src[i+stride], ... and fills result[8*i] .. result[8*i+7].
+// Every column is transposed into a uint32_t scratch pair and copied out with
+// memcpy: result is a caller-supplied byte buffer with no alignment guarantee,
+// and storing uint32_t values straight through it is undefined behavior when
+// it is misaligned.  The caller must pass a non-zero num_strands and a result
+// buffer of at least 8 * (inlen / num_strands) bytes.
+void common_hal_bitops_bit_transpose(uint8_t *result, const uint8_t *src, size_t inlen, size_t num_strands) {
+    const size_t columns = inlen / num_strands;  // doubles as the stride between strands
+
+    for (size_t i = 0; i < columns; i++) {
+        uint32_t scratch[2];
+
+        if (num_strands == 8) {
+            transpose_8(scratch, src + i, columns);
+        } else {
+            transpose_var(scratch, src + i, columns, num_strands);
+        }
+        // Byte-wise copy, valid for any alignment of result.
+        memcpy(result + 8 * i, scratch, sizeof(scratch));
+    }
+}
diff --git a/shared-module/bitops/__init__.h b/shared-module/bitops/__init__.h