Merged
3 changes: 3 additions & 0 deletions python/tflite_micro/numpy_utils.cc
@@ -58,6 +58,9 @@ int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) {
case kTfLiteInt4:
// TODO(b/246806634): NPY_INT4 currently doesn't exist
return NPY_BYTE;
case kTfLiteInt2:
// TODO(b/246806634): NPY_INT2 currently doesn't exist
return NPY_BYTE;
case kTfLiteInt8:
return NPY_INT8;
case kTfLiteInt64:
1 change: 1 addition & 0 deletions tensorflow/compiler/mlir/lite/core/c/tflite_types.h
@@ -64,6 +64,7 @@ typedef enum {
kTfLiteUInt16 = 17,
kTfLiteInt4 = 18,
kTfLiteBFloat16 = 19,
kTfLiteInt2 = 20,
} TfLiteType;
// LINT.ThenChange(//tensorflow/lite/profiling/proto/model_runtime_info.proto:EdgeDataType)

1 change: 1 addition & 0 deletions tensorflow/compiler/mlir/lite/schema/schema.fbs
@@ -59,6 +59,7 @@ enum TensorType : byte {
UINT16 = 16,
INT4 = 17,
BFLOAT16 = 18,
INT2 = 19,
}

// Custom quantization parameters for experimenting with new quantization
3 changes: 3 additions & 0 deletions tensorflow/lite/core/api/flatbuffer_conversions.cc
@@ -1088,6 +1088,9 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
case TensorType_INT4:
*type = kTfLiteInt4;
return kTfLiteOk;
case TensorType_INT2:
*type = kTfLiteInt2;
return kTfLiteOk;
default:
*type = kTfLiteNoType;
TF_LITE_REPORT_ERROR(error_reporter,
2 changes: 2 additions & 0 deletions tensorflow/lite/core/c/common.cc
@@ -509,6 +509,8 @@ const char* TfLiteTypeGetName(TfLiteType type) {
return "VARIANT";
case kTfLiteInt4:
return "INT4";
case kTfLiteInt2:
return "INT2";
}
return "Unknown type";
}
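Not part of the diff — a minimal sketch of how the two additions above compose once this lands: the schema-level TensorType_INT2 converts to kTfLiteInt2, whose printable name is now "INT2". Include paths follow the files touched in this PR; the error-reporter plumbing is assumed to exist in the caller and is not exercised here, since the INT2 case no longer reaches the error branch.

// Sketch only -- not part of this PR.
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/schema/schema_generated.h"

const char* Int2TypeName(tflite::ErrorReporter* error_reporter) {
  TfLiteType type = kTfLiteNoType;
  if (tflite::ConvertTensorType(tflite::TensorType_INT2, &type,
                                error_reporter) != kTfLiteOk) {
    return "conversion failed";
  }
  return TfLiteTypeGetName(type);  // Returns "INT2" after this change.
}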
98 changes: 83 additions & 15 deletions tensorflow/lite/kernels/internal/portable_tensor_utils.cc
@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

@@ -92,23 +93,90 @@ void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
}
}

void PackInt8IntoDenseInt4(const int8_t* src_buffer, int num_elements,
int8_t* dst_buffer) {
// num_elements means the number of elements regardless of packed or unpacked.
// For example, 3 elements means both
// 1) Packed: 3 int4's = 12 bit -> 16 bits (padded) = 2 bytes.
// stored in src_buffer[0] and src_buffer[1] (i = 0..1)
// 2) Unpacked: 3 int8's = 3 bytes.
// stored in dst_buffer[0], dst_buffer[1] and dst_buffer[2] (j = 0..2)
for (int i = 0; i < num_elements - 1; i += 2) {
dst_buffer[i / 2] = src_buffer[i] & 0x0F;
dst_buffer[i / 2] |= src_buffer[i + 1] << 4;
void UnpackPackedIntToInt8(const int8_t* src_buffer, int num_elements,
int bit_width, int8_t* dst_buffer) {
assert(bit_width == 2 || bit_width == 4);
if (bit_width == 4) {
// num_elements means the number of elements regardless of packed or
// unpacked. For example, 3 elements means both
// 1) Packed: 3 int4's = 12 bit -> 16 bits (padded) = 2 bytes.
// stored in src_buffer[0] and src_buffer[1] (i = 0..1)
// 2) Unpacked: 3 int8's = 3 bytes.
// stored in dst_buffer[0], dst_buffer[1] and dst_buffer[2] (j = 0..2)
for (int i = 0; i < num_elements / 2; i++) {
int8_t byte = src_buffer[i];
// Shift left first so that sign is properly extended when shifted right
int8_t lower = static_cast<int8_t>(byte << 4) >> 4;
int8_t higher = byte >> 4;
dst_buffer[2 * i] = lower;
dst_buffer[2 * i + 1] = higher;
}

// If the buffer size is odd, extract the final lower nibble.
if (num_elements % 2 != 0) {
dst_buffer[num_elements - 1] =
static_cast<int8_t>(src_buffer[num_elements / 2] << 4) >> 4;
}
} else if (bit_width == 2) {
for (int i = 0; i < num_elements / 4; i++) {
int8_t byte = src_buffer[i];
// Shift left first so that sign is properly extended when shifted right
int8_t val1 = static_cast<int8_t>(byte << 6) >> 6;
int8_t val2 = static_cast<int8_t>((byte << 4) & 0xFF) >> 6;
int8_t val3 = static_cast<int8_t>((byte << 2) & 0xFF) >> 6;
int8_t val4 = byte >> 6;
dst_buffer[4 * i] = val1;
dst_buffer[4 * i + 1] = val2;
dst_buffer[4 * i + 2] = val3;
dst_buffer[4 * i + 3] = val4;
}

// Handle the remaining elements.
int remaining_elements = num_elements % 4;
if (remaining_elements > 0) {
int8_t byte = src_buffer[num_elements / 4];
for (int i = 0; i < remaining_elements; i++) {
dst_buffer[num_elements - remaining_elements + i] =
static_cast<int8_t>((byte << (6 - 2 * i)) & 0xFF) >> 6;
}
}
}
auto packed_size = (num_elements + 1) / 2;
}

// Copy the final nibble if the buffer is odd-lengthed
if (num_elements % 2 != 0) {
dst_buffer[packed_size - 1] = src_buffer[num_elements - 1] & 0x0F;
void PackInt8IntoDenseInt(const int8_t* src_buffer, int num_elements,
int bit_width, int8_t* dst_buffer) {
assert(bit_width == 2 || bit_width == 4);
if (bit_width == 4) {
// num_elements means the number of elements regardless of packed or
// unpacked. For example, 3 elements means both
// 1) Unpacked: 3 int8's = 3 bytes.
// stored in src_buffer[0], src_buffer[1] and src_buffer[2] (j = 0..2)
// 2) Packed: 3 int4's = 12 bit -> 16 bits (padded) = 2 bytes.
// stored in dst_buffer[0] and dst_buffer[1] (i = 0..1)
for (int i = 0; i < num_elements / 2; ++i) {
dst_buffer[i] = (src_buffer[2 * i] & 0x0F) | (src_buffer[2 * i + 1] << 4);
}
// If the buffer size is odd, pack the final nibble.
if (num_elements % 2 != 0) {
dst_buffer[num_elements / 2] = src_buffer[num_elements - 1] & 0x0F;
}
} else if (bit_width == 2) {
for (int i = 0; i < num_elements / 4; ++i) {
dst_buffer[i] = (src_buffer[4 * i] & 0x03) |
((src_buffer[4 * i + 1] & 0x03) << 2) |
((src_buffer[4 * i + 2] & 0x03) << 4) |
((src_buffer[4 * i + 3] & 0x03) << 6);
}
// Handle the remaining elements.
int remaining_elements = num_elements % 4;
if (remaining_elements > 0) {
int8_t packed_val = 0;
for (int i = 0; i < remaining_elements; ++i) {
packed_val |= (src_buffer[num_elements - remaining_elements + i] & 0x03)
<< (i * 2);
}
dst_buffer[num_elements / 4] = packed_val;
}
}
}

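Not part of the diff — a self-contained sketch of the trick the 2-bit paths above rely on: mask-and-shift to pack four values into one byte, then shift each 2-bit field up to the top of the byte and arithmetic-shift it back down so the field's sign bit is extended into the full int8. All names are local to the sketch.

// Standalone sketch of the 2-bit pack/unpack round trip (not PR code).
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Four values in the signed 2-bit range [-2, 1].
  const int8_t values[4] = {-2, 0, 1, -1};

  // Pack: keep the low two bits of each value, LSB-first within the byte.
  uint8_t packed = 0;
  for (int i = 0; i < 4; ++i) {
    packed |= static_cast<uint8_t>(values[i] & 0x03) << (2 * i);
  }

  // Unpack: move the 2-bit field into bits 7..6, then arithmetic-shift right
  // so the field's sign bit fills the upper bits of the int8 result.
  for (int i = 0; i < 4; ++i) {
    const int8_t unpacked =
        static_cast<int8_t>(static_cast<uint8_t>(packed << (6 - 2 * i))) >> 6;
    assert(unpacked == values[i]);
    std::printf("slot %d -> %d\n", i, unpacked);
  }
  return 0;
}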
33 changes: 27 additions & 6 deletions tensorflow/lite/kernels/internal/portable_tensor_utils.h
@@ -618,20 +618,41 @@ void ApplySignbitToVector(const float* __restrict__ vector, int v_size,
void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
int8_t* dst_buffer);

// Pack `src_buffer` into a densely packed buffer of int4 values.
// Unpack or inflate `src_buffer` by splitting each packed byte into multiple
// elements written to `dst_buffer`. Supports 2-bit and 4-bit packed integers.
// Parameters:
// src_buffer : Buffer containing int4 values stored in int8 memory.
// src_buffer : Densely packed buffer containing int2 or int4 values.
// num_elements : Number of unpacked elements to be read from the buffer.
// This should be equal to the size of `dst_buffer`.
// bit_width : The bit width of the packed elements (either 2 or 4).
// dst_buffer : Buffer to unpack into. Should be allocated by the caller.
// Size should be at least `num_elements`.
// Notes:
// For 4-bit unpacking: e.g., `src_buffer = {0x12, 0x34};` (num_elements = 4)
// will return `dst_buffer = {0x02, 0x01, 0x04, 0x03}`.
// For 2-bit unpacking: e.g., `src_buffer = {0x12};` (num_elements = 4)
// will return `dst_buffer = {0xFE, 0x00, 0x01, 0x00}` (sign extended).
void UnpackPackedIntToInt8(const int8_t* src_buffer, int num_elements,
int bit_width, int8_t* dst_buffer);

// Pack `src_buffer` into a densely packed buffer of int2 or int4 values.
// Parameters:
// src_buffer : Buffer containing int2 or int4 values stored in int8
// memory.
// num_elements : Number of elements stored in the buffer. Note that this can
// be smaller than the size of `src_buffer` by 1 if it's odd,
// in which case the last nibble in `src_buffer` is ignored.
// This should be equal to the size of `dst_buffer`.
// bit_width : The bit width of the packed elements (either 2 or 4).
// dst_buffer : Buffer to pack into. Should be allocated by the caller.
// Size should be at least `num_elements`.
// Notes:
// For example, given `src_buffer = {0x02, 0x01, 0x04, 0x03}`, calling this
// function will return `dst_buffer = {0x12, 0x34}`.
void PackInt8IntoDenseInt4(const int8_t* src_buffer, int num_elements,
int8_t* dst_buffer);
// For 4-bit packing: e.g., given `src_buffer = {0x02, 0x01, 0x04, 0x03}`,
// calling this function will return `dst_buffer = {0x12, 0x34}`.
// For 2-bit packing: e.g., given `src_buffer = {0x00, 0x01, 0x00, 0x02}`,
// calling this function will return `dst_buffer = {0x84}`.
void PackInt8IntoDenseInt(const int8_t* src_buffer, int num_elements,
int bit_width, int8_t* dst_buffer);
} // namespace tensor_utils

} // namespace tflite
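Not part of the diff — a hedged usage sketch of the two helpers declared above, exercising the 2-bit path end to end. Namespaces and the include path follow this header; build wiring and the odd element count (to hit the remainder handling) are assumptions of the sketch.

// Usage sketch for the new pack/unpack helpers (not PR code).
#include <cstdint>

#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"

void RoundTripInt2() {
  // Five values in the int2 range [-2, 1]; the odd count exercises the
  // remainder handling in both helpers.
  const int8_t unpacked[5] = {-2, -1, 0, 1, -2};
  int8_t packed[2] = {};  // ceil(5 elements * 2 bits / 8) = 2 bytes
  int8_t recovered[5] = {};

  tflite::tensor_utils::PackInt8IntoDenseInt(unpacked, /*num_elements=*/5,
                                             /*bit_width=*/2, packed);
  tflite::tensor_utils::UnpackPackedIntToInt8(packed, /*num_elements=*/5,
                                              /*bit_width=*/2, recovered);
  // `recovered` now matches `unpacked`, each element sign-extended to int8.
}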
3 changes: 3 additions & 0 deletions tensorflow/lite/micro/tools/layer_by_layer.cc
@@ -120,6 +120,9 @@ TfLiteStatus ConvertTensorType(TfLiteType type, TensorTypes& tensor_type) {
case kTfLiteInt4:
tensor_type = TensorTypes_INT4;
return kTfLiteOk;
case kTfLiteInt2:
tensor_type = TensorTypes_INT2;
return kTfLiteOk;
case kTfLiteNoType:
MicroPrintf("Unsupported data type %d in tensor\n", tensor_type);
return kTfLiteError;
1 change: 1 addition & 0 deletions tensorflow/lite/micro/tools/layer_by_layer_schema.fbs
@@ -35,6 +35,7 @@ enum TensorTypes : byte {
UINT16 = 16,
INT4 = 17,
BFLOAT16 = 18,
INT2 = 19,
}

table TensorData {
13 changes: 8 additions & 5 deletions tensorflow/lite/micro/tools/layer_by_layer_schema_generated.h
@@ -59,11 +59,12 @@ enum TensorTypes : int8_t {
TensorTypes_UINT16 = 16,
TensorTypes_INT4 = 17,
TensorTypes_BFLOAT16 = 18,
TensorTypes_INT2 = 19,
TensorTypes_MIN = TensorTypes_FLOAT32,
TensorTypes_MAX = TensorTypes_BFLOAT16
TensorTypes_MAX = TensorTypes_INT2
};

inline const TensorTypes (&EnumValuesTensorTypes())[19] {
inline const TensorTypes (&EnumValuesTensorTypes())[20] {
static const TensorTypes values[] = {
TensorTypes_FLOAT32,
TensorTypes_FLOAT16,
@@ -83,13 +84,14 @@ inline const TensorTypes (&EnumValuesTensorTypes())[19] {
TensorTypes_UINT32,
TensorTypes_UINT16,
TensorTypes_INT4,
TensorTypes_BFLOAT16
TensorTypes_BFLOAT16,
TensorTypes_INT2
};
return values;
}

inline const char * const *EnumNamesTensorTypes() {
static const char * const names[20] = {
static const char * const names[21] = {
"FLOAT32",
"FLOAT16",
"INT32",
@@ -109,13 +111,14 @@ inline const char * const *EnumNamesTensorTypes() {
"UINT16",
"INT4",
"BFLOAT16",
"INT2",
nullptr
};
return names;
}

inline const char *EnumNameTensorTypes(TensorTypes e) {
if (::flatbuffers::IsOutRange(e, TensorTypes_FLOAT32, TensorTypes_BFLOAT16)) return "";
if (::flatbuffers::IsOutRange(e, TensorTypes_FLOAT32, TensorTypes_INT2)) return "";
const size_t index = static_cast<size_t>(e);
return EnumNamesTensorTypes()[index];
}
1 change: 1 addition & 0 deletions tensorflow/lite/python/schema_py_generated.py
@@ -27,6 +27,7 @@ class TensorType(object):
UINT16 = 16
INT4 = 17
BFLOAT16 = 18
INT2 = 19


class QuantizationDetails(object):
13 changes: 8 additions & 5 deletions tensorflow/lite/schema/schema_generated.h
@@ -703,11 +703,12 @@ enum TensorType : int8_t {
TensorType_UINT16 = 16,
TensorType_INT4 = 17,
TensorType_BFLOAT16 = 18,
TensorType_INT2 = 19,
TensorType_MIN = TensorType_FLOAT32,
TensorType_MAX = TensorType_BFLOAT16
TensorType_MAX = TensorType_INT2
};

inline const TensorType (&EnumValuesTensorType())[19] {
inline const TensorType (&EnumValuesTensorType())[20] {
static const TensorType values[] = {
TensorType_FLOAT32,
TensorType_FLOAT16,
@@ -727,13 +728,14 @@ inline const TensorType (&EnumValuesTensorType())[19] {
TensorType_UINT32,
TensorType_UINT16,
TensorType_INT4,
TensorType_BFLOAT16
TensorType_BFLOAT16,
TensorType_INT2
};
return values;
}

inline const char * const *EnumNamesTensorType() {
static const char * const names[20] = {
static const char * const names[21] = {
"FLOAT32",
"FLOAT16",
"INT32",
@@ -753,13 +755,14 @@ inline const char * const *EnumNamesTensorType() {
"UINT16",
"INT4",
"BFLOAT16",
"INT2",
nullptr
};
return names;
}

inline const char *EnumNameTensorType(TensorType e) {
if (::flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_BFLOAT16)) return "";
if (::flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_INT2)) return "";
const size_t index = static_cast<size_t>(e);
return EnumNamesTensorType()[index];
}
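Not part of the diff — a small sketch of what the regenerated range check above changes in practice: with TensorType_MAX bumped to TensorType_INT2, name lookup for the new value no longer trips the out-of-range guard.

// Sketch only (not PR code): name lookup for the new enum value.
#include <cstdio>

#include "tensorflow/lite/schema/schema_generated.h"

int main() {
  // Prints "INT2"; before this change IsOutRange() would have rejected the
  // value and EnumNameTensorType() would have returned an empty string.
  std::printf("%s\n", tflite::EnumNameTensorType(tflite::TensorType_INT2));
  return 0;
}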
2 changes: 1 addition & 1 deletion tensorflow/lite/tools/visualize.py
@@ -33,7 +33,7 @@
from tflite_micro.tensorflow.lite.python import schema_py_generated as schema_fb
else:
# This file is part of tflite_runtime package.
from tflite_runtime import schema_py_generated as schema_fb
from tflite_micro.tensorflow.lite_runtime import schema_py_generated as schema_fb

# A CSS description for making the visualizer
_CSS = """