Support for DECODE operator (#3213)

ddavis-2015 · web-flow · commit 6c1c1a89fc87 · 2025-11-19T14:14:53.000-08:00
* Support for DECODE operator @tensorflow/micro Add support for alternate decompression memory to DECODE operator. Additional unit tests. Update generic benchmark application and Makefile. bug=fixes #3212 * fixes. * Changes as per review.
diff --git a/tensorflow/lite/micro/kernels/decode.cc b/tensorflow/lite/micro/kernels/decode.cc
@@ -18,12 +18,37 @@ limitations under the License.
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/kernels/decode_state.h"
 #include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/micro_arena_constants.h"
 #include "tensorflow/lite/micro/micro_context.h"
 #include "tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
 
+// Backs <output> and its eval tensor with alternate decompression memory,
+// when such memory is available and <output> has no data assigned yet.
+TfLiteStatus SetOutputTensorData(TfLiteContext* context, const TfLiteNode* node,
+                                 size_t tensor_output_index,
+                                 TfLiteTensor* output) {
+  void* const buffer =
+      (output->data.data != nullptr)
+          ? nullptr  // memory already assigned: leave it be
+          : GetMicroContext(context)->AllocateDecompressionMemory(
+                output->bytes, MicroArenaBufferAlignment());
+  if (buffer == nullptr) {
+    return kTfLiteOk;
+  }
+
+  // Set the data pointers now to preclude allocation by the memory planner.
+  TfLiteEvalTensor* const eval_tensor =
+      tflite::micro::GetEvalOutput(context, node, tensor_output_index);
+  TF_LITE_ENSURE(context, eval_tensor != nullptr);
+  eval_tensor->data.data = buffer;
+  output->data.data = buffer;
+
+  return kTfLiteOk;
+}
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const size_t num_inputs = NumInputs(node);
   const size_t num_outputs = NumOutputs(node);
@@ -43,6 +68,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TfLiteTensor* output = nullptr;
   TfLiteStatus status = kTfLiteOk;
 
+  micro_context->ResetDecompressionMemoryAllocations();
+
   for (size_t i = 0; i < num_inputs; i += 2) {
     input = micro_context->AllocateTempInputTensor(node, i);
     if (input == nullptr) {
@@ -95,6 +122,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         break;
     }
 
+    status = SetOutputTensorData(context, node, i / 2, output);
+    if (status != kTfLiteOk) {
+      break;
+    }
+
     if (dsp != nullptr) {
       status = dsp->Setup(*input, *ancillary, *output);
       if (status != kTfLiteOk) {
diff --git a/tensorflow/lite/micro/kernels/decode_state_huffman_test.cc b/tensorflow/lite/micro/kernels/decode_state_huffman_test.cc
@@ -271,7 +271,7 @@ TF_LITE_MICRO_TEST(DecodeHuffmanTable16BitsInt16Fail) {
   tflite::testing::TestDecode<encodes.size() + ancillaries.size(),
                               outputs.size()>(
       encodes, ancillaries, outputs, expected, tflite::Register_DECODE(),
-      kTfLiteError);
+      nullptr, kTfLiteError);
 }
 
 TF_LITE_MICRO_TEST(DecodeHuffmanTable32BitsInt8) {
diff --git a/tensorflow/lite/micro/kernels/decode_state_prune_test.cc b/tensorflow/lite/micro/kernels/decode_state_prune_test.cc
@@ -575,7 +575,7 @@ TF_LITE_MICRO_TEST(DecodePruneQuantizedInvalidZeroPointInt16) {
   tflite::testing::TestDecode<kEncodes.size() + kAncillaries.size(),
                               kOutputs.size()>(
       kEncodes, kAncillaries, kOutputs, kExpected, tflite::Register_DECODE(),
-      kTfLiteError);
+      nullptr, kTfLiteError);
 }
 
 TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/micro/kernels/decode_test.cc b/tensorflow/lite/micro/kernels/decode_test.cc
@@ -196,4 +196,54 @@ TF_LITE_MICRO_TEST(DecodeTwoTensorsLUT) {
       encodes, ancillaries, outputs, expected, tflite::Register_DECODE());
 }
 
+TF_LITE_MICRO_TEST(DecodeWithAltDecompressionMemory) {
+  // 16-byte alignment matches that of a Buffer in the TfLite schema.
+  alignas(16) int8_t output_data[std::size(kExpectLUT0)] = {};
+  alignas(16) const AncillaryData<int8_t, std::size(kAncillaryDataLUT0)>
+      kAncillaryData = {{kDcmLUT0}, {kAncillaryDataLUT0}};
+
+  constexpr int kAncillaryShapeLUT[] = {1, sizeof(kAncillaryData)};
+
+  const TfLiteIntArray* const encoded_dims =
+      tflite::testing::IntArrayFromInts(kEncodedShapeLUT);
+  static const TensorInDatum tid_encode = {
+      kEncodedLUT,
+      *encoded_dims,
+  };
+  static constexpr std::initializer_list<const TensorInDatum*> encodes = {
+      &tid_encode,
+  };
+
+  const TfLiteIntArray* const ancillary_dims =
+      tflite::testing::IntArrayFromInts(kAncillaryShapeLUT);
+  static const TensorInDatum tid_ancillary = {
+      &kAncillaryData,
+      *ancillary_dims,
+  };
+  static constexpr std::initializer_list<const TensorInDatum*> ancillaries = {
+      &tid_ancillary};
+
+  const TfLiteIntArray* const output_dims =
+      tflite::testing::IntArrayFromInts(kOutputShapeLUT);
+  constexpr int kOutputZeroPointsData[] = {0};
+  const TfLiteIntArray* const kOutputZeroPoints =
+      tflite::testing::IntArrayFromInts(kOutputZeroPointsData);
+  const TfLiteFloatArray kOutputScales = {kOutputZeroPoints->size};
+  static const TensorOutDatum tod = {
+      nullptr,  // no data buffer: alternate decompression memory is used
+      *output_dims, kTfLiteInt8, kOutputScales, *kOutputZeroPoints, 0, {},
+  };
+  static constexpr std::initializer_list<const TensorOutDatum*> outputs = {
+      &tod};
+
+  const std::initializer_list<const void*> expected = {kExpectLUT0};
+
+  std::initializer_list<tflite::MicroContext::AlternateMemoryRegion> amr = {
+      {output_data, sizeof(output_data)}};  // sole region: output_data
+
+  tflite::testing::TestDecode<encodes.size() + ancillaries.size(),
+                              outputs.size()>(
+      encodes, ancillaries, outputs, expected, tflite::Register_DECODE(), &amr);
+}
+
 TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/micro/kernels/decode_test_helpers.h b/tensorflow/lite/micro/kernels/decode_test_helpers.h
@@ -83,7 +83,9 @@ TfLiteStatus CheckOutput(const TfLiteTensor& output,
 template <size_t kNumInputs, size_t kNumOutputs>
 TfLiteStatus ExecuteDecodeTest(
     TfLiteTensor* tensors, const TFLMRegistration& registration,
-    const std::initializer_list<const void*>& expected) {
+    const std::initializer_list<const void*>& expected,
+    const std::initializer_list<MicroContext::AlternateMemoryRegion>* amr =
+        nullptr) {
   int kInputArrayData[kNumInputs + 1] = {kNumInputs};
   for (size_t i = 0; i < kNumInputs; i++) {
     kInputArrayData[i + 1] = i;
@@ -99,6 +101,10 @@ TfLiteStatus ExecuteDecodeTest(
   micro::KernelRunner runner(registration, tensors, kNumInputs + kNumOutputs,
                              inputs_array, outputs_array, nullptr);
 
+  if (amr != nullptr) {
+    runner.GetFakeMicroContext()->SetDecompressionMemory(*amr);
+  }
+
   if (runner.InitAndPrepare() != kTfLiteOk || runner.Invoke() != kTfLiteOk) {
     return kTfLiteError;
   }
@@ -135,12 +141,15 @@ TfLiteStatus ExecuteDecodeTest(
 }
 
 template <size_t kNumInputs, size_t kNumOutputs>
-void TestDecode(const std::initializer_list<const TensorInDatum*>& encodes,
-                const std::initializer_list<const TensorInDatum*>& ancillaries,
-                const std::initializer_list<const TensorOutDatum*>& outputs,
-                const std::initializer_list<const void*>& expected,
-                const TFLMRegistration& registration,
-                const TfLiteStatus expected_status = kTfLiteOk) {
+void TestDecode(
+    const std::initializer_list<const TensorInDatum*>& encodes,
+    const std::initializer_list<const TensorInDatum*>& ancillaries,
+    const std::initializer_list<const TensorOutDatum*>& outputs,
+    const std::initializer_list<const void*>& expected,
+    const TFLMRegistration& registration,
+    const std::initializer_list<MicroContext::AlternateMemoryRegion>* amr =
+        nullptr,
+    const TfLiteStatus expected_status = kTfLiteOk) {
   TfLiteTensor tensors[kNumInputs + kNumOutputs] = {};
 
   for (size_t i = 0; i < kNumInputs; i += 2) {
@@ -173,7 +182,7 @@ void TestDecode(const std::initializer_list<const TensorInDatum*>& encodes,
   }
 
   TfLiteStatus s = ExecuteDecodeTest<kNumInputs, kNumOutputs>(
-      tensors, registration, expected);
+      tensors, registration, expected, amr);
   TF_LITE_MICRO_EXPECT_EQ(s, expected_status);
 }
 
diff --git a/tensorflow/lite/micro/kernels/kernel_runner.h b/tensorflow/lite/micro/kernels/kernel_runner.h
@@ -67,6 +67,9 @@ class KernelRunner {
   // to stub out MicroGraph methods and track invocations on each subgraph.
   MockMicroGraph* GetMockGraph() { return &mock_micro_graph_; }
 
+  // Returns the internal FakeMicroContext, e.g. to set decompression memory.
+  FakeMicroContext* GetFakeMicroContext() { return &fake_micro_context_; }
+
   // Returns true if all temp buffer in tests are deallocated.
   // TODO(b/209453859): move this function to private after deallocation checks
   // are enabled for all kernel tests.
diff --git a/tensorflow/lite/micro/micro_context.cc b/tensorflow/lite/micro/micro_context.cc
@@ -15,11 +15,13 @@ limitations under the License.
 
 #include "tensorflow/lite/micro/micro_context.h"
 
+#include <algorithm>
 #include <cstdarg>
 #include <cstddef>
 
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 #include "tensorflow/lite/micro/kernels/decompress.h"
+#include "tensorflow/lite/micro/memory_helpers.h"
 #include "tensorflow/lite/micro/micro_common.h"
 #include "tensorflow/lite/micro/micro_log.h"
 #include "tensorflow/lite/micro/micro_utils.h"
@@ -125,18 +127,50 @@ void* MicroContext::DecompressTensorToBuffer(
   return nullptr;
 }
 
+#endif  // USE_TFLM_COMPRESSION
+
 TfLiteStatus MicroContext::SetDecompressionMemory(
     const std::initializer_list<AlternateMemoryRegion>& regions) {
-  return kTfLiteError;
+  if (decompress_regions_ != nullptr) {
+    return kTfLiteError;  // regions can only be set once
+  }
+  // Allocate before storing anything, so a failure leaves members untouched.
+  size_t* const allocations = static_cast<size_t*>(
+      AllocatePersistentBuffer(sizeof(size_t) * regions.size()));
+  if (allocations == nullptr) {
+    return kTfLiteError;
+  }
+  decompress_regions_ = &regions;  // by address: caller keeps <regions> alive
+  decompress_regions_allocations_ = allocations;
+  ResetDecompressionMemoryAllocations();
+  return kTfLiteOk;
 }
 
 void* MicroContext::AllocateDecompressionMemory(size_t bytes,
                                                 size_t alignment) {
+  if (decompress_regions_ != nullptr) {
+    for (size_t i = 0; i < decompress_regions_->size(); i++) {
+      const AlternateMemoryRegion& region = decompress_regions_->begin()[i];
+      size_t& used = decompress_regions_allocations_[i];  // <= region.bytes
+      uint8_t* start = static_cast<uint8_t*>(region.address) + used;
+      uint8_t* aligned_start = AlignPointerUp(start, alignment);
+      const size_t pad = static_cast<size_t>(aligned_start - start);
+      if (pad <= region.bytes - used && bytes <= region.bytes - used - pad) {
+        used += pad + bytes;  // checked by subtraction: a sum could wrap
+        return aligned_start;
+      }
+    }
+  }
+
   return nullptr;
 }
 
-void MicroContext::ResetDecompressionMemoryAllocations() {}
-
-#endif  // USE_TFLM_COMPRESSION
+void MicroContext::ResetDecompressionMemoryAllocations() {
+  if (decompress_regions_ != nullptr) {  // nothing to reset without regions
+    TFLITE_DCHECK(decompress_regions_allocations_ != nullptr);
+    size_t* const used = decompress_regions_allocations_;  // bytes per region
+    std::fill(used, used + decompress_regions_->size(), size_t{0});
+  }
+}
 
 }  // namespace tflite
diff --git a/tensorflow/lite/micro/micro_context.h b/tensorflow/lite/micro/micro_context.h
@@ -16,14 +16,15 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_
 #define TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_
 
+#include <cstddef>
+#include <initializer_list>
+
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/micro/micro_graph.h"
 #include "tensorflow/lite/micro/micro_profiler_interface.h"
 
 #ifdef USE_TFLM_COMPRESSION
 
-#include <initializer_list>
-
 #include "tensorflow/lite/micro/compression.h"
 
 #endif  // USE_TFLM_COMPRESSION
@@ -126,6 +127,8 @@ class MicroContext {
       const TfLiteEvalTensor& tensor,
       const CompressionTensorData& compression_data, void* buffer);
 
+#endif  // USE_TFLM_COMPRESSION
+
   // Used for configuring alternate decompression memory
   struct AlternateMemoryRegion {
     void* address;
@@ -140,14 +143,13 @@ class MicroContext {
   // Return a pointer to memory that can be used for decompression.
   // The pointer will be aligned to the <alignment> value.
   // Return nullptr if the requested size is not available.
-  // Can be called during kPrepare and kInvoke states.
+  // Can be called during kPrepare state.
   virtual void* AllocateDecompressionMemory(size_t bytes, size_t alignment);
 
-  // reset all allocation tracking
+  // Reset all allocation tracking.
+  // Can be called during kPrepare state.
   virtual void ResetDecompressionMemoryAllocations();
 
-#endif  // USE_TFLM_COMPRESSION
-
   // Set the alternate MicroProfilerInterface.
   // This can be used to profile subsystems simultaneously with the profiling
   // of kernels during the Eval phase.  See (b/379584353).
@@ -168,6 +170,11 @@ class MicroContext {
   }
 
  private:
+  const std::initializer_list<AlternateMemoryRegion>* decompress_regions_ =
+      nullptr;  // caller-owned list, stored by SetDecompressionMemory()
+  // Bytes allocated per region; array length is decompress_regions_->size()
+  size_t* decompress_regions_allocations_ = nullptr;
+
   TF_LITE_REMOVE_VIRTUAL_DELETE
 };
 
diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc
@@ -339,14 +339,10 @@ TfLiteStatus MicroInterpreter::SetAlternateProfiler(
   return micro_context_.SetAlternateProfiler(alt_profiler);
 }
 
-#ifdef USE_TFLM_COMPRESSION
-
 TfLiteStatus MicroInterpreter::SetDecompressionMemory(
     const std::initializer_list<MicroInterpreterContext::AlternateMemoryRegion>&
         regions) {
   return micro_context_.SetDecompressionMemory(regions);
 }
 
-#endif  // USE_TFLM_COMPRESSION
-
 }  // namespace tflite
diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h
@@ -160,17 +160,13 @@ class MicroInterpreter {
   // decompression subsystem.
   TfLiteStatus SetAlternateProfiler(MicroProfilerInterface* alt_profiler);
 
-#ifdef USE_TFLM_COMPRESSION
-
   // Set the alternate decompression memory regions.
   // Can only be called during the MicroInterpreter kInit state (i.e. must
   // be called before MicroInterpreter::AllocateTensors).
   TfLiteStatus SetDecompressionMemory(
       const std::initializer_list<MicroContext::AlternateMemoryRegion>&
           regions);
 
-#endif  // USE_TFLM_COMPRESSION
-
  protected:
   const MicroAllocator& allocator() const { return allocator_; }
   const TfLiteContext& context() const { return context_; }
diff --git a/tensorflow/lite/micro/micro_interpreter_context.cc b/tensorflow/lite/micro/micro_interpreter_context.cc
diff --git a/tensorflow/lite/micro/micro_interpreter_context.h b/tensorflow/lite/micro/micro_interpreter_context.h
diff --git a/tensorflow/lite/micro/micro_interpreter_context_test.cc b/tensorflow/lite/micro/micro_interpreter_context_test.cc
diff --git a/tensorflow/lite/micro/tools/benchmarking/Makefile.inc b/tensorflow/lite/micro/tools/benchmarking/Makefile.inc
diff --git a/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc b/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc

Original file line number	Diff line number	Diff line change
`@@ -271,7 +271,7 @@ TF_LITE_MICRO_TEST(DecodeHuffmanTable16BitsInt16Fail) {`
`271`	`271`	`tflite::testing::TestDecode<encodes.size() + ancillaries.size(),`
`272`	`272`	`outputs.size()>(`
`273`	`273`	`encodes, ancillaries, outputs, expected, tflite::Register_DECODE(),`
`274`		`- kTfLiteError);`
	`274`	`+ nullptr, kTfLiteError);`
`275`	`275`	`}`
`276`	`276`
`277`	`277`	`TF_LITE_MICRO_TEST(DecodeHuffmanTable32BitsInt8) {`
Original file line number	Diff line number	Diff line change
`@@ -575,7 +575,7 @@ TF_LITE_MICRO_TEST(DecodePruneQuantizedInvalidZeroPointInt16) {`
`575`	`575`	`tflite::testing::TestDecode<kEncodes.size() + kAncillaries.size(),`
`576`	`576`	`kOutputs.size()>(`
`577`	`577`	`kEncodes, kAncillaries, kOutputs, kExpected, tflite::Register_DECODE(),`
`578`		`- kTfLiteError);`
	`578`	`+ nullptr, kTfLiteError);`
`579`	`579`	`}`
`580`	`580`
`581`	`581`	`TF_LITE_MICRO_TESTS_END`
Original file line number	Diff line number	Diff line change
`@@ -339,14 +339,10 @@ TfLiteStatus MicroInterpreter::SetAlternateProfiler(`
`339`	`339`	`return micro_context_.SetAlternateProfiler(alt_profiler);`
`340`	`340`	`}`
`341`	`341`
`342`		`-#ifdef USE_TFLM_COMPRESSION`
`343`		`-`
`344`	`342`	`TfLiteStatus MicroInterpreter::SetDecompressionMemory(`
`345`	`343`	`const std::initializer_list<MicroInterpreterContext::AlternateMemoryRegion>&`
`346`	`344`	`regions) {`
`347`	`345`	`return micro_context_.SetDecompressionMemory(regions);`
`348`	`346`	`}`
`349`	`347`
`350`		`-#endif // USE_TFLM_COMPRESSION`
`351`		`-`
`352`	`348`	`} // namespace tflite`