Automated sync from github.com/tensorflow/tensorflow (#3214)

TFLM-bot · veblush · web-flow · commit 37fb41f3710b · 2025-10-16T14:33:56.000-07:00
* Sync from upstream TF.

* Manual change from cl/819821231

* Fix

* Copyright

---------

Co-authored-by: Esun Kim <veblush@google.com>
diff --git a/python/tflite_micro/numpy_utils.cc b/python/tflite_micro/numpy_utils.cc
@@ -58,6 +58,9 @@ int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) {
     case kTfLiteInt4:
       // TODO(b/246806634): NPY_INT4 currently doesn't exist
       return NPY_BYTE;
+    case kTfLiteInt2:
+      // TODO(b/246806634): NPY_INT2 currently doesn't exist
+      return NPY_BYTE;
     case kTfLiteInt8:
       return NPY_INT8;
     case kTfLiteInt64:
diff --git a/tensorflow/compiler/mlir/lite/core/c/tflite_types.h b/tensorflow/compiler/mlir/lite/core/c/tflite_types.h
@@ -64,6 +64,7 @@ typedef enum {
   kTfLiteUInt16 = 17,
   kTfLiteInt4 = 18,
   kTfLiteBFloat16 = 19,
+  kTfLiteInt2 = 20,
 } TfLiteType;
 // LINT.ThenChange(//tensorflow/lite/profiling/proto/model_runtime_info.proto:EdgeDataType)
 
diff --git a/tensorflow/compiler/mlir/lite/schema/schema.fbs b/tensorflow/compiler/mlir/lite/schema/schema.fbs
@@ -59,6 +59,7 @@ enum TensorType : byte {
   UINT16 = 16,
   INT4 = 17,
   BFLOAT16 = 18,
+  INT2 = 19,
 }
 
 // Custom quantization parameters for experimenting with new quantization
diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc
@@ -1088,6 +1088,9 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
     case TensorType_INT4:
       *type = kTfLiteInt4;
       return kTfLiteOk;
+    case TensorType_INT2:
+      *type = kTfLiteInt2;
+      return kTfLiteOk;
     default:
       *type = kTfLiteNoType;
       TF_LITE_REPORT_ERROR(error_reporter,
diff --git a/tensorflow/lite/core/c/common.cc b/tensorflow/lite/core/c/common.cc
@@ -509,6 +509,8 @@ const char* TfLiteTypeGetName(TfLiteType type) {
       return "VARIANT";
     case kTfLiteInt4:
       return "INT4";
+    case kTfLiteInt2:
+      return "INT2";
   }
   return "Unknown type";
 }
diff --git a/tensorflow/lite/kernels/internal/portable_tensor_utils.cc b/tensorflow/lite/kernels/internal/portable_tensor_utils.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
 
 #include <algorithm>
+#include <cassert>
 #include <cmath>
 #include <cstdint>
 
@@ -92,23 +93,90 @@ void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
   }
 }
 
-void PackInt8IntoDenseInt4(const int8_t* src_buffer, int num_elements,
-                           int8_t* dst_buffer) {
-  // num_elements means the number of elements regardless of packed or unpacked.
-  // For example, 3 elements means both
-  //   1) Packed: 3 int4's = 12 bit -> 16 bits (padded) = 2 bytes.
-  //      stored in src_buffer[0] and src_buffer[1] (i = 0..1)
-  //   2) Unpacked: 3 int8's = 3 bytes.
-  //      stored in dst_buffer[0], dst_buffer[1] and dst_buffer[2] (j = 0..2)
-  for (int i = 0; i < num_elements - 1; i += 2) {
-    dst_buffer[i / 2] = src_buffer[i] & 0x0F;
-    dst_buffer[i / 2] |= src_buffer[i + 1] << 4;
+void UnpackPackedIntToInt8(const int8_t* src_buffer, int num_elements,
+                           int bit_width, int8_t* dst_buffer) {
+  assert(bit_width == 2 || bit_width == 4);
+  if (bit_width == 4) {
+    // num_elements means the number of elements regardless of packed or
+    // unpacked. For example, 3 elements means both
+    //   1) Packed: 3 int4's = 12 bit -> 16 bits (padded) = 2 bytes.
+    //      stored in src_buffer[0] and src_buffer[1] (i = 0..1)
+    //   2) Unpacked: 3 int8's = 3 bytes.
+    //      stored in dst_buffer[0], dst_buffer[1] and dst_buffer[2] (j = 0..2)
+    for (int i = 0; i < num_elements / 2; i++) {
+      int8_t byte = src_buffer[i];
+      // Shift left first so that sign is properly extended when shifted right
+      int8_t lower = static_cast<int8_t>(byte << 4) >> 4;
+      int8_t higher = byte >> 4;
+      dst_buffer[2 * i] = lower;
+      dst_buffer[2 * i + 1] = higher;
+    }
+
+    // If the buffer size is odd, extract the final lower nibble.
+    if (num_elements % 2 != 0) {
+      dst_buffer[num_elements - 1] =
+          static_cast<int8_t>(src_buffer[num_elements / 2] << 4) >> 4;
+    }
+  } else if (bit_width == 2) {
+    for (int i = 0; i < num_elements / 4; i++) {
+      int8_t byte = src_buffer[i];
+      // Shift left first so that sign is properly extended when shifted right
+      int8_t val1 = static_cast<int8_t>(byte << 6) >> 6;
+      int8_t val2 = static_cast<int8_t>((byte << 4) & 0xFF) >> 6;
+      int8_t val3 = static_cast<int8_t>((byte << 2) & 0xFF) >> 6;
+      int8_t val4 = byte >> 6;
+      dst_buffer[4 * i] = val1;
+      dst_buffer[4 * i + 1] = val2;
+      dst_buffer[4 * i + 2] = val3;
+      dst_buffer[4 * i + 3] = val4;
+    }
+
+    // Handle the remaining elements.
+    int remaining_elements = num_elements % 4;
+    if (remaining_elements > 0) {
+      int8_t byte = src_buffer[num_elements / 4];
+      for (int i = 0; i < remaining_elements; i++) {
+        dst_buffer[num_elements - remaining_elements + i] =
+            static_cast<int8_t>((byte << (6 - 2 * i)) & 0xFF) >> 6;
+      }
+    }
   }
-  auto packed_size = (num_elements + 1) / 2;
+}
 
-  // Copy the final nibble if the buffer is odd-lengthed
-  if (num_elements % 2 != 0) {
-    dst_buffer[packed_size - 1] = src_buffer[num_elements - 1] & 0x0F;
+void PackInt8IntoDenseInt(const int8_t* src_buffer, int num_elements,
+                          int bit_width, int8_t* dst_buffer) {
+  assert(bit_width == 2 || bit_width == 4);
+  if (bit_width == 4) {
+    // num_elements means the number of elements regardless of packed or
+    // unpacked. For example, 3 elements means both
+    //   1) Unpacked: 3 int8's = 3 bytes.
+    //      stored in src_buffer[0], src_buffer[1] and src_buffer[2] (j = 0..2)
+    //   2) Packed: 3 int4's = 12 bit -> 16 bits (padded) = 2 bytes.
+    //      stored in dst_buffer[0] and dst_buffer[1] (i = 0..1)
+    for (int i = 0; i < num_elements / 2; ++i) {
+      dst_buffer[i] = (src_buffer[2 * i] & 0x0F) | (src_buffer[2 * i + 1] << 4);
+    }
+    // If the buffer size is odd, pack the final nibble.
+    if (num_elements % 2 != 0) {
+      dst_buffer[num_elements / 2] = src_buffer[num_elements - 1] & 0x0F;
+    }
+  } else if (bit_width == 2) {
+    for (int i = 0; i < num_elements / 4; ++i) {
+      dst_buffer[i] = (src_buffer[4 * i] & 0x03) |
+                      ((src_buffer[4 * i + 1] & 0x03) << 2) |
+                      ((src_buffer[4 * i + 2] & 0x03) << 4) |
+                      ((src_buffer[4 * i + 3] & 0x03) << 6);
+    }
+    // Handle the remaining elements.
+    int remaining_elements = num_elements % 4;
+    if (remaining_elements > 0) {
+      int8_t packed_val = 0;
+      for (int i = 0; i < remaining_elements; ++i) {
+        packed_val |= (src_buffer[num_elements - remaining_elements + i] & 0x03)
+                      << (i * 2);
+      }
+      dst_buffer[num_elements / 4] = packed_val;
+    }
   }
 }
 
diff --git a/tensorflow/lite/kernels/internal/portable_tensor_utils.h b/tensorflow/lite/kernels/internal/portable_tensor_utils.h
@@ -618,20 +618,41 @@ void ApplySignbitToVector(const float* __restrict__ vector, int v_size,
 void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
                              int8_t* dst_buffer);
 
-// Pack `src_buffer` into a densely packed buffer of int4 values.
+// Unpack or inflate `src_buffer` by taking each byte and splitting it into
+// multiple elements into `dst_buffer`. Supports 2-bit and 4-bit packed integers
 // Parameters:
-//   src_buffer   : Buffer containing int4 values stored in int8 memory.
+//   src_buffer   : Densely packed buffer containing int2 or int4 values.
+//   num_elements : Number of unpacked elements to be read from the buffer.
+//                  This should be equal to the size of `dst_buffer`.
+//   bit_width    : The bit width of the packed elements (either 2 or 4).
+//   dst_buffer   : Buffer to unpack into. Should be allocated by the caller.
+//                  Size should be at least `num_elements`.
+// Notes:
+//   For 4-bit unpacking: e.g., `src_buffer = {0x12, 0x34};` (num_elements = 4)
+//   will return `dst_buffer = {0x02, 0x01, 0x04, 0x03}`.
+//   For 2-bit unpacking: e.g., `src_buffer = {0x12};` (num_elements = 4)
+//   will return `dst_buffer = {0xFE, 0x00, 0x01, 0x00}` (sign extended).
+void UnpackPackedIntToInt8(const int8_t* src_buffer, int num_elements,
+                           int bit_width, int8_t* dst_buffer);
+
+// Pack `src_buffer` into a densely packed buffer of int2 or int4 values.
+// Parameters:
+//   src_buffer   : Buffer containing int2 or int4 values stored in int8
+//                  memory.
 //   num_elements : Number of elements stored in the buffer. Note that this can
 //                  be smaller than the size of `src_buffer` by 1 if it's odd,
 //                  in which case the last nibble in `src_buffer` is ignored.
 //                  This should be equal to the size of `dst_buffer`.
+//   bit_width    : The bit width of the packed elements (either 2 or 4).
 //   dst_buffer   : Buffer to pack into. Should be allocated by the caller.
 //                  Size should be at least `num_elements`.
 // Notes:
-//   For example, given `src_buffer = {0x02, 0x01, 0x04, 0x03}`, calling this
-//   function will return `dst_buffer = {0x12, 0x34}`.
-void PackInt8IntoDenseInt4(const int8_t* src_buffer, int num_elements,
-                           int8_t* dst_buffer);
+//   For 4-bit packing: e.g., given `src_buffer = {0x02, 0x01, 0x04, 0x03}`,
+//   calling this function will return `dst_buffer = {0x12, 0x34}`.
+//   For 2-bit packing: e.g., given `src_buffer = {0x00, 0x01, 0x00, 0x02}`,
+//   calling this function will return `dst_buffer = {0x84}`.
+void PackInt8IntoDenseInt(const int8_t* src_buffer, int num_elements,
+                          int bit_width, int8_t* dst_buffer);
 }  // namespace tensor_utils
 
 }  // namespace tflite
diff --git a/tensorflow/lite/micro/tools/layer_by_layer.cc b/tensorflow/lite/micro/tools/layer_by_layer.cc
@@ -120,6 +120,9 @@ TfLiteStatus ConvertTensorType(TfLiteType type, TensorTypes& tensor_type) {
     case kTfLiteInt4:
       tensor_type = TensorTypes_INT4;
       return kTfLiteOk;
+    case kTfLiteInt2:
+      tensor_type = TensorTypes_INT2;
+      return kTfLiteOk;
     case kTfLiteNoType:
       MicroPrintf("Unsupported data type %d in tensor\n", tensor_type);
       return kTfLiteError;
diff --git a/tensorflow/lite/micro/tools/layer_by_layer_schema.fbs b/tensorflow/lite/micro/tools/layer_by_layer_schema.fbs
@@ -35,6 +35,7 @@ enum TensorTypes : byte {
   UINT16 = 16,
   INT4 = 17,
   BFLOAT16 = 18,
+  INT2 = 19,
 }
 
 table TensorData {
diff --git a/tensorflow/lite/micro/tools/layer_by_layer_schema_generated.h b/tensorflow/lite/micro/tools/layer_by_layer_schema_generated.h
@@ -59,11 +59,12 @@ enum TensorTypes : int8_t {
   TensorTypes_UINT16 = 16,
   TensorTypes_INT4 = 17,
   TensorTypes_BFLOAT16 = 18,
+  TensorTypes_INT2 = 19,
   TensorTypes_MIN = TensorTypes_FLOAT32,
-  TensorTypes_MAX = TensorTypes_BFLOAT16
+  TensorTypes_MAX = TensorTypes_INT2
 };
 
-inline const TensorTypes (&EnumValuesTensorTypes())[19] {
+inline const TensorTypes (&EnumValuesTensorTypes())[20] {
   static const TensorTypes values[] = {
     TensorTypes_FLOAT32,
     TensorTypes_FLOAT16,
@@ -83,13 +84,14 @@ inline const TensorTypes (&EnumValuesTensorTypes())[19] {
     TensorTypes_UINT32,
     TensorTypes_UINT16,
     TensorTypes_INT4,
-    TensorTypes_BFLOAT16
+    TensorTypes_BFLOAT16,
+    TensorTypes_INT2
   };
   return values;
 }
 
 inline const char * const *EnumNamesTensorTypes() {
-  static const char * const names[20] = {
+  static const char * const names[21] = {
     "FLOAT32",
     "FLOAT16",
     "INT32",
@@ -109,13 +111,14 @@ inline const char * const *EnumNamesTensorTypes() {
     "UINT16",
     "INT4",
     "BFLOAT16",
+    "INT2",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameTensorTypes(TensorTypes e) {
-  if (::flatbuffers::IsOutRange(e, TensorTypes_FLOAT32, TensorTypes_BFLOAT16)) return "";
+  if (::flatbuffers::IsOutRange(e, TensorTypes_FLOAT32, TensorTypes_INT2)) return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesTensorTypes()[index];
 }
diff --git a/tensorflow/lite/python/schema_py_generated.py b/tensorflow/lite/python/schema_py_generated.py
@@ -27,6 +27,7 @@ class TensorType(object):
     UINT16 = 16
     INT4 = 17
     BFLOAT16 = 18
+    INT2 = 19
 
 
 class QuantizationDetails(object):
diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h
@@ -703,11 +703,12 @@ enum TensorType : int8_t {
   TensorType_UINT16 = 16,
   TensorType_INT4 = 17,
   TensorType_BFLOAT16 = 18,
+  TensorType_INT2 = 19,
   TensorType_MIN = TensorType_FLOAT32,
-  TensorType_MAX = TensorType_BFLOAT16
+  TensorType_MAX = TensorType_INT2
 };
 
-inline const TensorType (&EnumValuesTensorType())[19] {
+inline const TensorType (&EnumValuesTensorType())[20] {
   static const TensorType values[] = {
     TensorType_FLOAT32,
     TensorType_FLOAT16,
@@ -727,13 +728,14 @@ inline const TensorType (&EnumValuesTensorType())[19] {
     TensorType_UINT32,
     TensorType_UINT16,
     TensorType_INT4,
-    TensorType_BFLOAT16
+    TensorType_BFLOAT16,
+    TensorType_INT2
   };
   return values;
 }
 
 inline const char * const *EnumNamesTensorType() {
-  static const char * const names[20] = {
+  static const char * const names[21] = {
     "FLOAT32",
     "FLOAT16",
     "INT32",
@@ -753,13 +755,14 @@ inline const char * const *EnumNamesTensorType() {
     "UINT16",
     "INT4",
     "BFLOAT16",
+    "INT2",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameTensorType(TensorType e) {
-  if (::flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_BFLOAT16)) return "";
+  if (::flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_INT2)) return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesTensorType()[index];
 }
diff --git a/tensorflow/lite/tools/visualize.py b/tensorflow/lite/tools/visualize.py
@@ -33,7 +33,7 @@
   from tflite_micro.tensorflow.lite.python import schema_py_generated as schema_fb
 else:
   # This file is part of tflite_runtime package.
-  from tflite_runtime import schema_py_generated as schema_fb
+  from tflite_micro.tensorflow.lite_runtime import schema_py_generated as schema_fb
 
 # A CSS description for making the visualizer
 _CSS = """

Original file line number	Diff line number	Diff line change
`@@ -59,6 +59,7 @@ enum TensorType : byte {`
`59`	`59`	`UINT16 = 16,`
`60`	`60`	`INT4 = 17,`
`61`	`61`	`BFLOAT16 = 18,`
	`62`	`+ INT2 = 19,`
`62`	`63`	`}`
`63`	`64`
`64`	`65`	`// Custom quantization parameters for experimenting with new quantization`
Original file line number	Diff line number	Diff line change
`@@ -509,6 +509,8 @@ const char* TfLiteTypeGetName(TfLiteType type) {`
`509`	`509`	`return "VARIANT";`
`510`	`510`	`case kTfLiteInt4:`
`511`	`511`	`return "INT4";`
	`512`	`+ case kTfLiteInt2:`
	`513`	`+ return "INT2";`
`512`	`514`	`}`
`513`	`515`	`return "Unknown type";`
`514`	`516`	`}`
Original file line number	Diff line number	Diff line change
`@@ -35,6 +35,7 @@ enum TensorTypes : byte {`
`35`	`35`	`UINT16 = 16,`
`36`	`36`	`INT4 = 17,`
`37`	`37`	`BFLOAT16 = 18,`
	`38`	`+ INT2 = 19,`
`38`	`39`	`}`
`39`	`40`
`40`	`41`	`table TensorData {`