
Commit 063846a

dominicsymes authored and eric-k256 committed

Microscaling format support

Adds support for Open Compute Project (OCP) floating-point Microscaling (MX) formats. Provides cast and matrix multiply operators that work with the microscaling formats. CONST supports constants of the MXFP data types. CAST supports casting the MXFP data types to and from bf16.

Co-Authored-By: Eric Kunze <[email protected]>
Signed-off-by: Dominic Symes <[email protected]>
Signed-off-by: Eric Kunze <[email protected]>
Change-Id: Ifb05503937f3d5c74cebe106156c60bff9af21dc

1 parent e32e56c commit 063846a

20 files changed, +743 -50 lines

chapters/appendix_a.adoc

Lines changed: 20 additions & 0 deletions

@@ -223,6 +223,26 @@ for (0 <= n < N, 0 <= c < C, 0 <= x < W) {
}
----

+==== MATMUL_T_BLOCK_SCALED
+
+The following generates input test data for test set S.
+For a compliant implementation, the test must pass whenever the attributes satisfy:
+`N*H*W >= MIN_DOT_PRODUCTS`
+
+[source,c++]
+----
+KS = C;
+for (0 <= n < N, 0 <= y < H, 0 <= c < C) {
+    A[n, y, c] = tosa_pro_fp_data(S, KS, 0, c, (n*H+y)*C+c);
+}
+A_scale, A_values = CAST_TO_BLOCK_SCALED(A);
+for (0 <= n < N, 0 <= c < C, 0 <= x < W) {
+    B[n, x, c] = tosa_pro_fp_data(S, KS, 1, c, (n*W+x)*C+c);
+}
+B_scale, B_values = CAST_TO_BLOCK_SCALED(B);
+----
+
==== TRANSPOSE_CONV2D

The following generates input test data for test set S.

chapters/introduction.adoc

Lines changed: 48 additions & 6 deletions

@@ -271,9 +271,33 @@ Number formats not required for any operators in a profile do not need to be implemented.
| (1<<47)-1
|Signed 48-bit two's-complement value.

+|fp4e2m1_t
+| -6.0
+| +6.0
+| 4-bit floating-point defined by <<OCP-MX,OCP-MX>> with two bits of exponent and one bit of mantissa. +
+Normal values must be supported. +
+Subnormal values must be supported. +
+Signed zero must be supported.
+
+|fp6e3m2_t
+| -28.0
+| +28.0
+| 6-bit floating-point defined by <<OCP-MX,OCP-MX>> with three bits of exponent and two bits of mantissa. +
+Normal values must be supported. +
+Subnormal values must be supported. +
+Signed zero must be supported.
+
+|fp6e2m3_t
+| -7.5
+| +7.5
+| 6-bit floating-point defined by <<OCP-MX,OCP-MX>> with two bits of exponent and three bits of mantissa. +
+Normal values must be supported. +
+Subnormal values must be supported. +
+Signed zero must be supported.
+
|fp8e4m3_t
| -448
-| 448
+| +448
| 8-bit floating-point defined by <<OCP-OFP8,OCP-OFP8>> with four bits of exponent and three bits of mantissa. +
Normal values must be supported. +
Subnormal values must be supported. +

@@ -292,6 +316,12 @@ Positive and negative infinity must be supported. +
NaN encodings must be supported. +
Signed zero must be supported.

+|fp8ue8m0_t
+| exp2(-127)
+| exp2(+127)
+| 8-bit floating-point value defined by <<OCP-MX,OCP-MX>> with no sign bit, eight bits of exponent, and no mantissa bits. +
+The NaN encoding must be supported.
+
|fp16_t
| -infinity
| +infinity

@@ -331,6 +361,11 @@ Subnormal values must either be supported or flushed to sign-preserved zero. +
Positive and negative infinity must be supported. +
At least one NaN encoding must be supported. +
Signed zero must be supported.
+
+|mxint8_t
+| -2
+| +1 + 63/64
+| 8-bit integer format with an implicit 1/64 scale defined by <<OCP-MX,OCP-MX>>.
|===

Note: In this specification, minimum<type> and maximum<type> will denote the minimum and maximum values of the data as stored in memory (ignoring the zero point).
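The range columns for the new MX element formats follow directly from their field widths. As a sanity check, a minimal sketch in the specification's pseudocode style (`mx_normal_max` is a hypothetical helper, not spec pseudocode; the parameters match the exponent/mantissa/bias values tabulated in numeric_accuracy_helpers.tosac below):

[source,c++]
----
// Largest finite value of an OCP-MX element format with e exponent bits,
// m mantissa bits, and the given exponent bias. These formats reserve no
// infinity/NaN encodings, so the top exponent code is a normal number.
fp64_t mx_normal_max(int32_t e, int32_t m, int32_t bias) {
    int32_t emax = (1 << e) - 1 - bias;
    return (2.0 - exp2(-m)) * exp2(emax);   // (2 - 2^-m) * 2^emax
}
// mx_normal_max(2, 1, 1) == 6.0   matches the fp4e2m1_t row
// mx_normal_max(2, 3, 1) == 7.5   matches the fp6e2m3_t row
// mx_normal_max(3, 2, 3) == 28.0  matches the fp6e3m2_t row
----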
@@ -450,15 +485,21 @@ This section assumes an operation acting on tensors named 'input', 'weight' and
Each output tensor element can be expressed as a dot product of elements between the 'input' and 'weight' tensors with optional bias addition.
The dot product has length KS, the kernel size.
If the operation does not specify a bias then 'bias' is taken to be zero in this section.
+If the dot product is of a block-scaled tensor, then 'input_scale' and 'weight_scale' are inputs to the dot product.
+
Note: KS is defined for each relevant operator in the appendix section <<Floating-Point Operator Test Data>>.

-In other words, each output element `out` can be expressed as a dot product between input elements `in[k]`, weight elements `w[k]`, bias `b`:
+Each output element `out` can be expressed as a dot product between input elements `in[k]`, weight elements `w[k]`, and bias `b`:

`out = in[0] * w[0] + in[1] * w[1] + ... + in[KS-1] * w[KS-1] + b`

The positions of `in[k]`, `w[k]`, `b` in the input, weight and bias tensors depend on the operation being performed.
This may be, for example, a convolution.

+In a block-scaled dot product, each input element `in[k]` and weight element `w[k]` is scaled by the corresponding scale value: +
+`in[k] = in_data[k] * in_scale[k/block_size]` +
+`w[k] = w_data[k] * w_scale[k/block_size]`
+
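For example, one block-scaled dot product can be sketched in the specification's pseudocode style (variable names assumed; each run of `block_size` data elements shares one scale element):

[source,c++]
----
acc_t out = b;
for (0 <= k < KS) {
    // one shared scale per block_size consecutive data elements
    acc_t in_k = in_data[k] * in_scale[k / block_size];
    acc_t w_k  = w_data[k]  * w_scale[k / block_size];
    out = out + in_k * w_k;
}
----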
This section defines the accuracy required for these operations.
In this section:
@@ -480,9 +521,9 @@ ABS_BOUND is the maximum allowed absolute error when NaN or overflow is not present.
|===
|Condition|ABS_BOUND|Notes

-|`(is_same<in_t,fp8e5m2_t>() \|\| is_same<in_t,fp8e4m3_t>()) && is_same<acc_t,fp32_t>`
+|`(is_same<in_t,fp8e5m2_t>() \|\| is_same<in_t,fp8e4m3_t>() \|\| is_same<in_t,fp6e3m2_t>() \|\| is_same<in_t,fp6e2m3_t>() \|\| is_same<in_t,fp4e2m1_t>() \|\| is_same<in_t,mxint8_t>()) && is_same<acc_t,fp32_t>`
|`2 * max(ksb, min(ksb,64) * (1 << 10))`
-| The FP8 dot product with FP32 accumulator is allowed a relaxed absolute error bound. +
+| FP8 and block-scaled dot products with an FP32 accumulator are allowed a relaxed absolute error bound. +
The 2 factor allows for different rounding modes. +
The second term in the maximum allows accumulating intermediates at lower precision. +
If the operator does not use an accumulator type acc_t, the final comparison should be is_same<out_t,fp32_t>.

@@ -499,9 +540,9 @@ The squared error for each result is summed, and the result must be less than the VARIANCE_ERROR_BOUND.
|===
|Condition|VARIANCE_ERROR_BOUND|Notes

-|`(is_same<in_t,fp8e5m2_t>() \|\| is_same<in_t,fp8e4m3_t>()) && is_same<acc_t,fp32_t>`
+|`(is_same<in_t,fp8e5m2_t>() \|\| is_same<in_t,fp8e4m3_t>() \|\| is_same<in_t,fp6e3m2_t>() \|\| is_same<in_t,fp6e2m3_t>() \|\| is_same<in_t,fp4e2m1_t>() \|\| is_same<in_t,mxint8_t>()) && is_same<acc_t,fp32_t>`
|`4 * 0.4 * max(ksb, min(ksb,64) * (1 << 20))`
-| The FP8 dot product with FP32 accumulator is allowed a relaxed variance error bound. +
+| FP8 and block-scaled dot products with an FP32 accumulator are allowed a relaxed variance error bound. +
The factors are similar to the absolute bound with precision factors squared for the variance bound. +
The 0.4 factor is derived from the uniform [-1,1] distribution variance of 1/3 by rounding up. +
The 4 factor is the square of the 2 factor in the absolute bound to allow for different rounding modes. +

@@ -678,3 +719,4 @@ The following publications are referred to in this specification, or provide more information.
. [[IEEE-754]]IEEE Std 754-2008, _IEEE Standard for Floating-point Arithmetic_, August 2008.
. [[OCP-OFP8]]Open Compute Project OCP 8-bit Floating Point Specification (OFP8), Revision 1.0.
+. [[OCP-MX]]Open Compute Project OCP Microscaling Formats (MX) Specification, Version 1.0.

chapters/tensor_ops.adoc

Lines changed: 19 additions & 0 deletions

@@ -187,6 +187,25 @@ include::{generated}/operators/MATMUL.adoc[]
include::{pseudocode}/operators/MATMUL.tosac[lines=10..-1]
----

+==== MATMUL_T_BLOCK_SCALED
+
+Performs two-dimensional matrix multiplications using block-scaled tensors.
+The block dimension is always the last dimension of the tensor, so the result is effectively a matrix multiply of A by the transposed B matrix.
+If the D dimension of input B is of size 1, the B matrix will be broadcast.
+A shape-level sketch follows the pseudocode below.
+
+*Precision Requirements*
+
+* Each output can be expressed as a dot product of two input vectors multiplied by the scale factors for the A and B tensors.
+* The dot product must meet the <<Dot product accuracy requirements>>.
+* When generating the data sets for the Dot product accuracy requirements, the data should be generated as fp32 and converted to a scale/value tensor pair using the scale calculation defined in CAST_TO_BLOCK_SCALED.
+
+include::{generated}/operators/MATMUL_T_BLOCK_SCALED.adoc[]
+
+[source,c++]
+----
+include::{pseudocode}/operators/MATMUL_T_BLOCK_SCALED.tosac[lines=10..-1]
+----
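The shape-level sketch referenced above, in the specification's pseudocode style (an illustration under assumed names: `A_values`/`A_scale` with data shape [N, H, C] and `B_values`/`B_scale` with data shape [D, W, C]; the normative definition is the included pseudocode):

[source,c++]
----
// C is the block (contraction) dimension, so each output element is a dot
// product along the last axis of A and of B -- A multiplied by B transposed.
for (0 <= n < N, 0 <= y < H, 0 <= x < W) {
    int32_t bn = (D == 1) ? 0 : n;    // broadcast B when its D dimension is 1
    acc_t acc = 0;
    for (0 <= c < C) {
        acc_t a = A_values[n, y, c] * A_scale[n, y, c / block_size];
        acc_t b = B_values[bn, x, c] * B_scale[bn, x, c / block_size];
        acc = acc + a * b;
    }
    output[n, y, x] = acc;
}
----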

==== MAX_POOL2D

This performs a max pooling over the given input tensor.

chapters/type_conversion.adoc

Lines changed: 43 additions & 0 deletions

@@ -56,6 +56,49 @@ Rules when casting between different types:
include::{pseudocode}/operators/CAST.tosac[lines=10..-1]
----

+==== CAST_FROM_BLOCK_SCALED
+
+Apply the scales from a scale tensor to the values in a value tensor, casting the result to the output type.
+The block dimension must be the last dimension of the tensor.
+
+include::{generated}/operators/CAST_FROM_BLOCK_SCALED.adoc[]
+
+*Precision Requirements*
+
+* Subnormal values must be supported on the output type.
+* Let `x` be a value from the `input_data` tensor.
+* Let `s` be the corresponding scale value from the `input_scale` tensor.
+* Let `out_ref = x * s` calculated using fp64_t arithmetic.
+* Let `out_imp` be the result of the implementation.
+* Then `tosa_reference_check_from_block<in_t, out_t>(out_imp, out_ref, s)` must be true.
+
+[source,c++]
+----
+include::{pseudocode}/operators/CAST_FROM_BLOCK_SCALED.tosac[lines=10..-1]
+----
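Element-wise, the cast reduces to one multiply per value. A minimal sketch with flattened indexing (an illustration; the names `output`, `block_size`, and `tensor_size` are assumed, the last dimension is a whole number of blocks, and the normative definition is the pseudocode included above):

[source,c++]
----
// The block dimension is last, so consecutive elements share a scale in
// runs of block_size.
for (0 <= i < tensor_size(input_data)) {
    output[i] = static_cast<out_t>(input_data[i] * input_scale[i / block_size]);
}
----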

+==== CAST_TO_BLOCK_SCALED
+
+Calculate a scale value per block of input values and use it to calculate scaled data values from an input tensor.
+The output tensors are cast to the specified scale and value types.
+The block dimension is the last dimension of the tensor.
+A sketch of the per-block scale selection follows the pseudocode below.
+
+include::{generated}/operators/CAST_TO_BLOCK_SCALED.adoc[]
+
+*Precision Requirements*
+
+* Subnormal values must be supported on the output type.
+* Let `x` be a value from the `input_data` tensor.
+* Let `out_ref_scale` be the result of calculating the block scale for the block containing `x` using fp64_t arithmetic.
+* Let `out_ref_value = x / out_ref_scale` calculated using fp64_t arithmetic.
+* Let `out_imp_scale, out_imp_value` be the results of the implementation for input `x`.
+* Then `tosa_reference_check_scale<scale_t, out_t>(out_imp_scale, out_imp_value, out_ref_scale, out_ref_value)` must be true.
+
+[source,c++]
+----
+include::{pseudocode}/operators/CAST_TO_BLOCK_SCALED.tosac[lines=10..-1]
+----
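For intuition, the OCP-MX convention derives each block's shared scale as a power of two from the block's largest magnitude, so the largest element lands within the element format's range. A sketch under that assumption (the normative calculation is the pseudocode included above; `floor_log2`, `block_start`, `block_index`, and `emax_elem`, the largest exponent of `out_t`, are assumed names):

[source,c++]
----
// One block of block_size input values -> one power-of-two scale
// (representable in fp8ue8m0_t) plus block_size narrow element values.
fp64_t max_abs = 0.0;
for (0 <= k < block_size) {
    max_abs = max(max_abs, abs(input[block_start + k]));
}
int32_t shared_exp = floor_log2(max_abs) - emax_elem;
output_scale[block_index] = static_cast<scale_t>(exp2(shared_exp));
for (0 <= k < block_size) {
    output_data[block_start + k] = static_cast<out_t>(input[block_start + k] / exp2(shared_exp));
}
----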

==== RESCALE

RESCALE is defined using an integer multiply, add, and shift.

pseudocode/library/generic_helpers.tosac

Lines changed: 7 additions & 3 deletions

@@ -1,14 +1,15 @@
//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020-2024 ARM Limited
+// (C) COPYRIGHT 2020-2025 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
// by a licensing agreement from ARM Limited.

bool_t is_floating_point<type>() {
-    if (is_same<type,fp16_t>() || is_same<type,fp32_t>() || is_same<type,bf16_t>() || is_same<type,fp8e4m3_t>() || is_same<type,fp8e5m2_t>()) {
+    if (is_same<type,fp16_t>() || is_same<type,fp32_t>() || is_same<type,bf16_t>() || is_same<type,fp8e4m3_t>() || is_same<type,fp8e5m2_t>() ||
+        is_same<type,fp4e2m1_t>() || is_same<type,fp6e3m2_t>() || is_same<type,fp6e2m3_t>() || is_same<type,fp8ue8m0_t>()) {
        return true;
    }
    return false;

@@ -69,7 +70,10 @@ in_out_t maximum_u<in_out_t>();
in_out_t minimum_u<in_out_t>();

// return true if the given value is a NaN. Only valid for floating-point types
-bool_t is_a_NaN(fp64_t value);
+bool_t is_a_NaN(in_t value);
+
+// return true if the given value is an Infinity. Only valid for floating-point types with defined Infinity values.
+bool_t is_an_Inf(in_t value);

// return true if value is a normal fp64 value (not zero, subnormal, infinite or NaN)
bool_t is_normal_fp64(fp64_t value);

pseudocode/library/numeric_accuracy_helpers.tosac

Lines changed: 85 additions & 6 deletions

@@ -37,25 +37,44 @@ fp64_t normal_min<in_t>() {
        return exp2(-6);
    } else if (is_same<in_t,fp8e5m2_t>()) {
        return exp2(-14);
-    }
+    } else if (is_same<in_t,fp6e2m3_t>()) {
+        return 1.0;
+    } else if (is_same<in_t,fp6e3m2_t>()) {
+        return 0.25;
+    } else if (is_same<in_t,fp4e2m1_t>()) {
+        return 1.0;
+    } else if (is_same<in_t,mxint8_t>()) {
+        return 1/64.0;
+    } else if (is_same<in_t,fp8ue8m0_t>()) {
+        return exp2(-127);
+    }
}

fp64_t normal_max<in_t>() {
    if (is_same<in_t,fp32_t>()) {
        return exp2(128) - exp2(127-23);
    } else if (is_same<in_t,bf16_t>()) {
-        return exp2(128) - exp2(127- 7);
+        return exp2(128) - exp2(127-7);
    } else if (is_same<in_t,fp16_t>()) {
-        return exp2( 16) - exp2( 15-10);
+        return exp2(16) - exp2(15-10);
    } else if (is_same<in_t,fp8e4m3_t>()) {
-        return exp2( 9) - exp2( 8-2);
+        return exp2(9) - exp2(8-2);
    } else if (is_same<in_t,fp8e5m2_t>()) {
-        return exp2( 16) - exp2( 15-2);
+        return exp2(16) - exp2(15-2);
+    } else if (is_same<in_t,fp6e2m3_t>()) {
+        return 7.5;
+    } else if (is_same<in_t,fp6e3m2_t>()) {
+        return 28.0;
+    } else if (is_same<in_t,fp4e2m1_t>()) {
+        return 6.0;
+    } else if (is_same<in_t,mxint8_t>()) {
+        return 1.0 + 63.0/64.0;
+    } else if (is_same<in_t,fp8ue8m0_t>()) {
+        return exp2(127);
    }
}

// Number of fractional (mantissa) bits
-int normal_frac<in_t> () {
+int normal_frac<in_t>() {
    if (is_same<in_t,fp32_t>()) {
        return 23;
    } else if (is_same<in_t,bf16_t>()) {

@@ -66,9 +85,69 @@ int normal_frac<in_t> () {
        return 3;
    } else if (is_same<in_t,fp8e5m2_t>()) {
        return 2;
+    } else if (is_same<in_t,fp6e2m3_t>()) {
+        return 3;
+    } else if (is_same<in_t,fp6e3m2_t>()) {
+        return 2;
+    } else if (is_same<in_t,fp4e2m1_t>()) {
+        return 1;
+    } else if (is_same<in_t,mxint8_t>()) {
+        return 0;
    }
}

+// Exponent width
+int exponent_bits<in_t>() {
+    if (is_same<in_t,fp32_t>()) {
+        return 8;
+    } else if (is_same<in_t,fp16_t>()) {
+        return 5;
+    } else if (is_same<in_t,bf16_t>()) {
+        return 8;
+    } else if (is_same<in_t,fp8e4m3_t>()) {
+        return 4;
+    } else if (is_same<in_t,fp8e5m2_t>()) {
+        return 5;
+    } else if (is_same<in_t,fp6e2m3_t>()) {
+        return 2;
+    } else if (is_same<in_t,fp6e3m2_t>()) {
+        return 3;
+    } else if (is_same<in_t,fp4e2m1_t>()) {
+        return 2;
+    } else if (is_same<in_t,mxint8_t>()) {
+        return 0;
+    }
+}
+
+int exponent_bias<in_t>() {
+    if (is_same<in_t,fp32_t>()) {
+        return 127;
+    } else if (is_same<in_t,fp16_t>()) {
+        return 15;
+    } else if (is_same<in_t,bf16_t>()) {
+        return 127;
+    } else if (is_same<in_t,fp8e4m3_t>()) {
+        return 7;
+    } else if (is_same<in_t,fp8e5m2_t>()) {
+        return 15;
+    } else if (is_same<in_t,fp6e2m3_t>()) {
+        return 1;
+    } else if (is_same<in_t,fp6e3m2_t>()) {
+        return 3;
+    } else if (is_same<in_t,fp4e2m1_t>()) {
+        return 1;
+    } else if (is_same<in_t,mxint8_t>()) {
+        return 6;
+    } else if (is_same<in_t,fp8ue8m0_t>()) {
+        return 127;
+    }
+}
+
+// Returns a mask for the low N bits of a value
+uint32_t get_low_bitmask(int32_t bits) {
+    return (1 << bits) - 1;
+}
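A hypothetical usage sketch for the helpers above (illustrative only; `decode_normal` is an assumed name, the layout places the exponent field immediately above the mantissa, the sign bit is ignored, and mxint8_t, which has no exponent field, is out of scope):

[source,c++]
----
// Decode a normal value from its bit fields using the helpers above:
// value = (1 + mantissa * 2^-frac) * 2^(exponent_field - bias)
fp64_t decode_normal<in_t>(uint32_t bits) {
    int32_t frac = normal_frac<in_t>();
    uint32_t mantissa = bits & get_low_bitmask(frac);
    uint32_t exp_field = (bits >> frac) & get_low_bitmask(exponent_bits<in_t>());
    return (1.0 + mantissa * exp2(-frac)) * exp2((int32_t)exp_field - exponent_bias<in_t>());
}
----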
fp64_t calcAbsErrorBound<in_t>(fp64_t bound_magnitude, fp64_t bounds_value,
                               fp64_t lower_bound, fp64_t normal_divisor) {
    fp64_t error_bound = 0.0;
