|
10 | 10 | // Describe AArch64 instructions format here |
11 | 11 | // |
12 | 12 |
|
| 13 | +// Helper class to convert vector element types to integers: the VT field is v2i32 for v2f32, v4i32 for v4f32, and 'untyped' for any other InVT. |
| 14 | +class ChangeElementTypeToInteger<ValueType InVT> {
| 15 | + ValueType VT = !cond(
| 16 | + !eq(InVT, v2f32): v2i32,
| 17 | + !eq(InVT, v4f32): v4i32,
| 18 | + // TODO: Other types. Until they are added, every other InVT falls through to the default arm below. |
| 19 | + true : untyped); // Default arm: no integer mapping is defined for this InVT. |
| 20 | +}
| 21 | + |
| 22 | +class VTPair<ValueType A, ValueType B> { // Groups two value types into one record with fields VT0 and VT1. |
| 23 | + ValueType VT0 = A; // First type of the pair. |
| 24 | + ValueType VT1 = B; // Second type of the pair. |
| 25 | +}
| 26 | + |
13 | 27 | // Format specifies the encoding used by the instruction. This is part of the |
14 | 28 | // ad-hoc solution used to emit machine instruction encodings by our machine |
15 | 29 | // code emitter. |
@@ -8952,36 +8966,6 @@ multiclass SIMDThreeSameVectorBFDot<bit U, string asm> { |
8952 | 8966 | v4f32, v8bf16>; |
8953 | 8967 | } |
8954 | 8968 |
|
8955 | | -class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm, // Indexed bfdot instruction class with its selection pattern written inline. |
8956 | | - string dst_kind, string lhs_kind,
8957 | | - string rhs_kind,
8958 | | - RegisterOperand RegType,
8959 | | - ValueType AccumType,
8960 | | - ValueType InputType>
8961 | | - : BaseSIMDIndexedTied<Q, U, 0b0, 0b01, 0b1111,
8962 | | - RegType, RegType, V128, VectorIndexS,
8963 | | - asm, "", dst_kind, lhs_kind, rhs_kind,
8964 | | - [(set (AccumType RegType:$dst),
8965 | | - (AccumType (int_aarch64_neon_bfdot
8966 | | - (AccumType RegType:$Rd),
8967 | | - (InputType RegType:$Rn),
8968 | | - (InputType (bitconvert (AccumType
8969 | | - (AArch64duplane32 (v4f32 V128:$Rm), // The lane duplicate is matched only with $Rm typed as v4f32. |
8970 | | - VectorIndexS:$idx)))))))]> {
8971 | | -
8972 | | - bits<2> idx; // 2-bit lane index; its two bits are placed separately in the encoding below. |
8973 | | - let Inst{21} = idx{0}; // L (bit 0 of idx) |
8974 | | - let Inst{11} = idx{1}; // H (bit 1 of idx) |
8975 | | -}
8976 | | - |
8977 | | -multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> { // Defines the v4bf16 and v8bf16 records of the indexed BF16 dot class. |
8978 | | -
8979 | | - def v4bf16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h", // Q=0 form. |
8980 | | - ".2h", V64, v2f32, v4bf16>; // RegType V64, AccumType v2f32, InputType v4bf16. |
8981 | | - def v8bf16 : BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h", // Q=1 form. |
8982 | | - ".2h", V128, v4f32, v8bf16>; // RegType V128, AccumType v4f32, InputType v8bf16. |
8983 | | -}
8984 | | - |
8985 | 8969 | let mayRaiseFPException = 1, Uses = [FPCR] in |
8986 | 8970 | class SIMDBF16MLAL<bit Q, string asm, SDPatternOperator OpNode> |
8987 | 8971 | : BaseSIMDThreeSameVectorTied<Q, 0b1, 0b110, 0b11111, V128, asm, ".4s", |
@@ -9054,6 +9038,40 @@ class BF16ToSinglePrecision<string asm> |
9054 | 9038 | } |
9055 | 9039 | } // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0 |
9056 | 9040 |
|
| 9041 | +multiclass BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm, // Defines one indexed bfdot instruction record plus the anonymous Pat records that select it. |
| 9042 | + string dst_kind, string lhs_kind,
| 9043 | + string rhs_kind,
| 9044 | + RegisterOperand RegType,
| 9045 | + ValueType AccumType,
| 9046 | + ValueType InputType> {
| 9047 | + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { // NOTE(review): presumably stated explicitly because the def below has an empty pattern list - confirm. |
| 9048 | + def NAME : BaseSIMDIndexedTied<Q, U, 0b0, 0b01, 0b1111, RegType, RegType, V128, VectorIndexS,
| 9049 | + asm, "", dst_kind, lhs_kind, rhs_kind, []> // Empty pattern list: selection is done by the Pat records in the foreach below. |
| 9050 | + {
| 9051 | + bits<2> idx; // 2-bit lane index; its two bits are placed separately in the encoding below. |
| 9052 | + let Inst{21} = idx{0}; // L (bit 0 of idx) |
| 9053 | + let Inst{11} = idx{1}; // H (bit 1 of idx) |
| 9054 | + }
| 9055 | + }
| 9056 | +
| 9057 | + foreach DupTypes = [VTPair<AccumType, v4f32>, // Pair 1: lane duplicate typed AccumType, taken from a v4f32 source. |
| 9058 | + VTPair<ChangeElementTypeToInteger<AccumType>.VT, v4i32>] in { // Pair 2: integer counterpart of AccumType, taken from a v4i32 source. |
| 9059 | + def : Pat<(AccumType (int_aarch64_neon_bfdot
| 9060 | + (AccumType RegType:$Rd), (InputType RegType:$Rn),
| 9061 | + (InputType (bitconvert
| 9062 | + (DupTypes.VT0 (AArch64duplane32 (DupTypes.VT1 // VT0 is the duplicate's result type, VT1 its source vector type. |
| 9063 | + (bitconvert (v8bf16 V128:$Rm))), VectorIndexS:$Idx)))))), // $Rm is a v8bf16 value bitcast to the source type VT1. |
| 9064 | + (!cast<Instruction>(NAME) $Rd, $Rn, $Rm, VectorIndexS:$Idx)>; // Emits the instruction record defined above as NAME. |
| 9065 | + }
| 9066 | +}
| 9067 | + |
| 9068 | +multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> { // Instantiates BaseSIMDThreeSameVectorBF16DotI once per register form (v4bf16 and v8bf16). |
| 9069 | + defm v4bf16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h", // Q=0 form. |
| 9070 | + ".2h", V64, v2f32, v4bf16>; // RegType V64, AccumType v2f32, InputType v4bf16. |
| 9071 | + defm v8bf16 : BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h", // Q=1 form. |
| 9072 | + ".2h", V128, v4f32, v8bf16>; // RegType V128, AccumType v4f32, InputType v8bf16. |
| 9073 | +}
| 9074 | + |
9057 | 9075 | //---------------------------------------------------------------------------- |
9058 | 9076 | class BaseSIMDThreeSameVectorIndexB<bit Q, bit U, bits<2> sz, bits<4> opc, |
9059 | 9077 | string asm, string dst_kind, |
|
0 commit comments