Skip to content

Commit 8ceabe8

Browse files
Sve2 AddHighNarrowing (Even/Odd) (#116848)
Contributing towards #115479
1 parent 3eba330 commit 8ceabe8

File tree

10 files changed

+401
-89
lines changed

10 files changed

+401
-89
lines changed

src/coreclr/jit/codegenarm64test.cpp

Lines changed: 27 additions & 60 deletions
Large diffs are not rendered by default.

src/coreclr/jit/emitarm64.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,9 @@ static code_t insEncodeReg3Scale(bool isScaled);
555555
// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction
556556
static code_t insEncodeSveElemsize(emitAttr size);
557557

558+
// Returns the encoding to select the 1/2/4 byte elemsize for an Arm64 SVE narrowing vector instruction (size encoded at bit locations '23-22')
559+
static code_t insEncodeNarrowingSveElemsize(emitAttr size);
560+
558561
// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
559562
// This specifically encodes the size at bit locations '22-21'.
560563
static code_t insEncodeSveElemsize_22_to_21(emitAttr size);

src/coreclr/jit/emitarm64sve.cpp

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3564,7 +3564,6 @@ void emitter::emitInsSve_R_R_R(instruction ins,
35643564
case INS_sve_subhnt:
35653565
case INS_sve_rsubhnb:
35663566
case INS_sve_rsubhnt:
3567-
unreached(); // TODO-SVE: Not yet supported.
35683567
assert(insOptsScalableWide(opt));
35693568
assert(isVectorRegister(reg1)); // ddddd
35703569
assert(isVectorRegister(reg2)); // nnnnn
@@ -7440,6 +7439,30 @@ void emitter::emitIns_PRFOP_R_R_I(instruction ins,
74407439
return 0;
74417440
}
74427441

7442+
/*****************************************************************************
7443+
*
7444+
* Returns the encoding to select the 1/2/4 byte elemsize for an Arm64 SVE narrowing vector instruction.
* The size is encoded at bit locations '23-22' (the 'xx' field): 1 byte -> 01, 2 bytes -> 10, 4 bytes -> 11.
* Any other size hits the assert in the default case; if execution continues past it, 0 is returned.
* NOTE(review): presumably 'size' is the narrowed (destination) element size, since only 1/2/4 bytes are
* accepted - confirm against the callers.
7445+
*/
7446+
7447+
/*static*/ emitter::code_t emitter::insEncodeNarrowingSveElemsize(emitAttr size)
7448+
{
7449+
switch (size)
7450+
{
7451+
case EA_1BYTE:
7452+
return 0x00400000; // xx = 01: set the bit at location 22
7453+
7454+
case EA_2BYTE:
7455+
return 0x00800000; // xx = 10: set the bit at location 23
7456+
7457+
case EA_4BYTE:
7458+
return 0x00C00000; // xx = 11: set the bits at locations 23 and 22
7459+
7460+
default:
7461+
assert(!"Invalid insOpt for vector register"); // only 1, 2 and 4 byte sizes have an encoding here
7462+
}
7463+
return 0; // reached only from the default case, when the assert does not stop execution
7464+
}
7465+
74437466
/*****************************************************************************
74447467
*
74457468
* Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
@@ -10018,7 +10041,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
1001810041
case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd
1001910042
case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide
1002010043
case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate
10021-
case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part
1002210044
case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment)
1002310045
code = emitInsCodeSve(ins, fmt);
1002410046
code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
@@ -10028,6 +10050,16 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
1002810050
dst += emitOutput_Instr(dst, code);
1002910051
break;
1003010052

10053+
// Scalable, 3 regs, no predicates, narrowing
10054+
case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part
10055+
code = emitInsCodeSve(ins, fmt);
10056+
code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
10057+
code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn
10058+
code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm
10059+
code |= insEncodeNarrowingSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
10060+
dst += emitOutput_Instr(dst, code);
10061+
break;
10062+
1003110063
// Scalable, 3 regs, no predicates. General purpose source registers
1003210064
case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
1003310065
// increment)
@@ -12658,7 +12690,6 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
1265812690
case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd
1265912691
case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide
1266012692
case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate
12661-
case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part
1266212693
case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment)
1266312694
assert(insOptsScalableStandard(id->idInsOpt())); // xx
1266412695
assert(isVectorRegister(id->idReg1())); // ddddd
@@ -12667,6 +12698,15 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
1266712698
assert(isScalableVectorSize(id->idOpSize()));
1266812699
break;
1266912700

12701+
// Scalable, unpredicated, narrowing
12702+
case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part
12703+
assert(insOptsScalableWide(id->idInsOpt())); // xx
12704+
assert(isVectorRegister(id->idReg1())); // ddddd
12705+
assert(isVectorRegister(id->idReg2())); // nnnnn
12706+
assert(isVectorRegister(id->idReg3())); // mmmmm
12707+
assert(isScalableVectorSize(id->idOpSize()));
12708+
break;
12709+
1267012710
// Scalable, no predicates. General purpose source registers
1267112711
case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
1267212712
// increment)

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,8 @@ HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningLower,
320320
HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningUpper, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
321321
HARDWARE_INTRINSIC(Sve2, AddCarryWideningLower, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclb, INS_invalid, INS_sve_adclb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
322322
HARDWARE_INTRINSIC(Sve2, AddCarryWideningUpper, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclt, INS_invalid, INS_sve_adclt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
323+
HARDWARE_INTRINSIC(Sve2, AddHighNarrowingEven, -1, 2, {INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
324+
HARDWARE_INTRINSIC(Sve2, AddHighNarrowingOdd, -1, 3, {INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics)
323325
HARDWARE_INTRINSIC(Sve2, BitwiseClearXor, -1, 3, {INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
324326
HARDWARE_INTRINSIC(Sve2, BitwiseSelect, -1, 3, {INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
325327
HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted, -1, 3, {INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.PlatformNotSupported.cs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,83 @@ internal Arm64() { }
258258
/// </summary>
259259
public static unsafe Vector<ulong> AddCarryWideningUpper(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) { throw new PlatformNotSupportedException(); }
260260

261+
// Add narrow high part (bottom)
262+
263+
/// <summary>
264+
/// svuint8_t svaddhnb[_u16](svuint16_t op1, svuint16_t op2)
265+
/// ADDHNB Zresult.B, Zop1.H, Zop2.H
266+
/// </summary>
267+
public static Vector<byte> AddHighNarrowingEven(Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }
268+
269+
/// <summary>
270+
/// svint16_t svaddhnb[_s32](svint32_t op1, svint32_t op2)
271+
/// ADDHNB Zresult.H, Zop1.S, Zop2.S
272+
/// </summary>
273+
public static Vector<short> AddHighNarrowingEven(Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }
274+
275+
/// <summary>
276+
/// svint32_t svaddhnb[_s64](svint64_t op1, svint64_t op2)
277+
/// ADDHNB Zresult.S, Zop1.D, Zop2.D
278+
/// </summary>
279+
public static Vector<int> AddHighNarrowingEven(Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }
280+
281+
/// <summary>
282+
/// svint8_t svaddhnb[_s16](svint16_t op1, svint16_t op2)
283+
/// ADDHNB Zresult.B, Zop1.H, Zop2.H
284+
/// </summary>
285+
public static Vector<sbyte> AddHighNarrowingEven(Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }
286+
287+
/// <summary>
288+
/// svuint16_t svaddhnb[_u32](svuint32_t op1, svuint32_t op2)
289+
/// ADDHNB Zresult.H, Zop1.S, Zop2.S
290+
/// </summary>
291+
public static Vector<ushort> AddHighNarrowingEven(Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }
292+
293+
/// <summary>
294+
/// svuint32_t svaddhnb[_u64](svuint64_t op1, svuint64_t op2)
295+
/// ADDHNB Zresult.S, Zop1.D, Zop2.D
296+
/// </summary>
297+
public static Vector<uint> AddHighNarrowingEven(Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }
298+
299+
// Add narrow high part (top)
300+
301+
/// <summary>
302+
/// svuint8_t svaddhnt[_u16](svuint8_t even, svuint16_t op1, svuint16_t op2)
303+
/// ADDHNT Ztied.B, Zop1.H, Zop2.H
304+
/// </summary>
305+
public static unsafe Vector<byte> AddHighNarrowingOdd(Vector<byte> even, Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }
306+
307+
/// <summary>
308+
/// svint16_t svaddhnt[_s32](svint16_t even, svint32_t op1, svint32_t op2)
309+
/// ADDHNT Ztied.H, Zop1.S, Zop2.S
310+
/// </summary>
311+
public static unsafe Vector<short> AddHighNarrowingOdd(Vector<short> even, Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }
312+
313+
/// <summary>
314+
/// svint32_t svaddhnt[_s64](svint32_t even, svint64_t op1, svint64_t op2)
315+
/// ADDHNT Ztied.S, Zop1.D, Zop2.D
316+
/// </summary>
317+
public static unsafe Vector<int> AddHighNarrowingOdd(Vector<int> even, Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }
318+
319+
/// <summary>
320+
/// svint8_t svaddhnt[_s16](svint8_t even, svint16_t op1, svint16_t op2)
321+
/// ADDHNT Ztied.B, Zop1.H, Zop2.H
322+
/// </summary>
323+
public static unsafe Vector<sbyte> AddHighNarrowingOdd(Vector<sbyte> even, Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }
324+
325+
/// <summary>
326+
/// svuint16_t svaddhnt[_u32](svuint16_t even, svuint32_t op1, svuint32_t op2)
327+
/// ADDHNT Ztied.H, Zop1.S, Zop2.S
328+
/// </summary>
329+
public static unsafe Vector<ushort> AddHighNarrowingOdd(Vector<ushort> even, Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }
330+
331+
/// <summary>
332+
/// svuint32_t svaddhnt[_u64](svuint32_t even, svuint64_t op1, svuint64_t op2)
333+
/// ADDHNT Ztied.S, Zop1.D, Zop2.D
334+
/// </summary>
335+
public static unsafe Vector<uint> AddHighNarrowingOdd(Vector<uint> even, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }
336+
337+
261338
// Bitwise clear and exclusive OR
262339

263340
/// <summary>

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.cs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,82 @@ internal Arm64() { }
258258
/// </summary>
259259
public static unsafe Vector<ulong> AddCarryWideningUpper(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) => AddCarryWideningUpper(op1, op2, op3);
260260

261+
// Add narrow high part (bottom)
262+
263+
/// <summary>
264+
/// svuint8_t svaddhnb[_u16](svuint16_t op1, svuint16_t op2)
265+
/// ADDHNB Zresult.B, Zop1.H, Zop2.H
266+
/// </summary>
267+
public static Vector<byte> AddHighNarrowingEven(Vector<ushort> left, Vector<ushort> right) => AddHighNarrowingEven(left, right);
268+
269+
/// <summary>
270+
/// svint16_t svaddhnb[_s32](svint32_t op1, svint32_t op2)
271+
/// ADDHNB Zresult.H, Zop1.S, Zop2.S
272+
/// </summary>
273+
public static Vector<short> AddHighNarrowingEven(Vector<int> left, Vector<int> right) => AddHighNarrowingEven(left, right);
274+
275+
/// <summary>
276+
/// svint32_t svaddhnb[_s64](svint64_t op1, svint64_t op2)
277+
/// ADDHNB Zresult.S, Zop1.D, Zop2.D
278+
/// </summary>
279+
public static Vector<int> AddHighNarrowingEven(Vector<long> left, Vector<long> right) => AddHighNarrowingEven(left, right);
280+
281+
/// <summary>
282+
/// svint8_t svaddhnb[_s16](svint16_t op1, svint16_t op2)
283+
/// ADDHNB Zresult.B, Zop1.H, Zop2.H
284+
/// </summary>
285+
public static Vector<sbyte> AddHighNarrowingEven(Vector<short> left, Vector<short> right) => AddHighNarrowingEven(left, right);
286+
287+
/// <summary>
288+
/// svuint16_t svaddhnb[_u32](svuint32_t op1, svuint32_t op2)
289+
/// ADDHNB Zresult.H, Zop1.S, Zop2.S
290+
/// </summary>
291+
public static Vector<ushort> AddHighNarrowingEven(Vector<uint> left, Vector<uint> right) => AddHighNarrowingEven(left, right);
292+
293+
/// <summary>
294+
/// svuint32_t svaddhnb[_u64](svuint64_t op1, svuint64_t op2)
295+
/// ADDHNB Zresult.S, Zop1.D, Zop2.D
296+
/// </summary>
297+
public static Vector<uint> AddHighNarrowingEven(Vector<ulong> left, Vector<ulong> right) => AddHighNarrowingEven(left, right);
298+
299+
// Add narrow high part (top)
300+
301+
/// <summary>
302+
/// svuint8_t svaddhnt[_u16](svuint8_t even, svuint16_t op1, svuint16_t op2)
303+
/// ADDHNT Ztied.B, Zop1.H, Zop2.H
304+
/// </summary>
305+
public static unsafe Vector<byte> AddHighNarrowingOdd(Vector<byte> even, Vector<ushort> left, Vector<ushort> right) => AddHighNarrowingOdd(even, left, right);
306+
307+
/// <summary>
308+
/// svint16_t svaddhnt[_s32](svint16_t even, svint32_t op1, svint32_t op2)
309+
/// ADDHNT Ztied.H, Zop1.S, Zop2.S
310+
/// </summary>
311+
public static unsafe Vector<short> AddHighNarrowingOdd(Vector<short> even, Vector<int> left, Vector<int> right) => AddHighNarrowingOdd(even, left, right);
312+
313+
/// <summary>
314+
/// svint32_t svaddhnt[_s64](svint32_t even, svint64_t op1, svint64_t op2)
315+
/// ADDHNT Ztied.S, Zop1.D, Zop2.D
316+
/// </summary>
317+
public static unsafe Vector<int> AddHighNarrowingOdd(Vector<int> even, Vector<long> left, Vector<long> right) => AddHighNarrowingOdd(even, left, right);
318+
319+
/// <summary>
320+
/// svint8_t svaddhnt[_s16](svint8_t even, svint16_t op1, svint16_t op2)
321+
/// ADDHNT Ztied.B, Zop1.H, Zop2.H
322+
/// </summary>
323+
public static unsafe Vector<sbyte> AddHighNarrowingOdd(Vector<sbyte> even, Vector<short> left, Vector<short> right) => AddHighNarrowingOdd(even, left, right);
324+
325+
/// <summary>
326+
/// svuint16_t svaddhnt[_u32](svuint16_t even, svuint32_t op1, svuint32_t op2)
327+
/// ADDHNT Ztied.H, Zop1.S, Zop2.S
328+
/// </summary>
329+
public static unsafe Vector<ushort> AddHighNarrowingOdd(Vector<ushort> even, Vector<uint> left, Vector<uint> right) => AddHighNarrowingOdd(even, left, right);
330+
331+
/// <summary>
332+
/// svuint32_t svaddhnt[_u64](svuint32_t even, svuint64_t op1, svuint64_t op2)
333+
/// ADDHNT Ztied.S, Zop1.D, Zop2.D
334+
/// </summary>
335+
public static unsafe Vector<uint> AddHighNarrowingOdd(Vector<uint> even, Vector<ulong> left, Vector<ulong> right) => AddHighNarrowingOdd(even, left, right);
336+
261337
// Bitwise clear and exclusive OR
262338

263339
/// <summary>

0 commit comments

Comments
 (0)