Skip to content

Commit 31ee2da

Browse files
ghehglanza
authored and committed
[CIR][CIRGen][Builtin][Neon] Lower neon_vmax_v and neon_vmaxq_v (llvm#1239)
This implementation differs from OG in that we chose to use CIR ops, which eventually lower to generic LLVM intrinsics instead of llvm.aarch64.neon intrinsics. Down at the ASM level, however, [they are identical](https://godbolt.org/z/Gbbos9z6Y).
1 parent 24e6355 commit 31ee2da

File tree

2 files changed

+209
-122
lines changed

2 files changed

+209
-122
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3928,8 +3928,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
39283928
getLoc(E->getExprLoc()));
39293929
}
39303930
case NEON::BI__builtin_neon_vmax_v:
3931-
case NEON::BI__builtin_neon_vmaxq_v:
3932-
llvm_unreachable("NEON::BI__builtin_neon_vmaxq_v NYI");
3931+
case NEON::BI__builtin_neon_vmaxq_v: {
3932+
mlir::Location loc = getLoc(E->getExprLoc());
3933+
Ops[0] = builder.createBitcast(Ops[0], ty);
3934+
Ops[1] = builder.createBitcast(Ops[1], ty);
3935+
if (cir::isFPOrFPVectorTy(ty)) {
3936+
return builder.create<cir::FMaximumOp>(loc, Ops[0], Ops[1]);
3937+
}
3938+
return builder.create<cir::BinOp>(loc, cir::BinOpKind::Max, Ops[0], Ops[1]);
3939+
}
39333940
case NEON::BI__builtin_neon_vmaxh_f16: {
39343941
llvm_unreachable("NEON::BI__builtin_neon_vmaxh_f16 NYI");
39353942
}

clang/test/CIR/CodeGen/AArch64/neon.c

Lines changed: 200 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -4271,132 +4271,207 @@ uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
42714271
// return vsliq_n_p64(a, b, 0);
42724272
// }
42734273

4274-
// NYI-LABEL: @test_vmax_s8(
4275-
// NYI: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
4276-
// NYI: ret <8 x i8> [[VMAX_I]]
4277-
// int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
4278-
// return vmax_s8(a, b);
4279-
// }
4274+
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
4275+
return vmax_s8(a, b);
42804276

4281-
// NYI-LABEL: @test_vmax_s16(
4282-
// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4283-
// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4284-
// NYI: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
4285-
// NYI: ret <4 x i16> [[VMAX2_I]]
4286-
// int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
4287-
// return vmax_s16(a, b);
4288-
// }
4277+
// CIR-LABEL: vmax_s8
4278+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s8i x 8>
42894279

4290-
// NYI-LABEL: @test_vmax_s32(
4291-
// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4292-
// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4293-
// NYI: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
4294-
// NYI: ret <2 x i32> [[VMAX2_I]]
4295-
// int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
4296-
// return vmax_s32(a, b);
4297-
// }
4280+
// LLVM-LABEL: test_vmax_s8
4281+
// LLVM-SAME: (<8 x i8> [[a:%.*]], <8 x i8> [[b:%.*]])
4282+
// LLVM: [[VMAX_I:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[a]], <8 x i8> [[b]])
4283+
// LLVM: ret <8 x i8> [[VMAX_I]]
4284+
}
42984285

4299-
// NYI-LABEL: @test_vmax_u8(
4300-
// NYI: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
4301-
// NYI: ret <8 x i8> [[VMAX_I]]
4302-
// uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
4303-
// return vmax_u8(a, b);
4304-
// }
4286+
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
4287+
return vmax_s16(a, b);
43054288

4306-
// NYI-LABEL: @test_vmax_u16(
4307-
// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4308-
// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4309-
// NYI: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
4310-
// NYI: ret <4 x i16> [[VMAX2_I]]
4311-
// uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
4312-
// return vmax_u16(a, b);
4313-
// }
4289+
// CIR-LABEL: vmax_s16
4290+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s16i x 4>
43144291

4315-
// NYI-LABEL: @test_vmax_u32(
4316-
// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4317-
// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4318-
// NYI: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
4319-
// NYI: ret <2 x i32> [[VMAX2_I]]
4320-
// uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
4321-
// return vmax_u32(a, b);
4322-
// }
4292+
// LLVM-LABEL: test_vmax_s16
4293+
// LLVM-SAME: (<4 x i16> [[a:%.*]], <4 x i16> [[b:%.*]])
4294+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8>
4295+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
4296+
// LLVM: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[a]], <4 x i16> [[b]])
4297+
// LLVM: ret <4 x i16> [[VMAX2_I]]
4298+
}
43234299

4324-
// NYI-LABEL: @test_vmax_f32(
4325-
// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4326-
// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4327-
// NYI: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b)
4328-
// NYI: ret <2 x float> [[VMAX2_I]]
4329-
// float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
4330-
// return vmax_f32(a, b);
4331-
// }
4300+
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
4301+
return vmax_s32(a, b);
43324302

4333-
// NYI-LABEL: @test_vmaxq_s8(
4334-
// NYI: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
4335-
// NYI: ret <16 x i8> [[VMAX_I]]
4336-
// int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
4337-
// return vmaxq_s8(a, b);
4338-
// }
4303+
// CIR-LABEL: vmax_s32
4304+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s32i x 2>
43394305

4340-
// NYI-LABEL: @test_vmaxq_s16(
4341-
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4342-
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4343-
// NYI: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
4344-
// NYI: ret <8 x i16> [[VMAX2_I]]
4345-
// int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
4346-
// return vmaxq_s16(a, b);
4347-
// }
4306+
// LLVM-LABEL: test_vmax_s32
4307+
// LLVM-SAME: (<2 x i32> [[a:%.*]], <2 x i32> [[b:%.*]])
4308+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8>
4309+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
4310+
// LLVM: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[a]], <2 x i32> [[b]])
4311+
// LLVM: ret <2 x i32> [[VMAX2_I]]
4312+
}
43484313

4349-
// NYI-LABEL: @test_vmaxq_s32(
4350-
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4351-
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4352-
// NYI: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
4353-
// NYI: ret <4 x i32> [[VMAX2_I]]
4354-
// int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
4355-
// return vmaxq_s32(a, b);
4356-
// }
4314+
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
4315+
return vmax_u8(a, b);
43574316

4358-
// NYI-LABEL: @test_vmaxq_u8(
4359-
// NYI: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
4360-
// NYI: ret <16 x i8> [[VMAX_I]]
4361-
// uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
4362-
// return vmaxq_u8(a, b);
4363-
// }
4317+
// CIR-LABEL: vmax_u8
4318+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u8i x 8>
43644319

4365-
// NYI-LABEL: @test_vmaxq_u16(
4366-
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4367-
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4368-
// NYI: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
4369-
// NYI: ret <8 x i16> [[VMAX2_I]]
4370-
// uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
4371-
// return vmaxq_u16(a, b);
4372-
// }
4320+
// LLVM-LABEL: test_vmax_u8
4321+
// LLVM-SAME: (<8 x i8> [[a:%.*]], <8 x i8> [[b:%.*]])
4322+
// LLVM: [[VMAX_I:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[a]], <8 x i8> [[b]])
4323+
// LLVM: ret <8 x i8> [[VMAX_I]]
4324+
}
43734325

4374-
// NYI-LABEL: @test_vmaxq_u32(
4375-
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4376-
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4377-
// NYI: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
4378-
// NYI: ret <4 x i32> [[VMAX2_I]]
4379-
// uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
4380-
// return vmaxq_u32(a, b);
4381-
// }
4326+
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
4327+
return vmax_u16(a, b);
43824328

4383-
// NYI-LABEL: @test_vmaxq_f32(
4384-
// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4385-
// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4386-
// NYI: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b)
4387-
// NYI: ret <4 x float> [[VMAX2_I]]
4388-
// float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
4389-
// return vmaxq_f32(a, b);
4390-
// }
4329+
// CIR-LABEL: vmax_u16
4330+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u16i x 4>
43914331

4392-
// NYI-LABEL: @test_vmaxq_f64(
4393-
// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4394-
// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4395-
// NYI: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b)
4396-
// NYI: ret <2 x double> [[VMAX2_I]]
4397-
// float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
4398-
// return vmaxq_f64(a, b);
4399-
// }
4332+
// LLVM-LABEL: test_vmax_u16
4333+
// LLVM-SAME: (<4 x i16> [[a:%.*]], <4 x i16> [[b:%.*]])
4334+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8>
4335+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8>
4336+
// LLVM: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.umax.v4i16(<4 x i16> [[a]], <4 x i16> [[b]])
4337+
// LLVM: ret <4 x i16> [[VMAX2_I]]
4338+
}
4339+
4340+
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
4341+
return vmax_u32(a, b);
4342+
4343+
// CIR-LABEL: vmax_u32
4344+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u32i x 2>
4345+
4346+
// LLVM-LABEL: test_vmax_u32
4347+
// LLVM-SAME: (<2 x i32> [[a:%.*]], <2 x i32> [[b:%.*]])
4348+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8>
4349+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8>
4350+
// LLVM: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[a]], <2 x i32> [[b]])
4351+
// LLVM: ret <2 x i32> [[VMAX2_I]]
4352+
}
4353+
4354+
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
4355+
return vmax_f32(a, b);
4356+
4357+
// CIR-LABEL: vmax_f32
4358+
// CIR: cir.fmaximum {{%.*}}, {{%.*}} : !cir.vector<!cir.float x 2>
4359+
4360+
// LLVM-LABEL: test_vmax_f32
4361+
// LLVM-SAME: (<2 x float> [[a:%.*]], <2 x float> [[b:%.*]])
4362+
// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[a]] to <8 x i8>
4363+
// LLVM: [[TMP1:%.*]] = bitcast <2 x float> [[b]] to <8 x i8>
4364+
// LLVM: [[VMAX2_I:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> [[a]], <2 x float> [[b]])
4365+
// LLVM: ret <2 x float> [[VMAX2_I]]
4366+
}
4367+
4368+
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
4369+
return vmaxq_s8(a, b);
4370+
4371+
// CIR-LABEL: vmaxq_s8
4372+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s8i x 16>
4373+
4374+
// LLVM-LABEL: test_vmaxq_s8
4375+
// LLVM-SAME: (<16 x i8> [[a:%.*]], <16 x i8> [[b:%.*]])
4376+
// LLVM: [[VMAX_I:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[a]], <16 x i8> [[b]])
4377+
// LLVM: ret <16 x i8> [[VMAX_I]]
4378+
}
4379+
4380+
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
4381+
return vmaxq_s16(a, b);
4382+
4383+
// CIR-LABEL: vmaxq_s16
4384+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s16i x 8>
4385+
4386+
// LLVM-LABEL: test_vmaxq_s16
4387+
// LLVM-SAME: (<8 x i16> [[a:%.*]], <8 x i16> [[b:%.*]])
4388+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8>
4389+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8>
4390+
// LLVM: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[a]], <8 x i16> [[b]])
4391+
// LLVM: ret <8 x i16> [[VMAX2_I]]
4392+
}
4393+
4394+
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
4395+
return vmaxq_s32(a, b);
4396+
4397+
// CIR-LABEL: vmaxq_s32
4398+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s32i x 4>
4399+
4400+
// LLVM-LABEL: test_vmaxq_s32
4401+
// LLVM-SAME: (<4 x i32> [[a:%.*]], <4 x i32> [[b:%.*]])
4402+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
4403+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8>
4404+
// LLVM: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[a]], <4 x i32> [[b]])
4405+
// LLVM: ret <4 x i32> [[VMAX2_I]]
4406+
}
4407+
4408+
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
4409+
return vmaxq_u8(a, b);
4410+
4411+
// CIR-LABEL: vmaxq_u8
4412+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u8i x 16>
4413+
4414+
// LLVM-LABEL: test_vmaxq_u8
4415+
// LLVM-SAME: (<16 x i8> [[a:%.*]], <16 x i8> [[b:%.*]])
4416+
// LLVM: [[VMAX_I:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[a]], <16 x i8> [[b]])
4417+
// LLVM: ret <16 x i8> [[VMAX_I]]
4418+
}
4419+
4420+
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
4421+
return vmaxq_u16(a, b);
4422+
4423+
// CIR-LABEL: vmaxq_u16
4424+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u16i x 8>
4425+
4426+
// LLVM-LABEL: test_vmaxq_u16
4427+
// LLVM-SAME: (<8 x i16> [[a:%.*]], <8 x i16> [[b:%.*]])
4428+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8>
4429+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8>
4430+
// LLVM: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[a]], <8 x i16> [[b]])
4431+
// LLVM: ret <8 x i16> [[VMAX2_I]]
4432+
}
4433+
4434+
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
4435+
return vmaxq_u32(a, b);
4436+
4437+
// CIR-LABEL: vmaxq_u32
4438+
// CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u32i x 4>
4439+
4440+
// LLVM-LABEL: test_vmaxq_u32
4441+
// LLVM-SAME: (<4 x i32> [[a:%.*]], <4 x i32> [[b:%.*]])
4442+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8>
4443+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8>
4444+
// LLVM: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[a]], <4 x i32> [[b]])
4445+
// LLVM: ret <4 x i32> [[VMAX2_I]]
4446+
}
4447+
4448+
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
4449+
return vmaxq_f32(a, b);
4450+
4451+
// CIR-LABEL: vmaxq_f32
4452+
// CIR: cir.fmaximum {{%.*}}, {{%.*}} : !cir.vector<!cir.float x 4>
4453+
4454+
// LLVM-LABEL: test_vmaxq_f32
4455+
// LLVM-SAME: (<4 x float> [[a:%.*]], <4 x float> [[b:%.*]])
4456+
// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[a]] to <16 x i8>
4457+
// LLVM: [[TMP1:%.*]] = bitcast <4 x float> [[b]] to <16 x i8>
4458+
// LLVM: [[VMAX2_I:%.*]] = call <4 x float> @llvm.maximum.v4f32(<4 x float> [[a]], <4 x float> [[b]])
4459+
// LLVM: ret <4 x float> [[VMAX2_I]]
4460+
}
4461+
4462+
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
4463+
return vmaxq_f64(a, b);
4464+
4465+
// CIR-LABEL: vmaxq_f64
4466+
// CIR: cir.fmaximum {{%.*}}, {{%.*}} : !cir.vector<!cir.double x 2>
4467+
4468+
// LLVM-LABEL: test_vmaxq_f64
4469+
// LLVM-SAME: (<2 x double> [[a:%.*]], <2 x double> [[b:%.*]])
4470+
// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[a]] to <16 x i8>
4471+
// LLVM: [[TMP1:%.*]] = bitcast <2 x double> [[b]] to <16 x i8>
4472+
// LLVM: [[VMAX2_I:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[a]], <2 x double> [[b]])
4473+
// LLVM: ret <2 x double> [[VMAX2_I]]
4474+
}
44004475

44014476
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
44024477
return vmin_s8(a, b);
@@ -18587,14 +18662,19 @@ float64_t test_vaddvq_f64(float64x2_t a) {
1858718662
// return vabd_f64(a, b);
1858818663
// }
1858918664

18590-
// NYI-LABEL: @test_vmax_f64(
18591-
// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18592-
// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
18593-
// NYI: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
18594-
// NYI: ret <1 x double> [[VMAX2_I]]
18595-
// float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
18596-
// return vmax_f64(a, b);
18597-
// }
18665+
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
18666+
return vmax_f64(a, b);
18667+
18668+
// CIR-LABEL: vmax_f64
18669+
// CIR: cir.fmaximum {{%.*}}, {{%.*}} : !cir.vector<!cir.double x 1>
18670+
18671+
// LLVM-LABEL: test_vmax_f64
18672+
// LLVM-SAME: (<1 x double> [[a:%.*]], <1 x double> [[b:%.*]])
18673+
// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[a]] to <8 x i8>
18674+
// LLVM: [[TMP1:%.*]] = bitcast <1 x double> [[b]] to <8 x i8>
18675+
// LLVM: [[VMAX2_I:%.*]] = call <1 x double> @llvm.maximum.v1f64(<1 x double> [[a]], <1 x double> [[b]])
18676+
// LLVM: ret <1 x double> [[VMAX2_I]]
18677+
}
1859818678

1859918679
// NYI-LABEL: @test_vmaxnm_f64(
1860018680
// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>

0 commit comments

Comments
 (0)