Skip to content

Commit 4b78536

Browse files
committed
allow more containment opts in tier0
1 parent f1867b7 commit 4b78536

File tree

1 file changed

+68
-91
lines changed

1 file changed

+68
-91
lines changed

src/coreclr/jit/lowerxarch.cpp

Lines changed: 68 additions & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -9255,15 +9255,15 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
92559255
case NI_AVX_LoadAlignedVector256:
92569256
case NI_AVX512_LoadAlignedVector512:
92579257
{
9258-
// In minOpts, we need to ensure that an unaligned address will fault when an explicit LoadAligned is used.
9259-
// Non-VEX encoded instructions will fault if an unaligned SIMD16 load is contained but will not for scalar
9260-
// loads, and VEX-encoded instructions will not fault for unaligned loads in any case.
9258+
// For debug code, we need to ensure that an unaligned address will fault when an explicit LoadAligned is
9259+
// used. Non-VEX encoded instructions will fault if an unaligned SIMD16 load is contained but will not for
9260+
// scalar loads, and VEX-encoded instructions will not fault for unaligned loads in any case.
92619261
//
92629262
// When optimizations are enabled, we want to contain any aligned load that is large enough for the parent's
92639263
// requirement.
92649264

9265-
return (supportsSIMDLoad &&
9266-
((!comp->canUseVexEncoding() && expectedSize == genTypeSize(TYP_SIMD16)) || !comp->opts.MinOpts()));
9265+
return (supportsSIMDLoad && ((!comp->canUseVexEncoding() && expectedSize == genTypeSize(TYP_SIMD16)) ||
9266+
comp->opts.Tier0OptimizationEnabled()));
92679267
}
92689268

92699269
case NI_X86Base_LoadScalarVector128:
@@ -9279,7 +9279,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
92799279
case NI_AVX2_BroadcastScalarToVector256:
92809280
case NI_AVX512_BroadcastScalarToVector512:
92819281
{
9282-
if (comp->opts.MinOpts() || !comp->canUseEmbeddedBroadcast())
9282+
if (!comp->opts.Tier0OptimizationEnabled() || !comp->canUseEmbeddedBroadcast())
92839283
{
92849284
return false;
92859285
}
@@ -9369,113 +9369,90 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode,
93699369
return;
93709370
}
93719371

9372-
// We use the child node's size for the broadcast node, because the parent may consume more than its own size.
9373-
// The containment check has already validated that the child is sufficiently large.
9374-
//
93759372
// We use the parent node's base type, because we must ensure that the constant repeats correctly for that size,
93769373
// regardless of how the constant vector was created.
93779374

9378-
var_types simdType = childNode->TypeGet();
9379-
var_types simdBaseType = parentNode->GetSimdBaseType();
9380-
CorInfoType simdBaseJitType = parentNode->GetSimdBaseJitType();
9381-
bool isCreatedFromScalar = true;
9375+
var_types simdBaseType = parentNode->GetSimdBaseType();
9376+
CorInfoType simdBaseJitType = parentNode->GetSimdBaseJitType();
9377+
9378+
if (varTypeIsSmall(simdBaseType) || !childNode->IsBroadcast(simdBaseType))
9379+
{
9380+
MakeSrcContained(parentNode, childNode);
9381+
return;
9382+
}
9383+
9384+
// We use the child node's size for the broadcast node, because the parent may consume more than its own size.
9385+
// The containment check has already validated that the child is sufficiently large.
9386+
9387+
var_types simdType = childNode->TypeGet();
9388+
NamedIntrinsic broadcastName = NI_AVX2_BroadcastScalarToVector128;
9389+
GenTree* constScalar = nullptr;
93829390

9383-
if (varTypeIsSmall(simdBaseType))
9391+
if (simdType == TYP_SIMD32)
9392+
{
9393+
broadcastName = NI_AVX2_BroadcastScalarToVector256;
9394+
}
9395+
else if (simdType == TYP_SIMD64)
93849396
{
9385-
isCreatedFromScalar = false;
9397+
broadcastName = NI_AVX512_BroadcastScalarToVector512;
93869398
}
93879399
else
93889400
{
9389-
isCreatedFromScalar = childNode->IsBroadcast(simdBaseType);
9401+
assert(simdType == TYP_SIMD16);
93909402
}
93919403

9392-
if (isCreatedFromScalar)
9404+
switch (simdBaseType)
93939405
{
9394-
NamedIntrinsic broadcastName = NI_AVX2_BroadcastScalarToVector128;
9395-
if (simdType == TYP_SIMD32)
9406+
case TYP_FLOAT:
93969407
{
9397-
broadcastName = NI_AVX2_BroadcastScalarToVector256;
9408+
float scalar = childNode->gtSimdVal.f32[0];
9409+
constScalar = comp->gtNewDconNodeF(scalar);
9410+
break;
93989411
}
9399-
else if (simdType == TYP_SIMD64)
9412+
case TYP_DOUBLE:
94009413
{
9401-
broadcastName = NI_AVX512_BroadcastScalarToVector512;
9414+
double scalar = childNode->gtSimdVal.f64[0];
9415+
constScalar = comp->gtNewDconNodeD(scalar);
9416+
break;
94029417
}
9403-
else
9418+
case TYP_INT:
9419+
case TYP_UINT:
94049420
{
9405-
assert(simdType == TYP_SIMD16);
9421+
int32_t scalar = childNode->gtSimdVal.i32[0];
9422+
constScalar = comp->gtNewIconNode(scalar);
9423+
break;
94069424
}
9407-
9408-
GenTree* constScalar = nullptr;
9409-
switch (simdBaseType)
9425+
case TYP_LONG:
9426+
case TYP_ULONG:
94109427
{
9411-
case TYP_FLOAT:
9412-
{
9413-
float scalar = childNode->gtSimdVal.f32[0];
9414-
constScalar = comp->gtNewDconNodeF(scalar);
9415-
break;
9416-
}
9417-
case TYP_DOUBLE:
9418-
{
9419-
double scalar = childNode->gtSimdVal.f64[0];
9420-
constScalar = comp->gtNewDconNodeD(scalar);
9421-
break;
9422-
}
9423-
case TYP_INT:
9424-
{
9425-
int32_t scalar = childNode->gtSimdVal.i32[0];
9426-
constScalar = comp->gtNewIconNode(scalar, simdBaseType);
9427-
break;
9428-
}
9429-
case TYP_UINT:
9430-
{
9431-
uint32_t scalar = childNode->gtSimdVal.u32[0];
9432-
constScalar = comp->gtNewIconNode(scalar, TYP_INT);
9433-
break;
9434-
}
9435-
case TYP_LONG:
9436-
case TYP_ULONG:
9437-
{
9438-
int64_t scalar = childNode->gtSimdVal.i64[0];
9439-
constScalar = comp->gtNewLconNode(scalar);
9440-
break;
9441-
}
9442-
default:
9443-
unreached();
9428+
int64_t scalar = childNode->gtSimdVal.i64[0];
9429+
constScalar = comp->gtNewLconNode(scalar);
9430+
break;
94449431
}
9432+
default:
9433+
unreached();
9434+
}
94459435

9446-
GenTreeHWIntrinsic* createScalar =
9447-
comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, constScalar, NI_Vector128_CreateScalarUnsafe, simdBaseJitType,
9448-
16);
9449-
GenTreeHWIntrinsic* broadcastNode = comp->gtNewSimdHWIntrinsicNode(simdType, createScalar, broadcastName,
9450-
simdBaseJitType, genTypeSize(simdType));
9451-
BlockRange().InsertBefore(childNode, broadcastNode);
9452-
BlockRange().InsertBefore(broadcastNode, createScalar);
9453-
BlockRange().InsertBefore(createScalar, constScalar);
9454-
LIR::Use use;
9455-
if (BlockRange().TryGetUse(childNode, &use))
9456-
{
9457-
use.ReplaceWith(broadcastNode);
9458-
}
9459-
else
9460-
{
9461-
broadcastNode->SetUnusedValue();
9462-
}
9436+
GenTreeHWIntrinsic* createScalar =
9437+
comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, constScalar, NI_Vector128_CreateScalarUnsafe, simdBaseJitType, 16);
9438+
GenTreeHWIntrinsic* broadcastNode =
9439+
comp->gtNewSimdHWIntrinsicNode(simdType, createScalar, broadcastName, simdBaseJitType, genTypeSize(simdType));
94639440

9464-
BlockRange().Remove(childNode);
9465-
LowerNode(createScalar);
9466-
LowerNode(broadcastNode);
9467-
if (varTypeIsFloating(simdBaseType))
9468-
{
9469-
MakeSrcContained(broadcastNode, createScalar);
9470-
}
9471-
else if (constScalar->TypeIs(TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG))
9472-
{
9473-
MakeSrcContained(broadcastNode, constScalar);
9474-
}
9475-
MakeSrcContained(parentNode, broadcastNode);
9476-
return;
9441+
BlockRange().InsertBefore(childNode, constScalar, createScalar, broadcastNode);
9442+
9443+
MakeSrcContained(broadcastNode, createScalar);
9444+
MakeSrcContained(parentNode, broadcastNode);
9445+
9446+
LIR::Use use;
9447+
if (BlockRange().TryGetUse(childNode, &use))
9448+
{
9449+
use.ReplaceWith(broadcastNode);
9450+
}
9451+
else
9452+
{
9453+
broadcastNode->SetUnusedValue();
94779454
}
9478-
MakeSrcContained(parentNode, childNode);
9455+
BlockRange().Remove(childNode);
94799456
}
94809457

94819458
//------------------------------------------------------------------------

0 commit comments

Comments
 (0)