@@ -9255,15 +9255,15 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
92559255 case NI_AVX_LoadAlignedVector256:
92569256 case NI_AVX512_LoadAlignedVector512:
92579257 {
9258- // In minOpts, we need to ensure that an unaligned address will fault when an explicit LoadAligned is used.
9259- // Non-VEX encoded instructions will fault if an unaligned SIMD16 load is contained but will not for scalar
9260- // loads, and VEX-encoded instructions will not fault for unaligned loads in any case.
9258+ // For debug code, we need to ensure that an unaligned address will fault when an explicit LoadAligned is
9259+ // used. Non-VEX encoded instructions will fault if an unaligned SIMD16 load is contained but will not for
9260+ // scalar loads, and VEX-encoded instructions will not fault for unaligned loads in any case.
92619261 //
92629262 // When optimizations are enabled, we want to contain any aligned load that is large enough for the parent's
92639263 // requirement.
92649264
9265- return (supportsSIMDLoad &&
9266- ((!comp-> canUseVexEncoding () && expectedSize == genTypeSize (TYP_SIMD16)) || ! comp->opts .MinOpts ()));
9265+ return (supportsSIMDLoad && ((!comp-> canUseVexEncoding () && expectedSize == genTypeSize (TYP_SIMD16)) ||
9266+ comp->opts .Tier0OptimizationEnabled ()));
92679267 }
92689268
92699269 case NI_X86Base_LoadScalarVector128:
@@ -9279,7 +9279,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
92799279 case NI_AVX2_BroadcastScalarToVector256:
92809280 case NI_AVX512_BroadcastScalarToVector512:
92819281 {
9282- if (comp->opts .MinOpts () || !comp->canUseEmbeddedBroadcast ())
9282+ if (! comp->opts .Tier0OptimizationEnabled () || !comp->canUseEmbeddedBroadcast ())
92839283 {
92849284 return false ;
92859285 }
@@ -9369,113 +9369,90 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode,
93699369 return ;
93709370 }
93719371
9372- // We use the child node's size for the broadcast node, because the parent may consume more than its own size.
9373- // The containment check has already validated that the child is sufficiently large.
9374- //
93759372 // We use the parent node's base type, because we must ensure that the constant repeats correctly for that size,
93769373 // regardless of how the constant vector was created.
93779374
9378- var_types simdType = childNode->TypeGet ();
9379- var_types simdBaseType = parentNode->GetSimdBaseType ();
9380- CorInfoType simdBaseJitType = parentNode->GetSimdBaseJitType ();
9381- bool isCreatedFromScalar = true ;
9375+ var_types simdBaseType = parentNode->GetSimdBaseType ();
9376+ CorInfoType simdBaseJitType = parentNode->GetSimdBaseJitType ();
9377+
9378+ if (varTypeIsSmall (simdBaseType) || !childNode->IsBroadcast (simdBaseType))
9379+ {
9380+ MakeSrcContained (parentNode, childNode);
9381+ return ;
9382+ }
9383+
9384+ // We use the child node's size for the broadcast node, because the parent may consume more than its own size.
9385+ // The containment check has already validated that the child is sufficiently large.
9386+
9387+ var_types simdType = childNode->TypeGet ();
9388+ NamedIntrinsic broadcastName = NI_AVX2_BroadcastScalarToVector128;
9389+ GenTree* constScalar = nullptr ;
93829390
9383- if (varTypeIsSmall (simdBaseType))
9391+ if (simdType == TYP_SIMD32)
9392+ {
9393+ broadcastName = NI_AVX2_BroadcastScalarToVector256;
9394+ }
9395+ else if (simdType == TYP_SIMD64)
93849396 {
9385- isCreatedFromScalar = false ;
9397+ broadcastName = NI_AVX512_BroadcastScalarToVector512 ;
93869398 }
93879399 else
93889400 {
9389- isCreatedFromScalar = childNode-> IsBroadcast (simdBaseType );
9401+ assert (simdType == TYP_SIMD16 );
93909402 }
93919403
9392- if (isCreatedFromScalar )
9404+ switch (simdBaseType )
93939405 {
9394- NamedIntrinsic broadcastName = NI_AVX2_BroadcastScalarToVector128;
9395- if (simdType == TYP_SIMD32)
9406+ case TYP_FLOAT:
93969407 {
9397- broadcastName = NI_AVX2_BroadcastScalarToVector256;
9408+ float scalar = childNode->gtSimdVal .f32 [0 ];
9409+ constScalar = comp->gtNewDconNodeF (scalar);
9410+ break ;
93989411 }
9399- else if (simdType == TYP_SIMD64)
9412+ case TYP_DOUBLE:
94009413 {
9401- broadcastName = NI_AVX512_BroadcastScalarToVector512;
9414+ double scalar = childNode->gtSimdVal .f64 [0 ];
9415+ constScalar = comp->gtNewDconNodeD (scalar);
9416+ break ;
94029417 }
9403- else
9418+ case TYP_INT:
9419+ case TYP_UINT:
94049420 {
9405- assert (simdType == TYP_SIMD16);
9421+ int32_t scalar = childNode->gtSimdVal .i32 [0 ];
9422+ constScalar = comp->gtNewIconNode (scalar);
9423+ break ;
94069424 }
9407-
9408- GenTree* constScalar = nullptr ;
9409- switch (simdBaseType)
9425+ case TYP_LONG:
9426+ case TYP_ULONG:
94109427 {
9411- case TYP_FLOAT:
9412- {
9413- float scalar = childNode->gtSimdVal .f32 [0 ];
9414- constScalar = comp->gtNewDconNodeF (scalar);
9415- break ;
9416- }
9417- case TYP_DOUBLE:
9418- {
9419- double scalar = childNode->gtSimdVal .f64 [0 ];
9420- constScalar = comp->gtNewDconNodeD (scalar);
9421- break ;
9422- }
9423- case TYP_INT:
9424- {
9425- int32_t scalar = childNode->gtSimdVal .i32 [0 ];
9426- constScalar = comp->gtNewIconNode (scalar, simdBaseType);
9427- break ;
9428- }
9429- case TYP_UINT:
9430- {
9431- uint32_t scalar = childNode->gtSimdVal .u32 [0 ];
9432- constScalar = comp->gtNewIconNode (scalar, TYP_INT);
9433- break ;
9434- }
9435- case TYP_LONG:
9436- case TYP_ULONG:
9437- {
9438- int64_t scalar = childNode->gtSimdVal .i64 [0 ];
9439- constScalar = comp->gtNewLconNode (scalar);
9440- break ;
9441- }
9442- default :
9443- unreached ();
9428+ int64_t scalar = childNode->gtSimdVal .i64 [0 ];
9429+ constScalar = comp->gtNewLconNode (scalar);
9430+ break ;
94449431 }
9432+ default :
9433+ unreached ();
9434+ }
94459435
9446- GenTreeHWIntrinsic* createScalar =
9447- comp->gtNewSimdHWIntrinsicNode (TYP_SIMD16, constScalar, NI_Vector128_CreateScalarUnsafe, simdBaseJitType,
9448- 16 );
9449- GenTreeHWIntrinsic* broadcastNode = comp->gtNewSimdHWIntrinsicNode (simdType, createScalar, broadcastName,
9450- simdBaseJitType, genTypeSize (simdType));
9451- BlockRange ().InsertBefore (childNode, broadcastNode);
9452- BlockRange ().InsertBefore (broadcastNode, createScalar);
9453- BlockRange ().InsertBefore (createScalar, constScalar);
9454- LIR::Use use;
9455- if (BlockRange ().TryGetUse (childNode, &use))
9456- {
9457- use.ReplaceWith (broadcastNode);
9458- }
9459- else
9460- {
9461- broadcastNode->SetUnusedValue ();
9462- }
9436+ GenTreeHWIntrinsic* createScalar =
9437+ comp->gtNewSimdHWIntrinsicNode (TYP_SIMD16, constScalar, NI_Vector128_CreateScalarUnsafe, simdBaseJitType, 16 );
9438+ GenTreeHWIntrinsic* broadcastNode =
9439+ comp->gtNewSimdHWIntrinsicNode (simdType, createScalar, broadcastName, simdBaseJitType, genTypeSize (simdType));
94639440
9464- BlockRange ().Remove (childNode);
9465- LowerNode (createScalar);
9466- LowerNode (broadcastNode);
9467- if ( varTypeIsFloating (simdBaseType))
9468- {
9469- MakeSrcContained (broadcastNode, createScalar) ;
9470- }
9471- else if (constScalar-> TypeIs (TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG))
9472- {
9473- MakeSrcContained (broadcastNode, constScalar);
9474- }
9475- MakeSrcContained (parentNode, broadcastNode);
9476- return ;
9441+ BlockRange ().InsertBefore (childNode, constScalar, createScalar, broadcastNode );
9442+
9443+ MakeSrcContained (broadcastNode, createScalar );
9444+ MakeSrcContained (parentNode, broadcastNode);
9445+
9446+ LIR::Use use ;
9447+ if ( BlockRange (). TryGetUse (childNode, &use))
9448+ {
9449+ use. ReplaceWith (broadcastNode);
9450+ }
9451+ else
9452+ {
9453+ broadcastNode-> SetUnusedValue () ;
94779454 }
9478- MakeSrcContained (parentNode, childNode);
9455+ BlockRange (). Remove ( childNode);
94799456}
94809457
94819458// ------------------------------------------------------------------------
0 commit comments