@@ -1707,6 +1707,11 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use)
17071707 return DecomposeHWIntrinsicGetElement (use, hwintrinsicTree);
17081708 }
17091709
1710+ case NI_EVEX_MoveMask:
1711+ {
1712+ return DecomposeHWIntrinsicMoveMask (use, hwintrinsicTree);
1713+ }
1714+
17101715 default :
17111716 {
17121717 noway_assert (!" unexpected GT_HWINTRINSIC node in long decomposition" );
@@ -1830,6 +1835,106 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW
18301835 return FinalizeDecomposition (use, loResult, hiResult, hiResult);
18311836}
18321837
1838+ // ------------------------------------------------------------------------
1839+ // DecomposeHWIntrinsicMoveMask: Decompose GT_HWINTRINSIC -- NI_EVEX_MoveMask
1840+ //
1841+ // Decompose a MoveMask(x) node on Vector512<*>. For:
1842+ //
1843+ // GT_HWINTRINSIC{MoveMask}[*](simd_var)
1844+ //
1845+ // create:
1846+ //
1847+ // tmp_simd_var = simd_var
1848+ // tmp_simd_lo = GT_HWINTRINSIC{GetLower}(tmp_simd_var)
1849+ // lo_result = GT_HWINTRINSIC{MoveMask}(tmp_simd_lo)
1850+ // tmp_simd_hi = GT_HWINTRINSIC{GetUpper}(tmp_simd_var)
1851+ // hi_result = GT_HWINTRINSIC{MoveMask}(tmp_simd_hi)
1852+ // return: GT_LONG(lo_result, hi_result)
1853+ //
1854+ // Noting that for all types except byte/sbyte, hi_result will be exclusively
1855+ // zero and so we can actually optimize this a bit more directly
1856+ //
1857+ // Arguments:
1858+ // use - the LIR::Use object for the def that needs to be decomposed.
1859+ // node - the hwintrinsic node to decompose
1860+ //
1861+ // Return Value:
1862+ // The next node to process.
1863+ //
1864+ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask (LIR::Use& use, GenTreeHWIntrinsic* node)
1865+ {
1866+ assert (node == use.Def ());
1867+ assert (varTypeIsLong (node));
1868+ assert (node->GetHWIntrinsicId () == NI_EVEX_MoveMask);
1869+
1870+ GenTree* op1 = node->Op (1 );
1871+ CorInfoType simdBaseJitType = node->GetSimdBaseJitType ();
1872+ var_types simdBaseType = node->GetSimdBaseType ();
1873+ unsigned simdSize = node->GetSimdSize ();
1874+
1875+ assert (varTypeIsArithmetic (simdBaseType));
1876+ assert (op1->TypeGet () == TYP_MASK);
1877+ assert (simdSize == 64 );
1878+
1879+ GenTree* loResult = nullptr ;
1880+ GenTree* hiResult = nullptr ;
1881+
1882+ if (varTypeIsByte (simdBaseType))
1883+ {
1884+ // Create:
1885+ // simdTmpVar = op1
1886+
1887+ GenTree* simdTmpVar = RepresentOpAsLocalVar (op1, node, &node->Op (1 ));
1888+ unsigned simdTmpVarNum = simdTmpVar->AsLclVarCommon ()->GetLclNum ();
1889+ JITDUMP (" [DecomposeHWIntrinsicMoveMask]: Saving op1 tree to a temp var:\n " );
1890+ DISPTREERANGE (Range (), simdTmpVar);
1891+ Range ().Remove (simdTmpVar);
1892+
1893+ Range ().InsertBefore (node, simdTmpVar);
1894+
1895+ // Create:
1896+ // loResult = GT_HWINTRINSIC{MoveMask}(simdTmpVar)
1897+
1898+ loResult = m_compiler->gtNewSimdHWIntrinsicNode (TYP_INT, simdTmpVar, NI_EVEX_MoveMask, simdBaseJitType, 32 );
1899+ Range ().InsertBefore (node, loResult);
1900+
1901+ simdTmpVar = m_compiler->gtNewLclLNode (simdTmpVarNum, simdTmpVar->TypeGet ());
1902+ Range ().InsertBefore (node, simdTmpVar);
1903+
1904+ // Create:
1905+ // simdTmpVar = GT_HWINTRINSIC{ShiftRightMask}(simdTmpVar, 32)
1906+ // hiResult = GT_HWINTRINSIC{MoveMask}(simdTmpVar)
1907+
1908+ GenTree* shiftIcon = m_compiler->gtNewIconNode (32 , TYP_INT);
1909+ Range ().InsertBefore (node, shiftIcon);
1910+
1911+ simdTmpVar = m_compiler->gtNewSimdHWIntrinsicNode (TYP_MASK, simdTmpVar, shiftIcon, NI_EVEX_ShiftRightMask,
1912+ simdBaseJitType, 64 );
1913+ Range ().InsertBefore (node, simdTmpVar);
1914+
1915+ hiResult = m_compiler->gtNewSimdHWIntrinsicNode (TYP_INT, simdTmpVar, NI_EVEX_MoveMask, simdBaseJitType, 32 );
1916+ Range ().InsertBefore (node, hiResult);
1917+ }
1918+ else
1919+ {
1920+ // Create:
1921+ // loResult = GT_HWINTRINSIC{MoveMask}(op1)
1922+
1923+ loResult = m_compiler->gtNewSimdHWIntrinsicNode (TYP_INT, op1, NI_EVEX_MoveMask, simdBaseJitType, simdSize);
1924+ Range ().InsertBefore (node, loResult);
1925+
1926+ // Create:
1927+ // hiResult = GT_ICON(0)
1928+
1929+ hiResult = m_compiler->gtNewZeroConNode (TYP_INT);
1930+ Range ().InsertBefore (node, hiResult);
1931+ }
1932+
1933+ // Done with the original tree; remove it.
1934+ Range ().Remove (node);
1935+
1936+ return FinalizeDecomposition (use, loResult, hiResult, hiResult);
1937+ }
18331938#endif // FEATURE_HW_INTRINSICS
18341939
18351940// ------------------------------------------------------------------------
0 commit comments