@@ -92,6 +92,8 @@ STATISTIC(NumInternalFunc, "Number of internal functions");
92
92
STATISTIC (NumColdCC, " Number of functions marked coldcc" );
93
93
STATISTIC (NumIFuncsResolved, " Number of statically resolved IFuncs" );
94
94
STATISTIC (NumIFuncsDeleted, " Number of IFuncs removed" );
95
+ STATISTIC (NumGlobalArraysPadded,
96
+ " Number of global arrays padded to alignment boundary" );
95
97
96
98
static cl::opt<bool >
97
99
EnableColdCCStressTest (" enable-coldcc-stress-test" ,
@@ -2029,6 +2031,165 @@ OptimizeFunctions(Module &M,
2029
2031
return Changed;
2030
2032
}
2031
2033
2034
+ static bool callInstIsMemcpy (CallInst *CI) {
2035
+ if (!CI)
2036
+ return false ;
2037
+
2038
+ Function *F = CI->getCalledFunction ();
2039
+ if (!F || !F->isIntrinsic () || F->getIntrinsicID () != Intrinsic::memcpy )
2040
+ return false ;
2041
+
2042
+ return true ;
2043
+ }
2044
+
2045
+ static bool destArrayCanBeWidened (CallInst *CI) {
2046
+ auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand (3 ));
2047
+ auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand (0 ));
2048
+
2049
+ if (!Alloca || !IsVolatile || IsVolatile->isOne ())
2050
+ return false ;
2051
+
2052
+ if (!Alloca->isStaticAlloca ())
2053
+ return false ;
2054
+
2055
+ if (!Alloca->getAllocatedType ()->isArrayTy ())
2056
+ return false ;
2057
+
2058
+ return true ;
2059
+ }
2060
+
2061
+ static GlobalVariable *widenGlobalVariable (GlobalVariable *OldVar, Function *F,
2062
+ unsigned NumBytesToPad,
2063
+ unsigned NumBytesToCopy) {
2064
+ if (!OldVar->hasInitializer ())
2065
+ return nullptr ;
2066
+
2067
+ ConstantDataArray *DataArray =
2068
+ dyn_cast<ConstantDataArray>(OldVar->getInitializer ());
2069
+ if (!DataArray)
2070
+ return nullptr ;
2071
+
2072
+ // Update to be word aligned (memcpy(...,X,...))
2073
+ // create replacement with padded null bytes.
2074
+ StringRef Data = DataArray->getRawDataValues ();
2075
+ std::vector<uint8_t > StrData (Data.begin (), Data.end ());
2076
+ for (unsigned int p = 0 ; p < NumBytesToPad; p++)
2077
+ StrData.push_back (' \0 ' );
2078
+ auto Arr = ArrayRef (StrData.data (), NumBytesToCopy + NumBytesToPad);
2079
+ // Create new padded version of global variable.
2080
+ Constant *SourceReplace = ConstantDataArray::get (F->getContext (), Arr);
2081
+ GlobalVariable *NewGV = new GlobalVariable (
2082
+ *(F->getParent ()), SourceReplace->getType (), true , OldVar->getLinkage (),
2083
+ SourceReplace, SourceReplace->getName ());
2084
+ // Copy any other attributes from original global variable
2085
+ // e.g. unamed_addr
2086
+ NewGV->copyAttributesFrom (OldVar);
2087
+ NewGV->takeName (OldVar);
2088
+ return NewGV;
2089
+ }
2090
+
2091
+ static void widenDestArray (CallInst *CI, const unsigned NumBytesToPad,
2092
+ const unsigned NumBytesToCopy,
2093
+ ConstantDataArray *SourceDataArray) {
2094
+
2095
+ auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand (0 ));
2096
+ if (Alloca) {
2097
+ unsigned ElementByteWidth = SourceDataArray->getElementByteSize ();
2098
+ unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
2099
+ unsigned NumElementsToCopy = divideCeil (TotalBytes, ElementByteWidth);
2100
+ // Update destination array to be word aligned (memcpy(X,...,...))
2101
+ IRBuilder<> BuildAlloca (Alloca);
2102
+ AllocaInst *NewAlloca = BuildAlloca.CreateAlloca (ArrayType::get (
2103
+ Alloca->getAllocatedType ()->getArrayElementType (), NumElementsToCopy));
2104
+ NewAlloca->takeName (Alloca);
2105
+ NewAlloca->setAlignment (Alloca->getAlign ());
2106
+ Alloca->replaceAllUsesWith (NewAlloca);
2107
+ Alloca->eraseFromParent ();
2108
+ }
2109
+ }
2110
+
2111
+ static bool tryWidenGlobalArrayAndDests (Function *F, GlobalVariable *SourceVar,
2112
+ const unsigned NumBytesToPad,
2113
+ const unsigned NumBytesToCopy,
2114
+ ConstantInt *BytesToCopyOp,
2115
+ ConstantDataArray *SourceDataArray) {
2116
+ auto *NewSourceGV =
2117
+ widenGlobalVariable (SourceVar, F, NumBytesToPad, NumBytesToCopy);
2118
+ if (!NewSourceGV)
2119
+ return false ;
2120
+
2121
+ // Update arguments of remaining uses that
2122
+ // are memcpys.
2123
+ for (auto *User : SourceVar->users ()) {
2124
+ auto *CI = dyn_cast<CallInst>(User);
2125
+ if (!callInstIsMemcpy (CI) || !destArrayCanBeWidened (CI))
2126
+ continue ;
2127
+
2128
+ if (CI->getArgOperand (1 ) != SourceVar)
2129
+ continue ;
2130
+
2131
+ widenDestArray (CI, NumBytesToPad, NumBytesToCopy, SourceDataArray);
2132
+
2133
+ CI->setArgOperand (2 , ConstantInt::get (BytesToCopyOp->getType (),
2134
+ NumBytesToCopy + NumBytesToPad));
2135
+ }
2136
+ SourceVar->replaceAllUsesWith (NewSourceGV);
2137
+
2138
+ NumGlobalArraysPadded++;
2139
+ return true ;
2140
+ }
2141
+
2142
+ static bool tryWidenGlobalArraysUsedByMemcpy (
2143
+ GlobalVariable *GV,
2144
+ function_ref<TargetTransformInfo &(Function &)> GetTTI) {
2145
+
2146
+ if (!GV->hasInitializer () || !GV->isConstant () || !GV->hasLocalLinkage () ||
2147
+ !GV->hasGlobalUnnamedAddr ())
2148
+ return false ;
2149
+
2150
+ for (auto *User : GV->users ()) {
2151
+ CallInst *CI = dyn_cast<CallInst>(User);
2152
+ if (!callInstIsMemcpy (CI) || !destArrayCanBeWidened (CI))
2153
+ continue ;
2154
+
2155
+ Function *F = CI->getCalledFunction ();
2156
+
2157
+ auto *BytesToCopyOp = dyn_cast<ConstantInt>(CI->getArgOperand (2 ));
2158
+ if (!BytesToCopyOp)
2159
+ continue ;
2160
+
2161
+ ConstantDataArray *SourceDataArray =
2162
+ dyn_cast<ConstantDataArray>(GV->getInitializer ());
2163
+ if (!SourceDataArray)
2164
+ continue ;
2165
+
2166
+ unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue ();
2167
+
2168
+ auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand (0 ));
2169
+ uint64_t DZSize = Alloca->getAllocatedType ()->getArrayNumElements ();
2170
+ uint64_t SZSize = SourceDataArray->getType ()->getNumElements ();
2171
+ unsigned ElementByteWidth = SourceDataArray->getElementByteSize ();
2172
+ // Calculate the number of elements to copy while avoiding floored
2173
+ // division of integers returning wrong values i.e. copying one byte
2174
+ // from an array of i16 would yield 0 elements to copy as supposed to 1.
2175
+ unsigned NumElementsToCopy = divideCeil (NumBytesToCopy, ElementByteWidth);
2176
+
2177
+ // For safety purposes lets add a constraint and only pad when
2178
+ // NumElementsToCopy == destination array size ==
2179
+ // source which is a constant
2180
+ if (NumElementsToCopy != DZSize || DZSize != SZSize)
2181
+ continue ;
2182
+
2183
+ unsigned NumBytesToPad = GetTTI (*F).getNumBytesToPadGlobalArray (
2184
+ NumBytesToCopy, SourceDataArray->getType ());
2185
+ if (NumBytesToPad) {
2186
+ return tryWidenGlobalArrayAndDests (F, GV, NumBytesToPad, NumBytesToCopy,
2187
+ BytesToCopyOp, SourceDataArray);
2188
+ }
2189
+ }
2190
+ return false ;
2191
+ }
2192
+
2032
2193
static bool
2033
2194
OptimizeGlobalVars (Module &M,
2034
2195
function_ref<TargetTransformInfo &(Function &)> GetTTI,
@@ -2058,6 +2219,10 @@ OptimizeGlobalVars(Module &M,
2058
2219
continue ;
2059
2220
}
2060
2221
2222
+ // For global variable arrays called in a memcpy
2223
+ // we try to pad to nearest valid alignment boundary
2224
+ Changed |= tryWidenGlobalArraysUsedByMemcpy (&GV, GetTTI);
2225
+
2061
2226
Changed |= processGlobal (GV, GetTTI, GetTLI, LookupDomTree);
2062
2227
}
2063
2228
return Changed;
0 commit comments