|
// ARMWidenStrings.cpp - Widen strings to word boundaries to speed up
// programs that use simple strcpy calls with a constant string as the source
// and a stack-allocated array as the destination.
| 4 | + |
| 5 | +#define DEBUG_TYPE "arm-widen-strings" |
| 6 | + |
| 7 | +#include "llvm/Transforms/Scalar/ARMWidenStrings.h" |
| 8 | +#include "llvm/Analysis/LoopInfo.h" |
| 9 | +#include "llvm/IR/BasicBlock.h" |
| 10 | +#include "llvm/IR/Constants.h" |
| 11 | +#include "llvm/IR/Function.h" |
| 12 | +#include "llvm/IR/GlobalVariable.h" |
| 13 | +#include "llvm/IR/IRBuilder.h" |
| 14 | +#include "llvm/IR/Instructions.h" |
| 15 | +#include "llvm/IR/Intrinsics.h" |
| 16 | +#include "llvm/IR/Module.h" |
| 17 | +#include "llvm/IR/Operator.h" |
| 18 | +#include "llvm/IR/ValueSymbolTable.h" |
| 19 | +#include "llvm/Pass.h" |
| 20 | +#include "llvm/Support/CommandLine.h" |
| 21 | +#include "llvm/Support/Debug.h" |
| 22 | +#include "llvm/Support/raw_ostream.h" |
| 23 | +#include "llvm/TargetParser/Triple.h" |
| 24 | +#include "llvm/Transforms/Scalar.h" |
| 25 | + |
| 26 | +using namespace llvm; |
| 27 | + |
| 28 | +cl::opt<bool> DisableARMWidenStrings("disable-arm-widen-strings", |
| 29 | + cl::init(false)); |
| 30 | + |
| 31 | +namespace { |
| 32 | + |
| 33 | +class ARMWidenStrings { |
| 34 | +public: |
| 35 | + /* |
| 36 | + Max number of bytes that memcpy allows for lowering to load/stores before it |
| 37 | + uses library function (__aeabi_memcpy). This is the same value returned by |
| 38 | + ARMSubtarget::getMaxInlineSizeThreshold which I would have called in place of |
| 39 | + the constant int but can't get access to the subtarget info class from the |
| 40 | + midend. |
| 41 | + */ |
| 42 | + const unsigned int MemcpyInliningLimit = 64; |
| 43 | + |
| 44 | + bool run(Function &F); |
| 45 | +}; |
| 46 | + |
| 47 | +static bool IsCharArray(Type *t) { |
| 48 | + const unsigned int CHAR_BIT_SIZE = 8; |
| 49 | + return t && t->isArrayTy() && t->getArrayElementType()->isIntegerTy() && |
| 50 | + t->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE; |
| 51 | +} |
| 52 | + |
| 53 | +bool ARMWidenStrings::run(Function &F) { |
| 54 | + if (DisableARMWidenStrings) { |
| 55 | + return false; |
| 56 | + } |
| 57 | + |
| 58 | + LLVM_DEBUG(dbgs() << "Running ARMWidenStrings on module " << F.getName() |
| 59 | + << "\n"); |
| 60 | + |
| 61 | + for (Function::iterator b = F.begin(); b != F.end(); ++b) { |
| 62 | + for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) { |
| 63 | + CallInst *CI = dyn_cast<CallInst>(i); |
| 64 | + if (!CI) { |
| 65 | + continue; |
| 66 | + } |
| 67 | + |
| 68 | + Function *CallMemcpy = CI->getCalledFunction(); |
| 69 | + // find out if the current call instruction is a call to llvm memcpy |
| 70 | + // intrinsics |
| 71 | + if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() || |
| 72 | + CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) { |
| 73 | + continue; |
| 74 | + } |
| 75 | + |
| 76 | + LLVM_DEBUG(dbgs() << "Found call to strcpy/memcpy:\n" << *CI << "\n"); |
| 77 | + |
| 78 | + auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0)); |
| 79 | + auto *SourceVar = dyn_cast<GlobalVariable>(CI->getArgOperand(1)); |
| 80 | + auto *BytesToCopy = dyn_cast<ConstantInt>(CI->getArgOperand(2)); |
| 81 | + auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3)); |
| 82 | + |
| 83 | + if (!BytesToCopy) { |
| 84 | + LLVM_DEBUG(dbgs() << "Number of bytes to copy is null\n"); |
| 85 | + continue; |
| 86 | + } |
| 87 | + |
| 88 | + uint64_t NumBytesToCopy = BytesToCopy->getZExtValue(); |
| 89 | + |
| 90 | + if (!Alloca) { |
| 91 | + LLVM_DEBUG(dbgs() << "Destination isn't a Alloca\n"); |
| 92 | + continue; |
| 93 | + } |
| 94 | + |
| 95 | + if (!SourceVar) { |
| 96 | + LLVM_DEBUG(dbgs() << "Source isn't a global constant variable\n"); |
| 97 | + continue; |
| 98 | + } |
| 99 | + |
| 100 | + if (!IsVolatile || IsVolatile->isOne()) { |
| 101 | + LLVM_DEBUG( |
| 102 | + dbgs() << "Not widening strings for this memcpy because it's " |
| 103 | + "a volatile operations\n"); |
| 104 | + continue; |
| 105 | + } |
| 106 | + |
| 107 | + if (NumBytesToCopy % 4 == 0) { |
| 108 | + LLVM_DEBUG(dbgs() << "Bytes to copy in strcpy/memcpy is already word " |
| 109 | + "aligned so nothing to do here.\n"); |
| 110 | + continue; |
| 111 | + } |
| 112 | + |
| 113 | + if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || |
| 114 | + !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) { |
| 115 | + LLVM_DEBUG(dbgs() << "Source is not constant global, thus it's " |
| 116 | + "mutable therefore it's not safe to pad\n"); |
| 117 | + continue; |
| 118 | + } |
| 119 | + |
| 120 | + ConstantDataArray *SourceDataArray = |
| 121 | + dyn_cast<ConstantDataArray>(SourceVar->getInitializer()); |
| 122 | + if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) { |
| 123 | + LLVM_DEBUG(dbgs() << "Source isn't a constant data array\n"); |
| 124 | + continue; |
| 125 | + } |
| 126 | + |
| 127 | + if (!Alloca->isStaticAlloca()) { |
| 128 | + LLVM_DEBUG(dbgs() << "Destination allocation isn't a static " |
| 129 | + "constant which is locally allocated in this " |
| 130 | + "function, so skipping.\n"); |
| 131 | + continue; |
| 132 | + } |
| 133 | + |
| 134 | + // Make sure destination is definitley a char array. |
| 135 | + if (!IsCharArray(Alloca->getAllocatedType())) { |
| 136 | + LLVM_DEBUG(dbgs() << "Destination doesn't look like a constant char (8 " |
| 137 | + "bits) array\n"); |
| 138 | + continue; |
| 139 | + } |
| 140 | + LLVM_DEBUG(dbgs() << "With Alloca: " << *Alloca << "\n"); |
| 141 | + |
| 142 | + uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); |
| 143 | + uint64_t SZSize = SourceDataArray->getType()->getNumElements(); |
| 144 | + |
| 145 | + // For safety purposes lets add a constraint and only padd when |
| 146 | + // num bytes to copy == destination array size == source string |
| 147 | + // which is a constant |
| 148 | + LLVM_DEBUG(dbgs() << "Number of bytes to copy is: " << NumBytesToCopy |
| 149 | + << "\n"); |
| 150 | + LLVM_DEBUG(dbgs() << "Size of destination array is: " << DZSize << "\n"); |
| 151 | + LLVM_DEBUG(dbgs() << "Size of source array is: " << SZSize << "\n"); |
| 152 | + if (NumBytesToCopy != DZSize || DZSize != SZSize) { |
| 153 | + LLVM_DEBUG(dbgs() << "Size of number of bytes to copy, destination " |
| 154 | + "array and source string don't match, so " |
| 155 | + "skipping\n"); |
| 156 | + continue; |
| 157 | + } |
| 158 | + LLVM_DEBUG(dbgs() << "Going to widen.\n"); |
| 159 | + unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4); |
| 160 | + LLVM_DEBUG(dbgs() << "Number of bytes to pad by is " << NumBytesToPad |
| 161 | + << "\n"); |
| 162 | + unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; |
| 163 | + |
| 164 | + if (TotalBytes > MemcpyInliningLimit) { |
| 165 | + LLVM_DEBUG( |
| 166 | + dbgs() << "Not going to pad because total number of bytes is " |
| 167 | + << TotalBytes |
| 168 | + << " which be greater than the inlining " |
| 169 | + "limit for memcpy which is " |
| 170 | + << MemcpyInliningLimit << "\n"); |
| 171 | + continue; |
| 172 | + } |
| 173 | + |
| 174 | + // update destination char array to be word aligned (memcpy(X,...,...)) |
| 175 | + IRBuilder<> BuildAlloca(Alloca); |
| 176 | + AllocaInst *NewAlloca = cast<AllocaInst>(BuildAlloca.CreateAlloca( |
| 177 | + ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(), |
| 178 | + NumBytesToCopy + NumBytesToPad))); |
| 179 | + NewAlloca->takeName(Alloca); |
| 180 | + NewAlloca->setAlignment(Alloca->getAlign()); |
| 181 | + Alloca->replaceAllUsesWith(NewAlloca); |
| 182 | + |
| 183 | + LLVM_DEBUG(dbgs() << "Updating users of destination stack object to use " |
| 184 | + << "new size\n"); |
| 185 | + |
| 186 | + // update source to be word aligned (memcpy(...,X,...)) |
| 187 | + // create replacement string with padded null bytes. |
| 188 | + StringRef Data = SourceDataArray->getRawDataValues(); |
| 189 | + std::vector<uint8_t> StrData(Data.begin(), Data.end()); |
| 190 | + for (unsigned int p = 0; p < NumBytesToPad; p++) |
| 191 | + StrData.push_back('\0'); |
| 192 | + auto Arr = ArrayRef(StrData.data(), TotalBytes); |
| 193 | + |
| 194 | + // create new padded version of global variable string. |
| 195 | + Constant *SourceReplace = ConstantDataArray::get(F.getContext(), Arr); |
| 196 | + GlobalVariable *NewGV = new GlobalVariable( |
| 197 | + *F.getParent(), SourceReplace->getType(), true, |
| 198 | + SourceVar->getLinkage(), SourceReplace, SourceReplace->getName()); |
| 199 | + |
| 200 | + // copy any other attributes from original global variable string |
| 201 | + // e.g. unamed_addr |
| 202 | + NewGV->copyAttributesFrom(SourceVar); |
| 203 | + NewGV->takeName(SourceVar); |
| 204 | + |
| 205 | + // replace intrinsic source. |
| 206 | + CI->setArgOperand(1, NewGV); |
| 207 | + |
| 208 | + // Update number of bytes to copy (memcpy(...,...,X)) |
| 209 | + CI->setArgOperand(2, |
| 210 | + ConstantInt::get(BytesToCopy->getType(), TotalBytes)); |
| 211 | + LLVM_DEBUG(dbgs() << "Padded dest/source and increased number of bytes:\n" |
| 212 | + << *CI << "\n" |
| 213 | + << *NewAlloca << "\n"); |
| 214 | + } |
| 215 | + } |
| 216 | + return true; |
| 217 | +} |
| 218 | + |
| 219 | +} // end of anonymous namespace |
| 220 | + |
| 221 | +PreservedAnalyses ARMWidenStringsPass::run(Function &F, |
| 222 | + FunctionAnalysisManager &AM) { |
| 223 | + if (!ARMWidenStrings().run(F)) |
| 224 | + return PreservedAnalyses::all(); |
| 225 | + |
| 226 | + return PreservedAnalyses::none(); |
| 227 | +} |
0 commit comments