LLVM API Documentation
00001 //===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This transformation implements the well known scalar replacement of 00011 // aggregates transformation. This xform breaks up alloca instructions of 00012 // aggregate type (structure or array) into individual alloca instructions for 00013 // each member (if possible). Then, if possible, it transforms the individual 00014 // alloca instructions into nice clean scalar SSA form. 00015 // 00016 // This combines a simple SRoA algorithm with the Mem2Reg algorithm because 00017 // often interact, especially for C++ programs. As such, iterating between 00018 // SRoA, then Mem2Reg until we run out of things to promote works well. 00019 // 00020 //===----------------------------------------------------------------------===// 00021 00022 #define DEBUG_TYPE "scalarrepl" 00023 #include "llvm/Transforms/Scalar.h" 00024 #include "llvm/Constants.h" 00025 #include "llvm/DerivedTypes.h" 00026 #include "llvm/Function.h" 00027 #include "llvm/GlobalVariable.h" 00028 #include "llvm/Instructions.h" 00029 #include "llvm/IntrinsicInst.h" 00030 #include "llvm/Pass.h" 00031 #include "llvm/Analysis/Dominators.h" 00032 #include "llvm/Target/TargetData.h" 00033 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 00034 #include "llvm/Support/Debug.h" 00035 #include "llvm/Support/GetElementPtrTypeIterator.h" 00036 #include "llvm/Support/MathExtras.h" 00037 #include "llvm/Support/Compiler.h" 00038 #include "llvm/ADT/SmallVector.h" 00039 #include "llvm/ADT/Statistic.h" 00040 #include "llvm/ADT/StringExtras.h" 00041 using namespace llvm; 00042 00043 STATISTIC(NumReplaced, "Number of allocas broken up"); 00044 STATISTIC(NumPromoted, "Number of allocas promoted"); 00045 STATISTIC(NumConverted, "Number of aggregates converted to scalar"); 00046 STATISTIC(NumGlobals, "Number of allocas copied from constant global"); 00047 00048 namespace { 00049 struct VISIBILITY_HIDDEN SROA : public FunctionPass { 00050 static char ID; // Pass identification, replacement for typeid 00051 explicit SROA(signed T = -1) : FunctionPass(&ID) { 00052 if (T == -1) 00053 SRThreshold = 128; 00054 else 00055 SRThreshold = T; 00056 } 00057 00058 bool runOnFunction(Function &F); 00059 00060 bool performScalarRepl(Function &F); 00061 bool performPromotion(Function &F); 00062 00063 // getAnalysisUsage - This pass does not require any passes, but we know it 00064 // will not alter the CFG, so say so. 00065 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 00066 AU.addRequired<DominatorTree>(); 00067 AU.addRequired<DominanceFrontier>(); 00068 AU.addRequired<TargetData>(); 00069 AU.setPreservesCFG(); 00070 } 00071 00072 private: 00073 TargetData *TD; 00074 00075 /// AllocaInfo - When analyzing uses of an alloca instruction, this captures 00076 /// information about the uses. All these fields are initialized to false 00077 /// and set to true when something is learned. 00078 struct AllocaInfo { 00079 /// isUnsafe - This is set to true if the alloca cannot be SROA'd. 00080 bool isUnsafe : 1; 00081 00082 /// needsCanon - This is set to true if there is some use of the alloca 00083 /// that requires canonicalization. 00084 bool needsCanon : 1; 00085 00086 /// isMemCpySrc - This is true if this aggregate is memcpy'd from. 00087 bool isMemCpySrc : 1; 00088 00089 /// isMemCpyDst - This is true if this aggregate is memcpy'd into. 00090 bool isMemCpyDst : 1; 00091 00092 AllocaInfo() 00093 : isUnsafe(false), needsCanon(false), 00094 isMemCpySrc(false), isMemCpyDst(false) {} 00095 }; 00096 00097 unsigned SRThreshold; 00098 00099 void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; } 00100 00101 int isSafeAllocaToScalarRepl(AllocationInst *AI); 00102 00103 void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, 00104 AllocaInfo &Info); 00105 void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, 00106 AllocaInfo &Info); 00107 void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI, 00108 unsigned OpNo, AllocaInfo &Info); 00109 void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI, 00110 AllocaInfo &Info); 00111 00112 void DoScalarReplacement(AllocationInst *AI, 00113 std::vector<AllocationInst*> &WorkList); 00114 void CanonicalizeAllocaUsers(AllocationInst *AI); 00115 AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base); 00116 00117 void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, 00118 SmallVector<AllocaInst*, 32> &NewElts); 00119 00120 void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, 00121 AllocationInst *AI, 00122 SmallVector<AllocaInst*, 32> &NewElts); 00123 void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI, 00124 SmallVector<AllocaInst*, 32> &NewElts); 00125 00126 const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial); 00127 void ConvertToScalar(AllocationInst *AI, const Type *Ty); 00128 void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset); 00129 Value *ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI, 00130 unsigned Offset); 00131 Value *ConvertUsesOfStoreToScalar(StoreInst *SI, AllocaInst *NewAI, 00132 unsigned Offset); 00133 static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI); 00134 }; 00135 } 00136 00137 char SROA::ID = 0; 00138 static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates"); 00139 00140 // Public interface to the ScalarReplAggregates pass 00141 FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { 00142 return new SROA(Threshold); 00143 } 00144 00145 00146 bool SROA::runOnFunction(Function &F) { 00147 TD = &getAnalysis<TargetData>(); 00148 00149 bool Changed = performPromotion(F); 00150 while (1) { 00151 bool LocalChange = performScalarRepl(F); 00152 if (!LocalChange) break; // No need to repromote if no scalarrepl 00153 Changed = true; 00154 LocalChange = performPromotion(F); 00155 if (!LocalChange) break; // No need to re-scalarrepl if no promotion 00156 } 00157 00158 return Changed; 00159 } 00160 00161 00162 bool SROA::performPromotion(Function &F) { 00163 std::vector<AllocaInst*> Allocas; 00164 DominatorTree &DT = getAnalysis<DominatorTree>(); 00165 DominanceFrontier &DF = getAnalysis<DominanceFrontier>(); 00166 00167 BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function 00168 00169 bool Changed = false; 00170 00171 while (1) { 00172 Allocas.clear(); 00173 00174 // Find allocas that are safe to promote, by looking at all instructions in 00175 // the entry node 00176 for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) 00177 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? 00178 if (isAllocaPromotable(AI)) 00179 Allocas.push_back(AI); 00180 00181 if (Allocas.empty()) break; 00182 00183 PromoteMemToReg(Allocas, DT, DF); 00184 NumPromoted += Allocas.size(); 00185 Changed = true; 00186 } 00187 00188 return Changed; 00189 } 00190 00191 /// getNumSAElements - Return the number of elements in the specific struct or 00192 /// array. 00193 static uint64_t getNumSAElements(const Type *T) { 00194 if (const StructType *ST = dyn_cast<StructType>(T)) 00195 return ST->getNumElements(); 00196 return cast<ArrayType>(T)->getNumElements(); 00197 } 00198 00199 // performScalarRepl - This algorithm is a simple worklist driven algorithm, 00200 // which runs on all of the malloc/alloca instructions in the function, removing 00201 // them if they are only used by getelementptr instructions. 00202 // 00203 bool SROA::performScalarRepl(Function &F) { 00204 std::vector<AllocationInst*> WorkList; 00205 00206 // Scan the entry basic block, adding any alloca's and mallocs to the worklist 00207 BasicBlock &BB = F.getEntryBlock(); 00208 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) 00209 if (AllocationInst *A = dyn_cast<AllocationInst>(I)) 00210 WorkList.push_back(A); 00211 00212 // Process the worklist 00213 bool Changed = false; 00214 while (!WorkList.empty()) { 00215 AllocationInst *AI = WorkList.back(); 00216 WorkList.pop_back(); 00217 00218 // Handle dead allocas trivially. These can be formed by SROA'ing arrays 00219 // with unused elements. 00220 if (AI->use_empty()) { 00221 AI->eraseFromParent(); 00222 continue; 00223 } 00224 00225 // If we can turn this aggregate value (potentially with casts) into a 00226 // simple scalar value that can be mem2reg'd into a register value. 00227 bool IsNotTrivial = false; 00228 if (const Type *ActualType = CanConvertToScalar(AI, IsNotTrivial)) 00229 if (IsNotTrivial && ActualType != Type::VoidTy) { 00230 ConvertToScalar(AI, ActualType); 00231 Changed = true; 00232 continue; 00233 } 00234 00235 // Check to see if we can perform the core SROA transformation. We cannot 00236 // transform the allocation instruction if it is an array allocation 00237 // (allocations OF arrays are ok though), and an allocation of a scalar 00238 // value cannot be decomposed at all. 00239 if (!AI->isArrayAllocation() && 00240 (isa<StructType>(AI->getAllocatedType()) || 00241 isa<ArrayType>(AI->getAllocatedType())) && 00242 AI->getAllocatedType()->isSized() && 00243 // Do not promote any struct whose size is larger than "128" bytes. 00244 TD->getABITypeSize(AI->getAllocatedType()) < SRThreshold && 00245 // Do not promote any struct into more than "32" separate vars. 00246 getNumSAElements(AI->getAllocatedType()) < SRThreshold/4) { 00247 // Check that all of the users of the allocation are capable of being 00248 // transformed. 00249 switch (isSafeAllocaToScalarRepl(AI)) { 00250 default: assert(0 && "Unexpected value!"); 00251 case 0: // Not safe to scalar replace. 00252 break; 00253 case 1: // Safe, but requires cleanup/canonicalizations first 00254 CanonicalizeAllocaUsers(AI); 00255 // FALL THROUGH. 00256 case 3: // Safe to scalar replace. 00257 DoScalarReplacement(AI, WorkList); 00258 Changed = true; 00259 continue; 00260 } 00261 } 00262 00263 // Check to see if this allocation is only modified by a memcpy/memmove from 00264 // a constant global. If this is the case, we can change all users to use 00265 // the constant global instead. This is commonly produced by the CFE by 00266 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' 00267 // is only subsequently read. 00268 if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { 00269 DOUT << "Found alloca equal to global: " << *AI; 00270 DOUT << " memcpy = " << *TheCopy; 00271 Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2)); 00272 AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); 00273 TheCopy->eraseFromParent(); // Don't mutate the global. 00274 AI->eraseFromParent(); 00275 ++NumGlobals; 00276 Changed = true; 00277 continue; 00278 } 00279 00280 // Otherwise, couldn't process this. 00281 } 00282 00283 return Changed; 00284 } 00285 00286 /// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl 00287 /// predicate, do SROA now. 00288 void SROA::DoScalarReplacement(AllocationInst *AI, 00289 std::vector<AllocationInst*> &WorkList) { 00290 DOUT << "Found inst to SROA: " << *AI; 00291 SmallVector<AllocaInst*, 32> ElementAllocas; 00292 if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { 00293 ElementAllocas.reserve(ST->getNumContainedTypes()); 00294 for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { 00295 AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 00296 AI->getAlignment(), 00297 AI->getName() + "." + utostr(i), AI); 00298 ElementAllocas.push_back(NA); 00299 WorkList.push_back(NA); // Add to worklist for recursive processing 00300 } 00301 } else { 00302 const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); 00303 ElementAllocas.reserve(AT->getNumElements()); 00304 const Type *ElTy = AT->getElementType(); 00305 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 00306 AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), 00307 AI->getName() + "." + utostr(i), AI); 00308 ElementAllocas.push_back(NA); 00309 WorkList.push_back(NA); // Add to worklist for recursive processing 00310 } 00311 } 00312 00313 // Now that we have created the alloca instructions that we want to use, 00314 // expand the getelementptr instructions to use them. 00315 // 00316 while (!AI->use_empty()) { 00317 Instruction *User = cast<Instruction>(AI->use_back()); 00318 if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) { 00319 RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); 00320 BCInst->eraseFromParent(); 00321 continue; 00322 } 00323 00324 // Replace: 00325 // %res = load { i32, i32 }* %alloc 00326 // with: 00327 // %load.0 = load i32* %alloc.0 00328 // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 00329 // %load.1 = load i32* %alloc.1 00330 // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 00331 // (Also works for arrays instead of structs) 00332 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 00333 Value *Insert = UndefValue::get(LI->getType()); 00334 for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { 00335 Value *Load = new LoadInst(ElementAllocas[i], "load", LI); 00336 Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); 00337 } 00338 LI->replaceAllUsesWith(Insert); 00339 LI->eraseFromParent(); 00340 continue; 00341 } 00342 00343 // Replace: 00344 // store { i32, i32 } %val, { i32, i32 }* %alloc 00345 // with: 00346 // %val.0 = extractvalue { i32, i32 } %val, 0 00347 // store i32 %val.0, i32* %alloc.0 00348 // %val.1 = extractvalue { i32, i32 } %val, 1 00349 // store i32 %val.1, i32* %alloc.1 00350 // (Also works for arrays instead of structs) 00351 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 00352 Value *Val = SI->getOperand(0); 00353 for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { 00354 Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); 00355 new StoreInst(Extract, ElementAllocas[i], SI); 00356 } 00357 SI->eraseFromParent(); 00358 continue; 00359 } 00360 00361 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User); 00362 // We now know that the GEP is of the form: GEP <ptr>, 0, <cst> 00363 unsigned Idx = 00364 (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); 00365 00366 assert(Idx < ElementAllocas.size() && "Index out of range?"); 00367 AllocaInst *AllocaToUse = ElementAllocas[Idx]; 00368 00369 Value *RepValue; 00370 if (GEPI->getNumOperands() == 3) { 00371 // Do not insert a new getelementptr instruction with zero indices, only 00372 // to have it optimized out later. 00373 RepValue = AllocaToUse; 00374 } else { 00375 // We are indexing deeply into the structure, so we still need a 00376 // getelement ptr instruction to finish the indexing. This may be 00377 // expanded itself once the worklist is rerun. 00378 // 00379 SmallVector<Value*, 8> NewArgs; 00380 NewArgs.push_back(Constant::getNullValue(Type::Int32Ty)); 00381 NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); 00382 RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), 00383 NewArgs.end(), "", GEPI); 00384 RepValue->takeName(GEPI); 00385 } 00386 00387 // If this GEP is to the start of the aggregate, check for memcpys. 00388 if (Idx == 0 && GEPI->hasAllZeroIndices()) 00389 RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); 00390 00391 // Move all of the users over to the new GEP. 00392 GEPI->replaceAllUsesWith(RepValue); 00393 // Delete the old GEP 00394 GEPI->eraseFromParent(); 00395 } 00396 00397 // Finally, delete the Alloca instruction 00398 AI->eraseFromParent(); 00399 NumReplaced++; 00400 } 00401 00402 00403 /// isSafeElementUse - Check to see if this use is an allowed use for a 00404 /// getelementptr instruction of an array aggregate allocation. isFirstElt 00405 /// indicates whether Ptr is known to the start of the aggregate. 00406 /// 00407 void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, 00408 AllocaInfo &Info) { 00409 for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); 00410 I != E; ++I) { 00411 Instruction *User = cast<Instruction>(*I); 00412 switch (User->getOpcode()) { 00413 case Instruction::Load: break; 00414 case Instruction::Store: 00415 // Store is ok if storing INTO the pointer, not storing the pointer 00416 if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); 00417 break; 00418 case Instruction::GetElementPtr: { 00419 GetElementPtrInst *GEP = cast<GetElementPtrInst>(User); 00420 bool AreAllZeroIndices = isFirstElt; 00421 if (GEP->getNumOperands() > 1) { 00422 if (!isa<ConstantInt>(GEP->getOperand(1)) || 00423 !cast<ConstantInt>(GEP->getOperand(1))->isZero()) 00424 // Using pointer arithmetic to navigate the array. 00425 return MarkUnsafe(Info); 00426 00427 if (AreAllZeroIndices) 00428 AreAllZeroIndices = GEP->hasAllZeroIndices(); 00429 } 00430 isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); 00431 if (Info.isUnsafe) return; 00432 break; 00433 } 00434 case Instruction::BitCast: 00435 if (isFirstElt) { 00436 isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info); 00437 if (Info.isUnsafe) return; 00438 break; 00439 } 00440 DOUT << " Transformation preventing inst: " << *User; 00441 return MarkUnsafe(Info); 00442 case Instruction::Call: 00443 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 00444 if (isFirstElt) { 00445 isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); 00446 if (Info.isUnsafe) return; 00447 break; 00448 } 00449 } 00450 DOUT << " Transformation preventing inst: " << *User; 00451 return MarkUnsafe(Info); 00452 default: 00453 DOUT << " Transformation preventing inst: " << *User; 00454 return MarkUnsafe(Info); 00455 } 00456 } 00457 return; // All users look ok :) 00458 } 00459 00460 /// AllUsersAreLoads - Return true if all users of this value are loads. 00461 static bool AllUsersAreLoads(Value *Ptr) { 00462 for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); 00463 I != E; ++I) 00464 if (cast<Instruction>(*I)->getOpcode() != Instruction::Load) 00465 return false; 00466 return true; 00467 } 00468 00469 /// isSafeUseOfAllocation - Check to see if this user is an allowed use for an 00470 /// aggregate allocation. 00471 /// 00472 void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, 00473 AllocaInfo &Info) { 00474 if (BitCastInst *C = dyn_cast<BitCastInst>(User)) 00475 return isSafeUseOfBitCastedAllocation(C, AI, Info); 00476 00477 if (isa<LoadInst>(User)) 00478 return; // Loads (returning a first class aggregrate) are always rewritable 00479 00480 if (isa<StoreInst>(User) && User->getOperand(0) != AI) 00481 return; // Store is ok if storing INTO the pointer, not storing the pointer 00482 00483 GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User); 00484 if (GEPI == 0) 00485 return MarkUnsafe(Info); 00486 00487 gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); 00488 00489 // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>". 00490 if (I == E || 00491 I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { 00492 return MarkUnsafe(Info); 00493 } 00494 00495 ++I; 00496 if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? 00497 00498 bool IsAllZeroIndices = true; 00499 00500 // If the first index is a non-constant index into an array, see if we can 00501 // handle it as a special case. 00502 if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { 00503 if (!isa<ConstantInt>(I.getOperand())) { 00504 IsAllZeroIndices = 0; 00505 uint64_t NumElements = AT->getNumElements(); 00506 00507 // If this is an array index and the index is not constant, we cannot 00508 // promote... that is unless the array has exactly one or two elements in 00509 // it, in which case we CAN promote it, but we have to canonicalize this 00510 // out if this is the only problem. 00511 if ((NumElements == 1 || NumElements == 2) && 00512 AllUsersAreLoads(GEPI)) { 00513 Info.needsCanon = true; 00514 return; // Canonicalization required! 00515 } 00516 return MarkUnsafe(Info); 00517 } 00518 } 00519 00520 // Walk through the GEP type indices, checking the types that this indexes 00521 // into. 00522 for (; I != E; ++I) { 00523 // Ignore struct elements, no extra checking needed for these. 00524 if (isa<StructType>(*I)) 00525 continue; 00526 00527 ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand()); 00528 if (!IdxVal) return MarkUnsafe(Info); 00529 00530 // Are all indices still zero? 00531 IsAllZeroIndices &= IdxVal->isZero(); 00532 00533 if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { 00534 // This GEP indexes an array. Verify that this is an in-range constant 00535 // integer. Specifically, consider A[0][i]. We cannot know that the user 00536 // isn't doing invalid things like allowing i to index an out-of-range 00537 // subscript that accesses A[1]. Because of this, we have to reject SROA 00538 // of any accesses into structs where any of the components are variables. 00539 if (IdxVal->getZExtValue() >= AT->getNumElements()) 00540 return MarkUnsafe(Info); 00541 } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) { 00542 if (IdxVal->getZExtValue() >= VT->getNumElements()) 00543 return MarkUnsafe(Info); 00544 } 00545 } 00546 00547 // If there are any non-simple uses of this getelementptr, make sure to reject 00548 // them. 00549 return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); 00550 } 00551 00552 /// isSafeMemIntrinsicOnAllocation - Return true if the specified memory 00553 /// intrinsic can be promoted by SROA. At this point, we know that the operand 00554 /// of the memintrinsic is a pointer to the beginning of the allocation. 00555 void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI, 00556 unsigned OpNo, AllocaInfo &Info) { 00557 // If not constant length, give up. 00558 ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); 00559 if (!Length) return MarkUnsafe(Info); 00560 00561 // If not the whole aggregate, give up. 00562 if (Length->getZExtValue() != 00563 TD->getABITypeSize(AI->getType()->getElementType())) 00564 return MarkUnsafe(Info); 00565 00566 // We only know about memcpy/memset/memmove. 00567 if (!isa<MemCpyInst>(MI) && !isa<MemSetInst>(MI) && !isa<MemMoveInst>(MI)) 00568 return MarkUnsafe(Info); 00569 00570 // Otherwise, we can transform it. Determine whether this is a memcpy/set 00571 // into or out of the aggregate. 00572 if (OpNo == 1) 00573 Info.isMemCpyDst = true; 00574 else { 00575 assert(OpNo == 2); 00576 Info.isMemCpySrc = true; 00577 } 00578 } 00579 00580 /// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast 00581 /// are 00582 void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI, 00583 AllocaInfo &Info) { 00584 for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); 00585 UI != E; ++UI) { 00586 if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) { 00587 isSafeUseOfBitCastedAllocation(BCU, AI, Info); 00588 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) { 00589 isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); 00590 } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { 00591 // If storing the entire alloca in one chunk through a bitcasted pointer 00592 // to integer, we can transform it. This happens (for example) when you 00593 // cast a {i32,i32}* to i64* and store through it. This is similar to the 00594 // memcpy case and occurs in various "byval" cases and emulated memcpys. 00595 if (isa<IntegerType>(SI->getOperand(0)->getType()) && 00596 TD->getABITypeSize(SI->getOperand(0)->getType()) == 00597 TD->getABITypeSize(AI->getType()->getElementType())) { 00598 Info.isMemCpyDst = true; 00599 continue; 00600 } 00601 return MarkUnsafe(Info); 00602 } else { 00603 return MarkUnsafe(Info); 00604 } 00605 if (Info.isUnsafe) return; 00606 } 00607 } 00608 00609 /// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes 00610 /// to its first element. Transform users of the cast to use the new values 00611 /// instead. 00612 void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, 00613 SmallVector<AllocaInst*, 32> &NewElts) { 00614 Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); 00615 while (UI != UE) { 00616 Instruction *User = cast<Instruction>(*UI++); 00617 if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) { 00618 RewriteBitCastUserOfAlloca(BCU, AI, NewElts); 00619 if (BCU->use_empty()) BCU->eraseFromParent(); 00620 continue; 00621 } 00622 00623 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 00624 // This must be memcpy/memmove/memset of the entire aggregate. 00625 // Split into one per element. 00626 RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); 00627 continue; 00628 } 00629 00630 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 00631 // This must be a store of the entire alloca from an integer. 00632 RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); 00633 continue; 00634 } 00635 00636 // Otherwise it must be some other user of a gep of the first pointer. Just 00637 // leave these alone. 00638 continue; 00639 } 00640 } 00641 00642 /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. 00643 /// Rewrite it to copy or set the elements of the scalarized memory. 00644 void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, 00645 AllocationInst *AI, 00646 SmallVector<AllocaInst*, 32> &NewElts) { 00647 00648 // If this is a memcpy/memmove, construct the other pointer as the 00649 // appropriate type. 00650 Value *OtherPtr = 0; 00651 if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(MI)) { 00652 if (BCInst == MCI->getRawDest()) 00653 OtherPtr = MCI->getRawSource(); 00654 else { 00655 assert(BCInst == MCI->getRawSource()); 00656 OtherPtr = MCI->getRawDest(); 00657 } 00658 } else if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) { 00659 if (BCInst == MMI->getRawDest()) 00660 OtherPtr = MMI->getRawSource(); 00661 else { 00662 assert(BCInst == MMI->getRawSource()); 00663 OtherPtr = MMI->getRawDest(); 00664 } 00665 } 00666 00667 // If there is an other pointer, we want to convert it to the same pointer 00668 // type as AI has, so we can GEP through it safely. 00669 if (OtherPtr) { 00670 // It is likely that OtherPtr is a bitcast, if so, remove it. 00671 if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) 00672 OtherPtr = BC->getOperand(0); 00673 // All zero GEPs are effectively bitcasts. 00674 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) 00675 if (GEP->hasAllZeroIndices()) 00676 OtherPtr = GEP->getOperand(0); 00677 00678 if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr)) 00679 if (BCE->getOpcode() == Instruction::BitCast) 00680 OtherPtr = BCE->getOperand(0); 00681 00682 // If the pointer is not the right type, insert a bitcast to the right 00683 // type. 00684 if (OtherPtr->getType() != AI->getType()) 00685 OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(), 00686 MI); 00687 } 00688 00689 // Process each element of the aggregate. 00690 Value *TheFn = MI->getOperand(0); 00691 const Type *BytePtrTy = MI->getRawDest()->getType(); 00692 bool SROADest = MI->getRawDest() == BCInst; 00693 00694 Constant *Zero = Constant::getNullValue(Type::Int32Ty); 00695 00696 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 00697 // If this is a memcpy/memmove, emit a GEP of the other element address. 00698 Value *OtherElt = 0; 00699 if (OtherPtr) { 00700 Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) }; 00701 OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, 00702 OtherPtr->getNameStr()+"."+utostr(i), 00703 MI); 00704 } 00705 00706 Value *EltPtr = NewElts[i]; 00707 const Type *EltTy =cast<PointerType>(EltPtr->getType())->getElementType(); 00708 00709 // If we got down to a scalar, insert a load or store as appropriate. 00710 if (EltTy->isSingleValueType()) { 00711 if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) { 00712 Value *Elt = new LoadInst(SROADest ? OtherElt : EltPtr, "tmp", 00713 MI); 00714 new StoreInst(Elt, SROADest ? EltPtr : OtherElt, MI); 00715 continue; 00716 } 00717 assert(isa<MemSetInst>(MI)); 00718 00719 // If the stored element is zero (common case), just store a null 00720 // constant. 00721 Constant *StoreVal; 00722 if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) { 00723 if (CI->isZero()) { 00724 StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> 00725 } else { 00726 // If EltTy is a vector type, get the element type. 00727 const Type *ValTy = EltTy; 00728 if (const VectorType *VTy = dyn_cast<VectorType>(ValTy)) 00729 ValTy = VTy->getElementType(); 00730 00731 // Construct an integer with the right value. 00732 unsigned EltSize = TD->getTypeSizeInBits(ValTy); 00733 APInt OneVal(EltSize, CI->getZExtValue()); 00734 APInt TotalVal(OneVal); 00735 // Set each byte. 00736 for (unsigned i = 0; 8*i < EltSize; ++i) { 00737 TotalVal = TotalVal.shl(8); 00738 TotalVal |= OneVal; 00739 } 00740 00741 // Convert the integer value to the appropriate type. 00742 StoreVal = ConstantInt::get(TotalVal); 00743 if (isa<PointerType>(ValTy)) 00744 StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); 00745 else if (ValTy->isFloatingPoint()) 00746 StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); 00747 assert(StoreVal->getType() == ValTy && "Type mismatch!"); 00748 00749 // If the requested value was a vector constant, create it. 00750 if (EltTy != ValTy) { 00751 unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); 00752 SmallVector<Constant*, 16> Elts(NumElts, StoreVal); 00753 StoreVal = ConstantVector::get(&Elts[0], NumElts); 00754 } 00755 } 00756 new StoreInst(StoreVal, EltPtr, MI); 00757 continue; 00758 } 00759 // Otherwise, if we're storing a byte variable, use a memset call for 00760 // this element. 00761 } 00762 00763 // Cast the element pointer to BytePtrTy. 00764 if (EltPtr->getType() != BytePtrTy) 00765 EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI); 00766 00767 // Cast the other pointer (if we have one) to BytePtrTy. 00768 if (OtherElt && OtherElt->getType() != BytePtrTy) 00769 OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(), 00770 MI); 00771 00772 unsigned EltSize = TD->getABITypeSize(EltTy); 00773 00774 // Finally, insert the meminst for this element. 00775 if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) { 00776 Value *Ops[] = { 00777 SROADest ? EltPtr : OtherElt, // Dest ptr 00778 SROADest ? OtherElt : EltPtr, // Src ptr 00779 ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size 00780 Zero // Align 00781 }; 00782 CallInst::Create(TheFn, Ops, Ops + 4, "", MI); 00783 } else { 00784 assert(isa<MemSetInst>(MI)); 00785 Value *Ops[] = { 00786 EltPtr, MI->getOperand(2), // Dest, Value, 00787 ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size 00788 Zero // Align 00789 }; 00790 CallInst::Create(TheFn, Ops, Ops + 4, "", MI); 00791 } 00792 } 00793 MI->eraseFromParent(); 00794 } 00795 00796 /// RewriteStoreUserOfWholeAlloca - We found an store of an integer that 00797 /// overwrites the entire allocation. Extract out the pieces of the stored 00798 /// integer and store them individually. 00799 void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, 00800 AllocationInst *AI, 00801 SmallVector<AllocaInst*, 32> &NewElts){ 00802 // Extract each element out of the integer according to its structure offset 00803 // and store the element value to the individual alloca. 00804 Value *SrcVal = SI->getOperand(0); 00805 const Type *AllocaEltTy = AI->getType()->getElementType(); 00806 uint64_t AllocaSizeBits = TD->getABITypeSizeInBits(AllocaEltTy); 00807 00808 // If this isn't a store of an integer to the whole alloca, it may be a store 00809 // to the first element. Just ignore the store in this case and normal SROA 00810 // will handle it. 00811 if (!isa<IntegerType>(SrcVal->getType()) || 00812 TD->getABITypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) 00813 return; 00814 00815 DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; 00816 00817 // There are two forms here: AI could be an array or struct. Both cases 00818 // have different ways to compute the element offset. 00819 if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 00820 const StructLayout *Layout = TD->getStructLayout(EltSTy); 00821 00822 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 00823 // Get the number of bits to shift SrcVal to get the value. 00824 const Type *FieldTy = EltSTy->getElementType(i); 00825 uint64_t Shift = Layout->getElementOffsetInBits(i); 00826 00827 if (TD->isBigEndian()) 00828 Shift = AllocaSizeBits-Shift-TD->getABITypeSizeInBits(FieldTy); 00829 00830 Value *EltVal = SrcVal; 00831 if (Shift) { 00832 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 00833 EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, 00834 "sroa.store.elt", SI); 00835 } 00836 00837 // Truncate down to an integer of the right size. 00838 uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); 00839 if (FieldSizeBits != AllocaSizeBits) 00840 EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI); 00841 Value *DestField = NewElts[i]; 00842 if (EltVal->getType() == FieldTy) { 00843 // Storing to an integer field of this size, just do it. 00844 } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) { 00845 // Bitcast to the right element type (for fp/vector values). 00846 EltVal = new BitCastInst(EltVal, FieldTy, "", SI); 00847 } else { 00848 // Otherwise, bitcast the dest pointer (for aggregates). 00849 DestField = new BitCastInst(DestField, 00850 PointerType::getUnqual(EltVal->getType()), 00851 "", SI); 00852 } 00853 new StoreInst(EltVal, DestField, SI); 00854 } 00855 00856 } else { 00857 const ArrayType *ATy = cast<ArrayType>(AllocaEltTy); 00858 const Type *ArrayEltTy = ATy->getElementType(); 00859 uint64_t ElementOffset = TD->getABITypeSizeInBits(ArrayEltTy); 00860 uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); 00861 00862 uint64_t Shift; 00863 00864 if (TD->isBigEndian()) 00865 Shift = AllocaSizeBits-ElementOffset; 00866 else 00867 Shift = 0; 00868 00869 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 00870 00871 Value *EltVal = SrcVal; 00872 if (Shift) { 00873 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 00874 EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, 00875 "sroa.store.elt", SI); 00876 } 00877 00878 // Truncate down to an integer of the right size. 00879 if (ElementSizeBits != AllocaSizeBits) 00880 EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI); 00881 Value *DestField = NewElts[i]; 00882 if (EltVal->getType() == ArrayEltTy) { 00883 // Storing to an integer field of this size, just do it. 00884 } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) { 00885 // Bitcast to the right element type (for fp/vector values). 00886 EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); 00887 } else { 00888 // Otherwise, bitcast the dest pointer (for aggregates). 00889 DestField = new BitCastInst(DestField, 00890 PointerType::getUnqual(EltVal->getType()), 00891 "", SI); 00892 } 00893 new StoreInst(EltVal, DestField, SI); 00894 00895 if (TD->isBigEndian()) 00896 Shift -= ElementOffset; 00897 else 00898 Shift += ElementOffset; 00899 } 00900 } 00901 00902 SI->eraseFromParent(); 00903 } 00904 00905 00906 /// HasPadding - Return true if the specified type has any structure or 00907 /// alignment padding, false otherwise. 00908 static bool HasPadding(const Type *Ty, const TargetData &TD) { 00909 if (const StructType *STy = dyn_cast<StructType>(Ty)) { 00910 const StructLayout *SL = TD.getStructLayout(STy); 00911 unsigned PrevFieldBitOffset = 0; 00912 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 00913 unsigned FieldBitOffset = SL->getElementOffsetInBits(i); 00914 00915 // Padding in sub-elements? 00916 if (HasPadding(STy->getElementType(i), TD)) 00917 return true; 00918 00919 // Check to see if there is any padding between this element and the 00920 // previous one. 00921 if (i) { 00922 unsigned PrevFieldEnd = 00923 PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1)); 00924 if (PrevFieldEnd < FieldBitOffset) 00925 return true; 00926 } 00927 00928 PrevFieldBitOffset = FieldBitOffset; 00929 } 00930 00931 // Check for tail padding. 00932 if (unsigned EltCount = STy->getNumElements()) { 00933 unsigned PrevFieldEnd = PrevFieldBitOffset + 00934 TD.getTypeSizeInBits(STy->getElementType(EltCount-1)); 00935 if (PrevFieldEnd < SL->getSizeInBits()) 00936 return true; 00937 } 00938 00939 } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 00940 return HasPadding(ATy->getElementType(), TD); 00941 } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { 00942 return HasPadding(VTy->getElementType(), TD); 00943 } 00944 return TD.getTypeSizeInBits(Ty) != TD.getABITypeSizeInBits(Ty); 00945 } 00946 00947 /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of 00948 /// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, 00949 /// or 1 if safe after canonicalization has been performed. 00950 /// 00951