LLVM API Documentation
00001 //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file promotes memory references to be register references. It promotes 00011 // alloca instructions which only have loads and stores as uses. An alloca is 00012 // transformed by using dominator frontiers to place PHI nodes, then traversing 00013 // the function in depth-first order to rewrite loads and stores as appropriate. 00014 // This is just the standard SSA construction algorithm to construct "pruned" 00015 // SSA form. 00016 // 00017 //===----------------------------------------------------------------------===// 00018 00019 #define DEBUG_TYPE "mem2reg" 00020 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 00021 #include "llvm/Constants.h" 00022 #include "llvm/DerivedTypes.h" 00023 #include "llvm/Function.h" 00024 #include "llvm/Instructions.h" 00025 #include "llvm/IntrinsicInst.h" 00026 #include "llvm/Analysis/Dominators.h" 00027 #include "llvm/Analysis/AliasSetTracker.h" 00028 #include "llvm/ADT/DenseMap.h" 00029 #include "llvm/ADT/SmallPtrSet.h" 00030 #include "llvm/ADT/SmallVector.h" 00031 #include "llvm/ADT/Statistic.h" 00032 #include "llvm/ADT/StringExtras.h" 00033 #include "llvm/ADT/STLExtras.h" 00034 #include "llvm/Support/CFG.h" 00035 #include "llvm/Support/Compiler.h" 00036 #include <algorithm> 00037 using namespace llvm; 00038 00039 STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); 00040 STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); 00041 STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); 00042 STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); 00043 00044 // Provide DenseMapInfo for all pointers. 00045 namespace llvm { 00046 template<> 00047 struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { 00048 typedef std::pair<BasicBlock*, unsigned> EltTy; 00049 static inline EltTy getEmptyKey() { 00050 return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U); 00051 } 00052 static inline EltTy getTombstoneKey() { 00053 return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U); 00054 } 00055 static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { 00056 return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2; 00057 } 00058 static bool isEqual(const EltTy &LHS, const EltTy &RHS) { 00059 return LHS == RHS; 00060 } 00061 static bool isPod() { return true; } 00062 }; 00063 } 00064 00065 /// isAllocaPromotable - Return true if this alloca is legal for promotion. 00066 /// This is true if there are only loads and stores to the alloca. 00067 /// 00068 bool llvm::isAllocaPromotable(const AllocaInst *AI) { 00069 // FIXME: If the memory unit is of pointer or integer type, we can permit 00070 // assignments to subsections of the memory unit. 00071 00072 // Only allow direct and non-volatile loads and stores... 00073 for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end(); 00074 UI != UE; ++UI) // Loop over all of the uses of the alloca 00075 if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { 00076 if (LI->isVolatile()) 00077 return false; 00078 } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) { 00079 if (SI->getOperand(0) == AI) 00080 return false; // Don't allow a store OF the AI, only INTO the AI. 00081 if (SI->isVolatile()) 00082 return false; 00083 } else if (const BitCastInst *BC = dyn_cast<BitCastInst>(*UI)) { 00084 // Uses by dbg info shouldn't inhibit promotion. 00085 if (!BC->hasOneUse() || !isa<DbgInfoIntrinsic>(*BC->use_begin())) 00086 return false; 00087 } else { 00088 return false; 00089 } 00090 00091 return true; 00092 } 00093 00094 namespace { 00095 struct AllocaInfo; 00096 00097 // Data package used by RenamePass() 00098 class VISIBILITY_HIDDEN RenamePassData { 00099 public: 00100 typedef std::vector<Value *> ValVector; 00101 00102 RenamePassData() {} 00103 RenamePassData(BasicBlock *B, BasicBlock *P, 00104 const ValVector &V) : BB(B), Pred(P), Values(V) {} 00105 BasicBlock *BB; 00106 BasicBlock *Pred; 00107 ValVector Values; 00108 00109 void swap(RenamePassData &RHS) { 00110 std::swap(BB, RHS.BB); 00111 std::swap(Pred, RHS.Pred); 00112 Values.swap(RHS.Values); 00113 } 00114 }; 00115 00116 /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of 00117 /// load/store instructions in the block that directly load or store an alloca. 00118 /// 00119 /// This functionality is important because it avoids scanning large basic 00120 /// blocks multiple times when promoting many allocas in the same block. 00121 class VISIBILITY_HIDDEN LargeBlockInfo { 00122 /// InstNumbers - For each instruction that we track, keep the index of the 00123 /// instruction. The index starts out as the number of the instruction from 00124 /// the start of the block. 00125 DenseMap<const Instruction *, unsigned> InstNumbers; 00126 public: 00127 00128 /// isInterestingInstruction - This code only looks at accesses to allocas. 00129 static bool isInterestingInstruction(const Instruction *I) { 00130 return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || 00131 (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); 00132 } 00133 00134 /// getInstructionIndex - Get or calculate the index of the specified 00135 /// instruction. 00136 unsigned getInstructionIndex(const Instruction *I) { 00137 assert(isInterestingInstruction(I) && 00138 "Not a load/store to/from an alloca?"); 00139 00140 // If we already have this instruction number, return it. 00141 DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); 00142 if (It != InstNumbers.end()) return It->second; 00143 00144 // Scan the whole block to get the instruction. This accumulates 00145 // information for every interesting instruction in the block, in order to 00146 // avoid gratuitus rescans. 00147 const BasicBlock *BB = I->getParent(); 00148 unsigned InstNo = 0; 00149 for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); 00150 BBI != E; ++BBI) 00151 if (isInterestingInstruction(BBI)) 00152 InstNumbers[BBI] = InstNo++; 00153 It = InstNumbers.find(I); 00154 00155 assert(It != InstNumbers.end() && "Didn't insert instruction?"); 00156 return It->second; 00157 } 00158 00159 void deleteValue(const Instruction *I) { 00160 InstNumbers.erase(I); 00161 } 00162 00163 void clear() { 00164 InstNumbers.clear(); 00165 } 00166 }; 00167 00168 struct VISIBILITY_HIDDEN PromoteMem2Reg { 00169 /// Allocas - The alloca instructions being promoted. 00170 /// 00171 std::vector<AllocaInst*> Allocas; 00172 DominatorTree &DT; 00173 DominanceFrontier &DF; 00174 00175 /// AST - An AliasSetTracker object to update. If null, don't update it. 00176 /// 00177 AliasSetTracker *AST; 00178 00179 /// AllocaLookup - Reverse mapping of Allocas. 00180 /// 00181 std::map<AllocaInst*, unsigned> AllocaLookup; 00182 00183 /// NewPhiNodes - The PhiNodes we're adding. 00184 /// 00185 DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes; 00186 00187 /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas 00188 /// it corresponds to. 00189 DenseMap<PHINode*, unsigned> PhiToAllocaMap; 00190 00191 /// PointerAllocaValues - If we are updating an AliasSetTracker, then for 00192 /// each alloca that is of pointer type, we keep track of what to copyValue 00193 /// to the inserted PHI nodes here. 00194 /// 00195 std::vector<Value*> PointerAllocaValues; 00196 00197 /// Visited - The set of basic blocks the renamer has already visited. 00198 /// 00199 SmallPtrSet<BasicBlock*, 16> Visited; 00200 00201 /// BBNumbers - Contains a stable numbering of basic blocks to avoid 00202 /// non-determinstic behavior. 00203 DenseMap<BasicBlock*, unsigned> BBNumbers; 00204 00205 /// BBNumPreds - Lazily compute the number of predecessors a block has. 00206 DenseMap<const BasicBlock*, unsigned> BBNumPreds; 00207 public: 00208 PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, 00209 DominanceFrontier &df, AliasSetTracker *ast) 00210 : Allocas(A), DT(dt), DF(df), AST(ast) {} 00211 00212 void run(); 00213 00214 /// properlyDominates - Return true if I1 properly dominates I2. 00215 /// 00216 bool properlyDominates(Instruction *I1, Instruction *I2) const { 00217 if (InvokeInst *II = dyn_cast<InvokeInst>(I1)) 00218 I1 = II->getNormalDest()->begin(); 00219 return DT.properlyDominates(I1->getParent(), I2->getParent()); 00220 } 00221 00222 /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. 00223 /// 00224 bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { 00225 return DT.dominates(BB1, BB2); 00226 } 00227 00228 private: 00229 void RemoveFromAllocasList(unsigned &AllocaIdx) { 00230 Allocas[AllocaIdx] = Allocas.back(); 00231 Allocas.pop_back(); 00232 --AllocaIdx; 00233 } 00234 00235 unsigned getNumPreds(const BasicBlock *BB) { 00236 unsigned &NP = BBNumPreds[BB]; 00237 if (NP == 0) 00238 NP = std::distance(pred_begin(BB), pred_end(BB))+1; 00239 return NP-1; 00240 } 00241 00242 void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 00243 AllocaInfo &Info); 00244 void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 00245 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 00246 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks); 00247 00248 void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, 00249 LargeBlockInfo &LBI); 00250 void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 00251 LargeBlockInfo &LBI); 00252 00253 00254 void RenamePass(BasicBlock *BB, BasicBlock *Pred, 00255 RenamePassData::ValVector &IncVals, 00256 std::vector<RenamePassData> &Worklist); 00257 bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version, 00258 SmallPtrSet<PHINode*, 16> &InsertedPHINodes); 00259 }; 00260 00261 struct AllocaInfo { 00262 std::vector<BasicBlock*> DefiningBlocks; 00263 std::vector<BasicBlock*> UsingBlocks; 00264 00265 StoreInst *OnlyStore; 00266 BasicBlock *OnlyBlock; 00267 bool OnlyUsedInOneBlock; 00268 00269 Value *AllocaPointerVal; 00270 00271 void clear() { 00272 DefiningBlocks.clear(); 00273 UsingBlocks.clear(); 00274 OnlyStore = 0; 00275 OnlyBlock = 0; 00276 OnlyUsedInOneBlock = true; 00277 AllocaPointerVal = 0; 00278 } 00279 00280 /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our 00281 /// ivars. 00282 void AnalyzeAlloca(AllocaInst *AI) { 00283 clear(); 00284 00285 // As we scan the uses of the alloca instruction, keep track of stores, 00286 // and decide whether all of the loads and stores to the alloca are within 00287 // the same basic block. 00288 for (Value::use_iterator U = AI->use_begin(), E = AI->use_end(); 00289 U != E;) { 00290 Instruction *User = cast<Instruction>(*U); 00291 ++U; 00292 if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { 00293 // Remove any uses of this alloca in DbgInfoInstrinsics. 00294 assert(BC->hasOneUse() && "Unexpected alloca uses!"); 00295 DbgInfoIntrinsic *DI = cast<DbgInfoIntrinsic>(*BC->use_begin()); 00296 DI->eraseFromParent(); 00297 BC->eraseFromParent(); 00298 continue; 00299 } 00300 else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 00301 // Remember the basic blocks which define new values for the alloca 00302 DefiningBlocks.push_back(SI->getParent()); 00303 AllocaPointerVal = SI->getOperand(0); 00304 OnlyStore = SI; 00305 } else { 00306 LoadInst *LI = cast<LoadInst>(User); 00307 // Otherwise it must be a load instruction, keep track of variable 00308 // reads. 00309 UsingBlocks.push_back(LI->getParent()); 00310 AllocaPointerVal = LI; 00311 } 00312 00313 if (OnlyUsedInOneBlock) { 00314 if (OnlyBlock == 0) 00315 OnlyBlock = User->getParent(); 00316 else if (OnlyBlock != User->getParent()) 00317 OnlyUsedInOneBlock = false; 00318 } 00319 } 00320 } 00321 }; 00322 } // end of anonymous namespace 00323 00324 00325 void PromoteMem2Reg::run() { 00326 Function &F = *DF.getRoot()->getParent(); 00327 00328 if (AST) PointerAllocaValues.resize(Allocas.size()); 00329 00330 AllocaInfo Info; 00331 LargeBlockInfo LBI; 00332 00333 for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { 00334 AllocaInst *AI = Allocas[AllocaNum]; 00335 00336 assert(isAllocaPromotable(AI) && 00337 "Cannot promote non-promotable alloca!"); 00338 assert(AI->getParent()->getParent() == &F && 00339 "All allocas should be in the same function, which is same as DF!"); 00340 00341 if (AI->use_empty()) { 00342 // If there are no uses of the alloca, just delete it now. 00343 if (AST) AST->deleteValue(AI); 00344 AI->eraseFromParent(); 00345 00346 // Remove the alloca from the Allocas list, since it has been processed 00347 RemoveFromAllocasList(AllocaNum); 00348 ++NumDeadAlloca; 00349 continue; 00350 } 00351 00352 // Calculate the set of read and write-locations for each alloca. This is 00353 // analogous to finding the 'uses' and 'definitions' of each variable. 00354 Info.AnalyzeAlloca(AI); 00355 00356 // If there is only a single store to this value, replace any loads of 00357 // it that are directly dominated by the definition with the value stored. 00358 if (Info.DefiningBlocks.size() == 1) { 00359 RewriteSingleStoreAlloca(AI, Info, LBI); 00360 00361 // Finally, after the scan, check to see if the store is all that is left. 00362 if (Info.UsingBlocks.empty()) { 00363 // Remove the (now dead) store and alloca. 00364 Info.OnlyStore->eraseFromParent(); 00365 LBI.deleteValue(Info.OnlyStore); 00366 00367 if (AST) AST->deleteValue(AI); 00368 AI->eraseFromParent(); 00369 LBI.deleteValue(AI); 00370 00371 // The alloca has been processed, move on. 00372 RemoveFromAllocasList(AllocaNum); 00373 00374 ++NumSingleStore; 00375 continue; 00376 } 00377 } 00378 00379 // If the alloca is only read and written in one basic block, just perform a 00380 // linear sweep over the block to eliminate it. 00381 if (Info.OnlyUsedInOneBlock) { 00382 PromoteSingleBlockAlloca(AI, Info, LBI); 00383 00384 // Finally, after the scan, check to see if the stores are all that is 00385 // left. 00386 if (Info.UsingBlocks.empty()) { 00387 00388 // Remove the (now dead) stores and alloca. 00389 while (!AI->use_empty()) { 00390 StoreInst *SI = cast<StoreInst>(AI->use_back()); 00391 SI->eraseFromParent(); 00392 LBI.deleteValue(SI); 00393 } 00394 00395 if (AST) AST->deleteValue(AI); 00396 AI->eraseFromParent(); 00397 LBI.deleteValue(AI); 00398 00399 // The alloca has been processed, move on. 00400 RemoveFromAllocasList(AllocaNum); 00401 00402 ++NumLocalPromoted; 00403 continue; 00404 } 00405 } 00406 00407 // If we haven't computed a numbering for the BB's in the function, do so 00408 // now. 00409 if (BBNumbers.empty()) { 00410 unsigned ID = 0; 00411 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) 00412 BBNumbers[I] = ID++; 00413 } 00414 00415 // If we have an AST to keep updated, remember some pointer value that is 00416 // stored into the alloca. 00417 if (AST) 00418 PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; 00419 00420 // Keep the reverse mapping of the 'Allocas' array for the rename pass. 00421 AllocaLookup[Allocas[AllocaNum]] = AllocaNum; 00422 00423 // At this point, we're committed to promoting the alloca using IDF's, and 00424 // the standard SSA construction algorithm. Determine which blocks need PHI 00425 // nodes and see if we can optimize out some work by avoiding insertion of 00426 // dead phi nodes. 00427 DetermineInsertionPoint(AI, AllocaNum, Info); 00428 } 00429 00430 if (Allocas.empty()) 00431 return; // All of the allocas must have been trivial! 00432 00433 LBI.clear(); 00434 00435 00436 // Set the incoming values for the basic block to be null values for all of 00437 // the alloca's. We do this in case there is a load of a value that has not 00438 // been stored yet. In this case, it will get this null value. 00439 // 00440 RenamePassData::ValVector Values(Allocas.size()); 00441 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) 00442 Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); 00443 00444 // Walks all basic blocks in the function performing the SSA rename algorithm 00445 // and inserting the phi nodes we marked as necessary 00446 // 00447 std::vector<RenamePassData> RenamePassWorkList; 00448 RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); 00449 while (!RenamePassWorkList.empty()) { 00450 RenamePassData RPD; 00451 RPD.swap(RenamePassWorkList.back()); 00452 RenamePassWorkList.pop_back(); 00453 // RenamePass may add new worklist entries. 00454 RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); 00455 } 00456 00457 // The renamer uses the Visited set to avoid infinite loops. Clear it now. 00458 Visited.clear(); 00459 00460 // Remove the allocas themselves from the function. 00461 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { 00462 Instruction *A = Allocas[i]; 00463 00464 // If there are any uses of the alloca instructions left, they must be in 00465 // sections of dead code that were not processed on the dominance frontier. 00466 // Just delete the users now. 00467 // 00468 if (!A->use_empty()) 00469 A->replaceAllUsesWith(UndefValue::get(A->getType())); 00470 if (AST) AST->deleteValue(A); 00471 A->eraseFromParent(); 00472 } 00473 00474 00475 // Loop over all of the PHI nodes and see if there are any that we can get 00476 // rid of because they merge all of the same incoming values. This can 00477 // happen due to undef values coming into the PHI nodes. This process is 00478 // iterative, because eliminating one PHI node can cause others to be removed. 00479 bool EliminatedAPHI = true; 00480 while (EliminatedAPHI) { 00481 EliminatedAPHI = false; 00482 00483 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 00484 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { 00485 PHINode *PN = I->second; 00486 00487 // If this PHI node merges one value and/or undefs, get the value. 00488 if (Value *V = PN->hasConstantValue(true)) { 00489 if (!isa<Instruction>(V) || 00490 properlyDominates(cast<Instruction>(V), PN)) { 00491 if (AST && isa<PointerType>(PN->getType())) 00492 AST->deleteValue(PN); 00493 PN->replaceAllUsesWith(V); 00494 PN->eraseFromParent(); 00495 NewPhiNodes.erase(I++); 00496 EliminatedAPHI = true; 00497 continue; 00498 } 00499 } 00500 ++I; 00501 } 00502 } 00503 00504 // At this point, the renamer has added entries to PHI nodes for all reachable 00505 // code. Unfortunately, there may be unreachable blocks which the renamer 00506 // hasn't traversed. If this is the case, the PHI nodes may not 00507 // have incoming values for all predecessors. Loop over all PHI nodes we have 00508 // created, inserting undef values if they are missing any incoming values. 00509 // 00510 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 00511 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { 00512 // We want to do this once per basic block. As such, only process a block 00513 // when we find the PHI that is the first entry in the block. 00514 PHINode *SomePHI = I->second; 00515 BasicBlock *BB = SomePHI->getParent(); 00516 if (&BB->front() != SomePHI) 00517 continue; 00518 00519 // Only do work here if there the PHI nodes are missing incoming values. We 00520 // know that all PHI nodes that were inserted in a block will have the same 00521 // number of incoming values, so we can just check any of them. 00522 if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) 00523 continue; 00524 00525 // Get the preds for BB. 00526 SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); 00527 00528 // Ok, now we know that all of the PHI nodes are missing entries for some 00529 // basic blocks. Start by sorting the incoming predecessors for efficient 00530 // access. 00531 std::sort(Preds.begin(), Preds.end()); 00532 00533 // Now we loop through all BB's which have entries in SomePHI and remove 00534 // them from the Preds list. 00535 for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { 00536 // Do a log(n) search of the Preds list for the entry we want. 00537 SmallVector<BasicBlock*, 16>::iterator EntIt = 00538 std::lower_bound(Preds.begin(), Preds.end(), 00539 SomePHI->getIncomingBlock(i)); 00540 assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& 00541 "PHI node has entry for a block which is not a predecessor!"); 00542 00543 // Remove the entry 00544 Preds.erase(EntIt); 00545 } 00546 00547 // At this point, the blocks left in the preds list must have dummy 00548 // entries inserted into every PHI nodes for the block. Update all the phi 00549 // nodes in this block that we are inserting (there could be phis before 00550 // mem2reg runs). 00551 unsigned NumBadPreds = SomePHI->getNumIncomingValues(); 00552 BasicBlock::iterator BBI = BB->begin(); 00553 while ((SomePHI = dyn_cast<PHINode>(BBI++)) && 00554 SomePHI->getNumIncomingValues() == NumBadPreds) { 00555 Value *UndefVal = UndefValue::get(SomePHI->getType()); 00556 for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) 00557 SomePHI->addIncoming(UndefVal, Preds[pred]); 00558 } 00559 } 00560 00561 NewPhiNodes.clear(); 00562 } 00563 00564 00565 /// ComputeLiveInBlocks - Determine which blocks the value is live in. These 00566 /// are blocks which lead to uses. Knowing this allows us to avoid inserting 00567 /// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes 00568 /// would be dead). 00569 void PromoteMem2Reg:: 00570 ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 00571 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 00572 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) { 00573 00574 // To determine liveness, we must iterate through the predecessors of blocks 00575 // where the def is live. Blocks are added to the worklist if we need to 00576 // check their predecessors. Start with all the using blocks. 00577 SmallVector<BasicBlock*, 64> LiveInBlockWorklist; 00578 LiveInBlockWorklist.insert(LiveInBlockWorklist.end(), 00579 Info.UsingBlocks.begin(), Info.UsingBlocks.end()); 00580 00581 // If any of the using blocks is also a definition block, check to see if the 00582 // definition occurs before or after the use. If it happens before the use, 00583 // the value isn't really live-in. 00584 for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { 00585 BasicBlock *BB = LiveInBlockWorklist[i]; 00586 if (!DefBlocks.count(BB)) continue; 00587 00588 // Okay, this is a block that both uses and defines the value. If the first 00589 // reference to the alloca is a def (store), then we know it isn't live-in. 00590 for (BasicBlock::iterator I = BB->begin(); ; ++I) { 00591 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 00592 if (SI->getOperand(1) != AI) continue; 00593 00594 // We found a store to the alloca before a load. The alloca is not 00595 // actually live-in here. 00596 LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); 00597 LiveInBlockWorklist.pop_back(); 00598 --i, --e; 00599 break; 00600 } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 00601 if (LI->getOperand(0) != AI) continue; 00602 00603 // Okay, we found a load before a store to the alloca. It is actually 00604 // live into this block. 00605 break; 00606 } 00607 } 00608 } 00609 00610 // Now that we have a set of blocks where the phi is live-in, recursively add 00611 // their predecessors until we find the full region the value is live. 00612 while (!LiveInBlockWorklist.empty()) { 00613 BasicBlock *BB = LiveInBlockWorklist.back(); 00614 LiveInBlockWorklist.pop_back(); 00615 00616 // The block really is live in here, insert it into the set. If already in 00617 // the set, then it has already been processed. 00618 if (!LiveInBlocks.insert(BB)) 00619 continue; 00620 00621 // Since the value is live into BB, it is either defined in a predecessor or 00622 // live into it to. Add the preds to the worklist unless they are a 00623 // defining block. 00624 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 00625 BasicBlock *P = *PI; 00626 00627 // The value is not live into a predecessor if it defines the value. 00628 if (DefBlocks.count(P)) 00629 continue; 00630 00631 // Otherwise it is, add to the worklist. 00632 LiveInBlockWorklist.push_back(P); 00633 } 00634 } 00635 } 00636 00637 /// DetermineInsertionPoint - At this point, we're committed to promoting the 00638 /// alloca using IDF's, and the standard SSA construction algorithm. Determine 00639 /// which blocks need phi nodes and see if we can optimize out some work by 00640 /// avoiding insertion of dead phi nodes. 00641 void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 00642 AllocaInfo &Info) { 00643 00644 // Unique the set of defining blocks for efficient lookup. 00645 SmallPtrSet<BasicBlock*, 32> DefBlocks; 00646 DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); 00647 00648 // Determine which blocks the value is live in. These are blocks which lead 00649 // to uses. 00650 SmallPtrSet<BasicBlock*, 32> LiveInBlocks; 00651 ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); 00652 00653 // Compute the locations where PhiNodes need to be inserted. Look at the 00654 // dominance frontier of EACH basic-block we have a write in. 00655 unsigned CurrentVersion = 0; 00656 SmallPtrSet<PHINode*, 16> InsertedPHINodes; 00657 std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks; 00658 while (!Info.DefiningBlocks.empty()) { 00659 BasicBlock *BB = Info.DefiningBlocks.back(); 00660 Info.DefiningBlocks.pop_back(); 00661 00662 // Look up the DF for this write, add it to defining blocks. 00663 DominanceFrontier::const_iterator it = DF.find(BB); 00664 if (it == DF.end()) continue; 00665 00666 const DominanceFrontier::DomSetType &S = it->second; 00667 00668 // In theory we don't need the indirection through the DFBlocks vector. 00669 // In practice, the order of calling QueuePhiNode would depend on the 00670 // (unspecified) ordering of basic blocks in the dominance frontier, 00671 // which would give PHI nodes non-determinstic subscripts. Fix this by 00672 // processing blocks in order of the occurance in the function. 00673 for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), 00674 PE = S.end(); P != PE; ++P) { 00675 // If the frontier block is not in the live-in set for the alloca, don't 00676 // bother processing it. 00677 if (!LiveInBlocks.count(*P)) 00678 continue; 00679 00680 DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); 00681 } 00682 00683 // Sort by which the block ordering in the function. 00684 if (DFBlocks.size() > 1) 00685 std::sort(DFBlocks.begin(), DFBlocks.end()); 00686 00687 for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { 00688 BasicBlock *BB = DFBlocks[i].second; 00689 if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes)) 00690 Info.DefiningBlocks.push_back(BB); 00691 } 00692 DFBlocks.clear(); 00693 } 00694 } 00695 00696 /// RewriteSingleStoreAlloca - If there is only a single store to this value, 00697 /// replace any loads of it that are directly dominated by the definition with 00698 /// the value stored. 00699 void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, 00700 AllocaInfo &Info, 00701 LargeBlockInfo &LBI) { 00702 StoreInst *OnlyStore = Info.OnlyStore; 00703 bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); 00704 BasicBlock *StoreBB = OnlyStore->getParent(); 00705 int StoreIndex = -1; 00706 00707 // Clear out UsingBlocks. We will reconstruct it here if needed. 00708 Info.UsingBlocks.clear(); 00709 00710 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { 00711 Instruction *UserInst = cast<Instruction>(*UI++); 00712 if (!isa<LoadInst>(UserInst)) { 00713 assert(UserInst == OnlyStore && "Should only have load/stores"); 00714 continue; 00715 } 00716 LoadInst *LI = cast<LoadInst>(UserInst); 00717 00718 // Okay, if we have a load from the alloca, we want to replace it with the 00719 // only value stored to the alloca. We can do this if the value is 00720 // dominated by the store. If not, we use the rest of the mem2reg machinery 00721 // to insert the phi nodes as needed. 00722 if (!StoringGlobalVal) { // Non-instructions are always dominated. 00723 if (LI->getParent() == StoreBB) { 00724 // If we have a use that is in the same block as the store, compare the 00725 // indices of the two instructions to see which one came first. If the 00726 // load came before the store, we can't handle it. 00727 if (StoreIndex == -1) 00728 StoreIndex = LBI.getInstructionIndex(OnlyStore); 00729 00730 if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { 00731 // Can't handle this load, bail out. 00732 Info.UsingBlocks.push_back(StoreBB); 00733 continue; 00734 } 00735 00736 } else if (LI->getParent() != StoreBB && 00737 !dominates(StoreBB, LI->getParent())) { 00738 // If the load and store are in different blocks, use BB dominance to 00739 // check their relationships. If the store doesn't dom the use, bail 00740 // out. 00741 Info.UsingBlocks.push_back(LI->getParent()); 00742 continue; 00743 } 00744 } 00745 00746 // Otherwise, we *can* safely rewrite this load. 00747 LI->replaceAllUsesWith(OnlyStore->getOperand(0)); 00748 if (AST && isa<PointerType>(LI->getType())) 00749 AST->deleteValue(LI); 00750 LI->eraseFromParent(); 00751 LBI.deleteValue(LI); 00752 } 00753 } 00754 00755 00756 /// StoreIndexSearchPredicate - This is a helper predicate used to search by the 00757 /// first element of a pair. 00758 struct StoreIndexSearchPredicate { 00759 bool operator()(const std::pair<unsigned, StoreInst*> &LHS, 00760 const std::pair<unsigned, StoreInst*> &RHS) { 00761 return LHS.first < RHS.first; 00762 } 00763 }; 00764 00765 /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic 00766 /// block. If this is the case, avoid traversing the CFG and inserting a lot of 00767 /// potentially useless PHI nodes by just performing a single linear pass over 00768 /// the basic block using the Alloca. 00769 /// 00770 /// If we cannot promote this alloca (because it is read before it is written), 00771 /// return true. This is necessary in cases where, due to control flow, the 00772 /// alloca is potentially undefined on some control flow paths. e.g. code like 00773 /// this is potentially correct: 00774 /// 00775 /// for (...) { if (c) { A = undef; undef = B; } } 00776 /// 00777 /// ... so long as A is not used before undef is set. 00778 /// 00779 void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 00780 LargeBlockInfo &LBI) { 00781 // The trickiest case to handle is when we have large blocks. Because of this, 00782 // this code is optimized assuming that large blocks happen. This does not 00783 // significantly pessimize the small block case. This uses LargeBlockInfo to 00784 // make it efficient to get the index of various operations in the block. 00785 00786 // Clear out UsingBlocks. We will reconstruct it here if needed. 00787 Info.UsingBlocks.clear(); 00788 00789 // Walk the use-def list of the alloca, getting the locations of all stores. 00790 typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; 00791 StoresByIndexTy StoresByIndex; 00792 00793 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 00794 UI != E; ++UI) 00795 if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) 00796 StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); 00797 00798 // If there are no stores to the alloca, just replace any loads with undef. 00799 if (StoresByIndex.empty()) { 00800 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 00801 if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { 00802 LI->replaceAllUsesWith(UndefValue::get(LI->getType())); 00803 if (AST && isa<PointerType>(LI->getType())) 00804 AST->deleteValue(LI); 00805 LBI.deleteValue(LI); 00806 LI->eraseFromParent(); 00807 } 00808 return; 00809 } 00810 00811 // Sort the stores by their index, making it efficient to do a lookup with a 00812 // binary search. 00813 std::sort(StoresByIndex.begin(), StoresByIndex.end()); 00814 00815 // Walk all of the loads from this alloca, replacing them with the nearest 00816 // store above them, if any. 00817 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { 00818 LoadInst *LI = dyn_cast<LoadInst>(*UI++); 00819 if (!LI) continue; 00820 00821 unsigned LoadIdx = LBI.getInstructionIndex(LI); 00822 00823 // Find the nearest store that has a lower than this load. 00824 StoresByIndexTy::iterator I = 00825 std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), 00826 std::pair<unsigned, StoreInst*>(LoadIdx, 0), 00827 StoreIndexSearchPredicate()); 00828 00829 // If there is no store before this load, then we can't promote this load. 00830 if (I == StoresByIndex.begin()) { 00831 // Can't handle this load, bail out. 00832 Info.UsingBlocks.push_back(LI->getParent()); 00833 continue; 00834 } 00835 00836 // Otherwise, there was a store before this load, the load takes its value. 00837 --I; 00838 LI->replaceAllUsesWith(I->second->getOperand(0)); 00839 if (AST && isa<PointerType>(LI->getType())) 00840 AST->deleteValue(LI); 00841 LI->eraseFromParent(); 00842 LBI.deleteValue(LI); 00843 } 00844 } 00845 00846 00847 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 00848 // Alloca returns true if there wasn't already a phi-node for that variable 00849 // 00850 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 00851 unsigned &Version, 00852 SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { 00853 // Look up the basic-block in question. 00854 PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; 00855 00856 // If the BB already has a phi node added for the i'th alloca then we're done! 00857 if (PN) return false; 00858 00859 // Create a PhiNode using the dereferenced type... and add the phi-node to the 00860 // BasicBlock. 00861 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), 00862 Allocas[AllocaNo]->getName() + "." + 00863 utostr(Version++), BB->begin()); 00864 ++NumPHIInsert; 00865 PhiToAllocaMap[PN] = AllocaNo; 00866 PN->reserveOperandSpace(getNumPreds(BB)); 00867 00868 InsertedPHINodes.insert(PN); 00869 00870 if (AST && isa<PointerType>(PN->getType())) 00871 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 00872 00873 return true; 00874 } 00875 00876 // RenamePass - Recursively traverse the CFG of the function, renaming loads and 00877 // stores to the allocas which we are promoting. IncomingVals indicates what 00878 // value each Alloca contains on exit from the predecessor block Pred. 00879 // 00880 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 00881 RenamePassData::ValVector &IncomingVals, 00882 std::vector<RenamePassData> &Worklist) { 00883 NextIteration: 00884 // If we are inserting any phi nodes into this BB, they will already be in the 00885 // block. 00886 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 00887 // Pred may have multiple edges to BB. If so, we want to add N incoming 00888 // values to each PHI we are inserting on the first time we see the edge. 00889 // Check to see if APN already has incoming values from Pred. This also 00890 // prevents us from modifying PHI nodes that are not currently being 00891 // inserted. 00892 bool HasPredEntries = false; 00893 for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) { 00894 if (APN->getIncomingBlock(i) == Pred) { 00895 HasPredEntries = true; 00896 break; 00897 } 00898 } 00899 00900 // If we have PHI nodes to update, compute the number of edges from Pred to 00901 // BB. 00902 if (!HasPredEntries) { 00903 // We want to be able to distinguish between PHI nodes being inserted by 00904 // this invocation of mem2reg from those phi nodes that already existed in 00905 // the IR before mem2reg was run. We determine that APN is being inserted 00906 // because it is missing incoming edges. All other PHI nodes being 00907 // inserted by this pass of mem2reg will have the same number of incoming 00908 // operands so far. Remember this count. 00909 unsigned NewPHINumOperands = APN->getNumOperands(); 00910 00911 unsigned NumEdges = 0; 00912 for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) 00913 if (*I == BB) 00914 ++NumEdges; 00915 assert(NumEdges && "Must be at least one edge from Pred to BB!"); 00916 00917 // Add entries for all the phis. 00918 BasicBlock::iterator PNI = BB->begin(); 00919 do { 00920 unsigned AllocaNo = PhiToAllocaMap[APN]; 00921 00922 // Add N incoming values to the PHI node. 00923 for (unsigned i = 0; i != NumEdges; ++i) 00924 APN->addIncoming(IncomingVals[AllocaNo], Pred); 00925 00926 // The currently active variable for this block is now the PHI. 00927 IncomingVals[AllocaNo] = APN; 00928 00929 // Get the next phi node. 00930 ++PNI; 00931 APN = dyn_cast<PHINode>(PNI); 00932 if (APN == 0) break; 00933 00934 // Verify that it is missing entries. If not, it is not being inserted 00935 // by this mem2reg invocation so we want to ignore it. 00936 } while (APN->getNumOperands() == NewPHINumOperands); 00937 } 00938 } 00939 00940 // Don't revisit blocks. 00941 if (!Visited.insert(BB)) return; 00942 00943 for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { 00944 Instruction *I = II++; // get the instruction, increment iterator 00945 00946 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 00947 AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); 00948 if (!Src) continue; 00949 00950 std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); 00951 if (AI == AllocaLookup.end()) continue; 00952 00953 Value *V = IncomingVals[AI->second]; 00954 00955 // Anything using the load now uses the current value. 00956 LI->replaceAllUsesWith(V); 00957 if (AST && isa<PointerType>(LI->getType())) 00958 AST->deleteValue(LI); 00959 BB->getInstList().erase(LI); 00960 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 00961 // Delete this instruction and mark the name as the current holder of the 00962 // value 00963 AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); 00964 if (!Dest) continue; 00965 00966 std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); 00967 if (ai == AllocaLookup.end()) 00968 continue; 00969 00970 // what value were we writing? 00971 IncomingVals[ai->second] = SI->getOperand(0); 00972 BB->getInstList().erase(SI); 00973 } 00974 } 00975 00976 // 'Recurse' t