LLVM API Documentation

PromoteMemoryToRegister.cpp

Go to the documentation of this file.
00001 //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file promotes memory references to be register references.  It promotes
00011 // alloca instructions which only have loads and stores as uses.  An alloca is
00012 // transformed by using dominator frontiers to place PHI nodes, then traversing
00013 // the function in depth-first order to rewrite loads and stores as appropriate.
00014 // This is just the standard SSA construction algorithm to construct "pruned"
00015 // SSA form.
00016 //
00017 //===----------------------------------------------------------------------===//
00018 
00019 #define DEBUG_TYPE "mem2reg"
00020 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
00021 #include "llvm/Constants.h"
00022 #include "llvm/DerivedTypes.h"
00023 #include "llvm/Function.h"
00024 #include "llvm/Instructions.h"
00025 #include "llvm/IntrinsicInst.h"
00026 #include "llvm/Analysis/Dominators.h"
00027 #include "llvm/Analysis/AliasSetTracker.h"
00028 #include "llvm/ADT/DenseMap.h"
00029 #include "llvm/ADT/SmallPtrSet.h"
00030 #include "llvm/ADT/SmallVector.h"
00031 #include "llvm/ADT/Statistic.h"
00032 #include "llvm/ADT/StringExtras.h"
00033 #include "llvm/ADT/STLExtras.h"
00034 #include "llvm/Support/CFG.h"
00035 #include "llvm/Support/Compiler.h"
00036 #include <algorithm>
00037 using namespace llvm;
00038 
00039 STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
00040 STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
00041 STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
00042 STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");
00043 
00044 // Provide DenseMapInfo for all pointers.
00045 namespace llvm {
00046 template<>
00047 struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
00048   typedef std::pair<BasicBlock*, unsigned> EltTy;
00049   static inline EltTy getEmptyKey() {
00050     return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
00051   }
00052   static inline EltTy getTombstoneKey() {
00053     return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
00054   }
00055   static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
00056     return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
00057   }
00058   static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
00059     return LHS == RHS;
00060   }
00061   static bool isPod() { return true; }
00062 };
00063 }
00064 
00065 /// isAllocaPromotable - Return true if this alloca is legal for promotion.
00066 /// This is true if there are only loads and stores to the alloca.
00067 ///
00068 bool llvm::isAllocaPromotable(const AllocaInst *AI) {
00069   // FIXME: If the memory unit is of pointer or integer type, we can permit
00070   // assignments to subsections of the memory unit.
00071 
00072   // Only allow direct and non-volatile loads and stores...
00073   for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end();
00074        UI != UE; ++UI)     // Loop over all of the uses of the alloca
00075     if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
00076       if (LI->isVolatile())
00077         return false;
00078     } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
00079       if (SI->getOperand(0) == AI)
00080         return false;   // Don't allow a store OF the AI, only INTO the AI.
00081       if (SI->isVolatile())
00082         return false;
00083     } else if (const BitCastInst *BC = dyn_cast<BitCastInst>(*UI)) {
00084       // Uses by dbg info shouldn't inhibit promotion.
00085       if (!BC->hasOneUse() || !isa<DbgInfoIntrinsic>(*BC->use_begin()))
00086         return false;
00087     } else {
00088       return false;
00089     }
00090 
00091   return true;
00092 }
00093 
00094 namespace {
00095   struct AllocaInfo;
00096 
00097   // Data package used by RenamePass()
00098   class VISIBILITY_HIDDEN RenamePassData {
00099   public:
00100     typedef std::vector<Value *> ValVector;
00101     
00102     RenamePassData() {}
00103     RenamePassData(BasicBlock *B, BasicBlock *P,
00104                    const ValVector &V) : BB(B), Pred(P), Values(V) {}
00105     BasicBlock *BB;
00106     BasicBlock *Pred;
00107     ValVector Values;
00108     
00109     void swap(RenamePassData &RHS) {
00110       std::swap(BB, RHS.BB);
00111       std::swap(Pred, RHS.Pred);
00112       Values.swap(RHS.Values);
00113     }
00114   };
00115   
00116   /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
00117   /// load/store instructions in the block that directly load or store an alloca.
00118   ///
00119   /// This functionality is important because it avoids scanning large basic
00120   /// blocks multiple times when promoting many allocas in the same block.
00121   class VISIBILITY_HIDDEN LargeBlockInfo {
00122     /// InstNumbers - For each instruction that we track, keep the index of the
00123     /// instruction.  The index starts out as the number of the instruction from
00124     /// the start of the block.
00125     DenseMap<const Instruction *, unsigned> InstNumbers;
00126   public:
00127     
00128     /// isInterestingInstruction - This code only looks at accesses to allocas.
00129     static bool isInterestingInstruction(const Instruction *I) {
00130       return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
00131              (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
00132     }
00133     
00134     /// getInstructionIndex - Get or calculate the index of the specified
00135     /// instruction.
00136     unsigned getInstructionIndex(const Instruction *I) {
00137       assert(isInterestingInstruction(I) &&
00138              "Not a load/store to/from an alloca?");
00139       
00140       // If we already have this instruction number, return it.
00141       DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
00142       if (It != InstNumbers.end()) return It->second;
00143       
00144       // Scan the whole block to get the instruction.  This accumulates
00145       // information for every interesting instruction in the block, in order to
00146       // avoid gratuitus rescans.
00147       const BasicBlock *BB = I->getParent();
00148       unsigned InstNo = 0;
00149       for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
00150            BBI != E; ++BBI)
00151         if (isInterestingInstruction(BBI))
00152           InstNumbers[BBI] = InstNo++;
00153       It = InstNumbers.find(I);
00154       
00155       assert(It != InstNumbers.end() && "Didn't insert instruction?");
00156       return It->second;
00157     }
00158     
00159     void deleteValue(const Instruction *I) {
00160       InstNumbers.erase(I);
00161     }
00162     
00163     void clear() {
00164       InstNumbers.clear();
00165     }
00166   };
00167 
00168   struct VISIBILITY_HIDDEN PromoteMem2Reg {
00169     /// Allocas - The alloca instructions being promoted.
00170     ///
00171     std::vector<AllocaInst*> Allocas;
00172     DominatorTree &DT;
00173     DominanceFrontier &DF;
00174 
00175     /// AST - An AliasSetTracker object to update.  If null, don't update it.
00176     ///
00177     AliasSetTracker *AST;
00178 
00179     /// AllocaLookup - Reverse mapping of Allocas.
00180     ///
00181     std::map<AllocaInst*, unsigned>  AllocaLookup;
00182 
00183     /// NewPhiNodes - The PhiNodes we're adding.
00184     ///
00185     DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
00186     
00187     /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
00188     /// it corresponds to.
00189     DenseMap<PHINode*, unsigned> PhiToAllocaMap;
00190     
00191     /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
00192     /// each alloca that is of pointer type, we keep track of what to copyValue
00193     /// to the inserted PHI nodes here.
00194     ///
00195     std::vector<Value*> PointerAllocaValues;
00196 
00197     /// Visited - The set of basic blocks the renamer has already visited.
00198     ///
00199     SmallPtrSet<BasicBlock*, 16> Visited;
00200 
00201     /// BBNumbers - Contains a stable numbering of basic blocks to avoid
00202     /// non-determinstic behavior.
00203     DenseMap<BasicBlock*, unsigned> BBNumbers;
00204 
00205     /// BBNumPreds - Lazily compute the number of predecessors a block has.
00206     DenseMap<const BasicBlock*, unsigned> BBNumPreds;
00207   public:
00208     PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
00209                    DominanceFrontier &df, AliasSetTracker *ast)
00210       : Allocas(A), DT(dt), DF(df), AST(ast) {}
00211 
00212     void run();
00213 
00214     /// properlyDominates - Return true if I1 properly dominates I2.
00215     ///
00216     bool properlyDominates(Instruction *I1, Instruction *I2) const {
00217       if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
00218         I1 = II->getNormalDest()->begin();
00219       return DT.properlyDominates(I1->getParent(), I2->getParent());
00220     }
00221     
00222     /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
00223     ///
00224     bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
00225       return DT.dominates(BB1, BB2);
00226     }
00227 
00228   private:
00229     void RemoveFromAllocasList(unsigned &AllocaIdx) {
00230       Allocas[AllocaIdx] = Allocas.back();
00231       Allocas.pop_back();
00232       --AllocaIdx;
00233     }
00234 
00235     unsigned getNumPreds(const BasicBlock *BB) {
00236       unsigned &NP = BBNumPreds[BB];
00237       if (NP == 0)
00238         NP = std::distance(pred_begin(BB), pred_end(BB))+1;
00239       return NP-1;
00240     }
00241 
00242     void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
00243                                  AllocaInfo &Info);
00244     void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 
00245                              const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
00246                              SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
00247     
00248     void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
00249                                   LargeBlockInfo &LBI);
00250     void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
00251                                   LargeBlockInfo &LBI);
00252 
00253     
00254     void RenamePass(BasicBlock *BB, BasicBlock *Pred,
00255                     RenamePassData::ValVector &IncVals,
00256                     std::vector<RenamePassData> &Worklist);
00257     bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
00258                       SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
00259   };
00260   
00261   struct AllocaInfo {
00262     std::vector<BasicBlock*> DefiningBlocks;
00263     std::vector<BasicBlock*> UsingBlocks;
00264     
00265     StoreInst  *OnlyStore;
00266     BasicBlock *OnlyBlock;
00267     bool OnlyUsedInOneBlock;
00268     
00269     Value *AllocaPointerVal;
00270     
00271     void clear() {
00272       DefiningBlocks.clear();
00273       UsingBlocks.clear();
00274       OnlyStore = 0;
00275       OnlyBlock = 0;
00276       OnlyUsedInOneBlock = true;
00277       AllocaPointerVal = 0;
00278     }
00279     
00280     /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
00281     /// ivars.
00282     void AnalyzeAlloca(AllocaInst *AI) {
00283       clear();
00284 
00285       // As we scan the uses of the alloca instruction, keep track of stores,
00286       // and decide whether all of the loads and stores to the alloca are within
00287       // the same basic block.
00288       for (Value::use_iterator U = AI->use_begin(), E = AI->use_end();
00289            U != E;)  {
00290         Instruction *User = cast<Instruction>(*U);
00291         ++U;
00292         if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
00293           // Remove any uses of this alloca in DbgInfoInstrinsics.
00294           assert(BC->hasOneUse() && "Unexpected alloca uses!");
00295           DbgInfoIntrinsic *DI = cast<DbgInfoIntrinsic>(*BC->use_begin());
00296           DI->eraseFromParent();
00297           BC->eraseFromParent();
00298           continue;
00299         } 
00300         else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
00301           // Remember the basic blocks which define new values for the alloca
00302           DefiningBlocks.push_back(SI->getParent());
00303           AllocaPointerVal = SI->getOperand(0);
00304           OnlyStore = SI;
00305         } else {
00306           LoadInst *LI = cast<LoadInst>(User);
00307           // Otherwise it must be a load instruction, keep track of variable
00308           // reads.
00309           UsingBlocks.push_back(LI->getParent());
00310           AllocaPointerVal = LI;
00311         }
00312         
00313         if (OnlyUsedInOneBlock) {
00314           if (OnlyBlock == 0)
00315             OnlyBlock = User->getParent();
00316           else if (OnlyBlock != User->getParent())
00317             OnlyUsedInOneBlock = false;
00318         }
00319       }
00320     }
00321   };
00322 }  // end of anonymous namespace
00323 
00324 
00325 void PromoteMem2Reg::run() {
00326   Function &F = *DF.getRoot()->getParent();
00327 
00328   if (AST) PointerAllocaValues.resize(Allocas.size());
00329 
00330   AllocaInfo Info;
00331   LargeBlockInfo LBI;
00332 
00333   for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
00334     AllocaInst *AI = Allocas[AllocaNum];
00335 
00336     assert(isAllocaPromotable(AI) &&
00337            "Cannot promote non-promotable alloca!");
00338     assert(AI->getParent()->getParent() == &F &&
00339            "All allocas should be in the same function, which is same as DF!");
00340 
00341     if (AI->use_empty()) {
00342       // If there are no uses of the alloca, just delete it now.
00343       if (AST) AST->deleteValue(AI);
00344       AI->eraseFromParent();
00345 
00346       // Remove the alloca from the Allocas list, since it has been processed
00347       RemoveFromAllocasList(AllocaNum);
00348       ++NumDeadAlloca;
00349       continue;
00350     }
00351     
00352     // Calculate the set of read and write-locations for each alloca.  This is
00353     // analogous to finding the 'uses' and 'definitions' of each variable.
00354     Info.AnalyzeAlloca(AI);
00355 
00356     // If there is only a single store to this value, replace any loads of
00357     // it that are directly dominated by the definition with the value stored.
00358     if (Info.DefiningBlocks.size() == 1) {
00359       RewriteSingleStoreAlloca(AI, Info, LBI);
00360 
00361       // Finally, after the scan, check to see if the store is all that is left.
00362       if (Info.UsingBlocks.empty()) {
00363         // Remove the (now dead) store and alloca.
00364         Info.OnlyStore->eraseFromParent();
00365         LBI.deleteValue(Info.OnlyStore);
00366 
00367         if (AST) AST->deleteValue(AI);
00368         AI->eraseFromParent();
00369         LBI.deleteValue(AI);
00370         
00371         // The alloca has been processed, move on.
00372         RemoveFromAllocasList(AllocaNum);
00373         
00374         ++NumSingleStore;
00375         continue;
00376       }
00377     }
00378     
00379     // If the alloca is only read and written in one basic block, just perform a
00380     // linear sweep over the block to eliminate it.
00381     if (Info.OnlyUsedInOneBlock) {
00382       PromoteSingleBlockAlloca(AI, Info, LBI);
00383       
00384       // Finally, after the scan, check to see if the stores are all that is
00385       // left.
00386       if (Info.UsingBlocks.empty()) {
00387         
00388         // Remove the (now dead) stores and alloca.
00389         while (!AI->use_empty()) {
00390           StoreInst *SI = cast<StoreInst>(AI->use_back());
00391           SI->eraseFromParent();
00392           LBI.deleteValue(SI);
00393         }
00394         
00395         if (AST) AST->deleteValue(AI);
00396         AI->eraseFromParent();
00397         LBI.deleteValue(AI);
00398         
00399         // The alloca has been processed, move on.
00400         RemoveFromAllocasList(AllocaNum);
00401         
00402         ++NumLocalPromoted;
00403         continue;
00404       }
00405     }
00406     
00407     // If we haven't computed a numbering for the BB's in the function, do so
00408     // now.
00409     if (BBNumbers.empty()) {
00410       unsigned ID = 0;
00411       for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
00412         BBNumbers[I] = ID++;
00413     }
00414 
00415     // If we have an AST to keep updated, remember some pointer value that is
00416     // stored into the alloca.
00417     if (AST)
00418       PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
00419     
00420     // Keep the reverse mapping of the 'Allocas' array for the rename pass.
00421     AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
00422 
00423     // At this point, we're committed to promoting the alloca using IDF's, and
00424     // the standard SSA construction algorithm.  Determine which blocks need PHI
00425     // nodes and see if we can optimize out some work by avoiding insertion of
00426     // dead phi nodes.
00427     DetermineInsertionPoint(AI, AllocaNum, Info);
00428   }
00429 
00430   if (Allocas.empty())
00431     return; // All of the allocas must have been trivial!
00432 
00433   LBI.clear();
00434   
00435   
00436   // Set the incoming values for the basic block to be null values for all of
00437   // the alloca's.  We do this in case there is a load of a value that has not
00438   // been stored yet.  In this case, it will get this null value.
00439   //
00440   RenamePassData::ValVector Values(Allocas.size());
00441   for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
00442     Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
00443 
00444   // Walks all basic blocks in the function performing the SSA rename algorithm
00445   // and inserting the phi nodes we marked as necessary
00446   //
00447   std::vector<RenamePassData> RenamePassWorkList;
00448   RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
00449   while (!RenamePassWorkList.empty()) {
00450     RenamePassData RPD;
00451     RPD.swap(RenamePassWorkList.back());
00452     RenamePassWorkList.pop_back();
00453     // RenamePass may add new worklist entries.
00454     RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
00455   }
00456   
00457   // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
00458   Visited.clear();
00459 
00460   // Remove the allocas themselves from the function.
00461   for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
00462     Instruction *A = Allocas[i];
00463 
00464     // If there are any uses of the alloca instructions left, they must be in
00465     // sections of dead code that were not processed on the dominance frontier.
00466     // Just delete the users now.
00467     //
00468     if (!A->use_empty())
00469       A->replaceAllUsesWith(UndefValue::get(A->getType()));
00470     if (AST) AST->deleteValue(A);
00471     A->eraseFromParent();
00472   }
00473 
00474   
00475   // Loop over all of the PHI nodes and see if there are any that we can get
00476   // rid of because they merge all of the same incoming values.  This can
00477   // happen due to undef values coming into the PHI nodes.  This process is
00478   // iterative, because eliminating one PHI node can cause others to be removed.
00479   bool EliminatedAPHI = true;
00480   while (EliminatedAPHI) {
00481     EliminatedAPHI = false;
00482     
00483     for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
00484            NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
00485       PHINode *PN = I->second;
00486       
00487       // If this PHI node merges one value and/or undefs, get the value.
00488       if (Value *V = PN->hasConstantValue(true)) {
00489         if (!isa<Instruction>(V) ||
00490             properlyDominates(cast<Instruction>(V), PN)) {
00491           if (AST && isa<PointerType>(PN->getType()))
00492             AST->deleteValue(PN);
00493           PN->replaceAllUsesWith(V);
00494           PN->eraseFromParent();
00495           NewPhiNodes.erase(I++);
00496           EliminatedAPHI = true;
00497           continue;
00498         }
00499       }
00500       ++I;
00501     }
00502   }
00503   
00504   // At this point, the renamer has added entries to PHI nodes for all reachable
00505   // code.  Unfortunately, there may be unreachable blocks which the renamer
00506   // hasn't traversed.  If this is the case, the PHI nodes may not
00507   // have incoming values for all predecessors.  Loop over all PHI nodes we have
00508   // created, inserting undef values if they are missing any incoming values.
00509   //
00510   for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
00511          NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
00512     // We want to do this once per basic block.  As such, only process a block
00513     // when we find the PHI that is the first entry in the block.
00514     PHINode *SomePHI = I->second;
00515     BasicBlock *BB = SomePHI->getParent();
00516     if (&BB->front() != SomePHI)
00517       continue;
00518 
00519     // Only do work here if there the PHI nodes are missing incoming values.  We
00520     // know that all PHI nodes that were inserted in a block will have the same
00521     // number of incoming values, so we can just check any of them.
00522     if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
00523       continue;
00524 
00525     // Get the preds for BB.
00526     SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
00527     
00528     // Ok, now we know that all of the PHI nodes are missing entries for some
00529     // basic blocks.  Start by sorting the incoming predecessors for efficient
00530     // access.
00531     std::sort(Preds.begin(), Preds.end());
00532     
00533     // Now we loop through all BB's which have entries in SomePHI and remove
00534     // them from the Preds list.
00535     for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
00536       // Do a log(n) search of the Preds list for the entry we want.
00537       SmallVector<BasicBlock*, 16>::iterator EntIt =
00538         std::lower_bound(Preds.begin(), Preds.end(),
00539                          SomePHI->getIncomingBlock(i));
00540       assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
00541              "PHI node has entry for a block which is not a predecessor!");
00542 
00543       // Remove the entry
00544       Preds.erase(EntIt);
00545     }
00546 
00547     // At this point, the blocks left in the preds list must have dummy
00548     // entries inserted into every PHI nodes for the block.  Update all the phi
00549     // nodes in this block that we are inserting (there could be phis before
00550     // mem2reg runs).
00551     unsigned NumBadPreds = SomePHI->getNumIncomingValues();
00552     BasicBlock::iterator BBI = BB->begin();
00553     while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
00554            SomePHI->getNumIncomingValues() == NumBadPreds) {
00555       Value *UndefVal = UndefValue::get(SomePHI->getType());
00556       for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
00557         SomePHI->addIncoming(UndefVal, Preds[pred]);
00558     }
00559   }
00560         
00561   NewPhiNodes.clear();
00562 }
00563 
00564 
00565 /// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
00566 /// are blocks which lead to uses.  Knowing this allows us to avoid inserting
00567 /// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
00568 /// would be dead).
00569 void PromoteMem2Reg::
00570 ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 
00571                     const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
00572                     SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
00573   
00574   // To determine liveness, we must iterate through the predecessors of blocks
00575   // where the def is live.  Blocks are added to the worklist if we need to
00576   // check their predecessors.  Start with all the using blocks.
00577   SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
00578   LiveInBlockWorklist.insert(LiveInBlockWorklist.end(), 
00579                              Info.UsingBlocks.begin(), Info.UsingBlocks.end());
00580   
00581   // If any of the using blocks is also a definition block, check to see if the
00582   // definition occurs before or after the use.  If it happens before the use,
00583   // the value isn't really live-in.
00584   for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
00585     BasicBlock *BB = LiveInBlockWorklist[i];
00586     if (!DefBlocks.count(BB)) continue;
00587     
00588     // Okay, this is a block that both uses and defines the value.  If the first
00589     // reference to the alloca is a def (store), then we know it isn't live-in.
00590     for (BasicBlock::iterator I = BB->begin(); ; ++I) {
00591       if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
00592         if (SI->getOperand(1) != AI) continue;
00593         
00594         // We found a store to the alloca before a load.  The alloca is not
00595         // actually live-in here.
00596         LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
00597         LiveInBlockWorklist.pop_back();
00598         --i, --e;
00599         break;
00600       } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
00601         if (LI->getOperand(0) != AI) continue;
00602         
00603         // Okay, we found a load before a store to the alloca.  It is actually
00604         // live into this block.
00605         break;
00606       }
00607     }
00608   }
00609   
00610   // Now that we have a set of blocks where the phi is live-in, recursively add
00611   // their predecessors until we find the full region the value is live.
00612   while (!LiveInBlockWorklist.empty()) {
00613     BasicBlock *BB = LiveInBlockWorklist.back();
00614     LiveInBlockWorklist.pop_back();
00615     
00616     // The block really is live in here, insert it into the set.  If already in
00617     // the set, then it has already been processed.
00618     if (!LiveInBlocks.insert(BB))
00619       continue;
00620     
00621     // Since the value is live into BB, it is either defined in a predecessor or
00622     // live into it to.  Add the preds to the worklist unless they are a
00623     // defining block.
00624     for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
00625       BasicBlock *P = *PI;
00626       
00627       // The value is not live into a predecessor if it defines the value.
00628       if (DefBlocks.count(P))
00629         continue;
00630       
00631       // Otherwise it is, add to the worklist.
00632       LiveInBlockWorklist.push_back(P);
00633     }
00634   }
00635 }
00636 
00637 /// DetermineInsertionPoint - At this point, we're committed to promoting the
00638 /// alloca using IDF's, and the standard SSA construction algorithm.  Determine
00639 /// which blocks need phi nodes and see if we can optimize out some work by
00640 /// avoiding insertion of dead phi nodes.
00641 void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
00642                                              AllocaInfo &Info) {
00643 
00644   // Unique the set of defining blocks for efficient lookup.
00645   SmallPtrSet<BasicBlock*, 32> DefBlocks;
00646   DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
00647 
00648   // Determine which blocks the value is live in.  These are blocks which lead
00649   // to uses.
00650   SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
00651   ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
00652 
00653   // Compute the locations where PhiNodes need to be inserted.  Look at the
00654   // dominance frontier of EACH basic-block we have a write in.
00655   unsigned CurrentVersion = 0;
00656   SmallPtrSet<PHINode*, 16> InsertedPHINodes;
00657   std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
00658   while (!Info.DefiningBlocks.empty()) {
00659     BasicBlock *BB = Info.DefiningBlocks.back();
00660     Info.DefiningBlocks.pop_back();
00661     
00662     // Look up the DF for this write, add it to defining blocks.
00663     DominanceFrontier::const_iterator it = DF.find(BB);
00664     if (it == DF.end()) continue;
00665     
00666     const DominanceFrontier::DomSetType &S = it->second;
00667     
00668     // In theory we don't need the indirection through the DFBlocks vector.
00669     // In practice, the order of calling QueuePhiNode would depend on the
00670     // (unspecified) ordering of basic blocks in the dominance frontier,
00671     // which would give PHI nodes non-determinstic subscripts.  Fix this by
00672     // processing blocks in order of the occurance in the function.
00673     for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
00674          PE = S.end(); P != PE; ++P) {
00675       // If the frontier block is not in the live-in set for the alloca, don't
00676       // bother processing it.
00677       if (!LiveInBlocks.count(*P))
00678         continue;
00679       
00680       DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
00681     }
00682     
00683     // Sort by which the block ordering in the function.
00684     if (DFBlocks.size() > 1)
00685       std::sort(DFBlocks.begin(), DFBlocks.end());
00686     
00687     for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
00688       BasicBlock *BB = DFBlocks[i].second;
00689       if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
00690         Info.DefiningBlocks.push_back(BB);
00691     }
00692     DFBlocks.clear();
00693   }
00694 }
00695 
00696 /// RewriteSingleStoreAlloca - If there is only a single store to this value,
00697 /// replace any loads of it that are directly dominated by the definition with
00698 /// the value stored.
00699 void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
00700                                               AllocaInfo &Info,
00701                                               LargeBlockInfo &LBI) {
00702   StoreInst *OnlyStore = Info.OnlyStore;
00703   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
00704   BasicBlock *StoreBB = OnlyStore->getParent();
00705   int StoreIndex = -1;
00706 
00707   // Clear out UsingBlocks.  We will reconstruct it here if needed.
00708   Info.UsingBlocks.clear();
00709   
00710   for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
00711     Instruction *UserInst = cast<Instruction>(*UI++);
00712     if (!isa<LoadInst>(UserInst)) {
00713       assert(UserInst == OnlyStore && "Should only have load/stores");
00714       continue;
00715     }
00716     LoadInst *LI = cast<LoadInst>(UserInst);
00717     
00718     // Okay, if we have a load from the alloca, we want to replace it with the
00719     // only value stored to the alloca.  We can do this if the value is
00720     // dominated by the store.  If not, we use the rest of the mem2reg machinery
00721     // to insert the phi nodes as needed.
00722     if (!StoringGlobalVal) {  // Non-instructions are always dominated.
00723       if (LI->getParent() == StoreBB) {
00724         // If we have a use that is in the same block as the store, compare the
00725         // indices of the two instructions to see which one came first.  If the
00726         // load came before the store, we can't handle it.
00727         if (StoreIndex == -1)
00728           StoreIndex = LBI.getInstructionIndex(OnlyStore);
00729 
00730         if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
00731           // Can't handle this load, bail out.
00732           Info.UsingBlocks.push_back(StoreBB);
00733           continue;
00734         }
00735         
00736       } else if (LI->getParent() != StoreBB &&
00737                  !dominates(StoreBB, LI->getParent())) {
00738         // If the load and store are in different blocks, use BB dominance to
00739         // check their relationships.  If the store doesn't dom the use, bail
00740         // out.
00741         Info.UsingBlocks.push_back(LI->getParent());
00742         continue;
00743       }
00744     }
00745     
00746     // Otherwise, we *can* safely rewrite this load.
00747     LI->replaceAllUsesWith(OnlyStore->getOperand(0));
00748     if (AST && isa<PointerType>(LI->getType()))
00749       AST->deleteValue(LI);
00750     LI->eraseFromParent();
00751     LBI.deleteValue(LI);
00752   }
00753 }
00754 
00755 
00756 /// StoreIndexSearchPredicate - This is a helper predicate used to search by the
00757 /// first element of a pair.
00758 struct StoreIndexSearchPredicate {
00759   bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
00760                   const std::pair<unsigned, StoreInst*> &RHS) {
00761     return LHS.first < RHS.first;
00762   }
00763 };
00764 
00765 /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
00766 /// block.  If this is the case, avoid traversing the CFG and inserting a lot of
00767 /// potentially useless PHI nodes by just performing a single linear pass over
00768 /// the basic block using the Alloca.
00769 ///
00770 /// If we cannot promote this alloca (because it is read before it is written),
00771 /// return true.  This is necessary in cases where, due to control flow, the
00772 /// alloca is potentially undefined on some control flow paths.  e.g. code like
00773 /// this is potentially correct:
00774 ///
00775 ///   for (...) { if (c) { A = undef; undef = B; } }
00776 ///
00777 /// ... so long as A is not used before undef is set.
00778 ///
00779 void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
00780                                               LargeBlockInfo &LBI) {
00781   // The trickiest case to handle is when we have large blocks. Because of this,
00782   // this code is optimized assuming that large blocks happen.  This does not
00783   // significantly pessimize the small block case.  This uses LargeBlockInfo to
00784   // make it efficient to get the index of various operations in the block.
00785   
00786   // Clear out UsingBlocks.  We will reconstruct it here if needed.
00787   Info.UsingBlocks.clear();
00788   
00789   // Walk the use-def list of the alloca, getting the locations of all stores.
00790   typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
00791   StoresByIndexTy StoresByIndex;
00792   
00793   for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
00794        UI != E; ++UI) 
00795     if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
00796       StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
00797 
00798   // If there are no stores to the alloca, just replace any loads with undef.
00799   if (StoresByIndex.empty()) {
00800     for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 
00801       if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
00802         LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
00803         if (AST && isa<PointerType>(LI->getType()))
00804           AST->deleteValue(LI);
00805         LBI.deleteValue(LI);
00806         LI->eraseFromParent();
00807       }
00808     return;
00809   }
00810   
00811   // Sort the stores by their index, making it efficient to do a lookup with a
00812   // binary search.
00813   std::sort(StoresByIndex.begin(), StoresByIndex.end());
00814   
00815   // Walk all of the loads from this alloca, replacing them with the nearest
00816   // store above them, if any.
00817   for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
00818     LoadInst *LI = dyn_cast<LoadInst>(*UI++);
00819     if (!LI) continue;
00820     
00821     unsigned LoadIdx = LBI.getInstructionIndex(LI);
00822     
00823     // Find the nearest store that has a lower than this load. 
00824     StoresByIndexTy::iterator I = 
00825       std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
00826                        std::pair<unsigned, StoreInst*>(LoadIdx, 0),
00827                        StoreIndexSearchPredicate());
00828     
00829     // If there is no store before this load, then we can't promote this load.
00830     if (I == StoresByIndex.begin()) {
00831       // Can't handle this load, bail out.
00832       Info.UsingBlocks.push_back(LI->getParent());
00833       continue;
00834     }
00835       
00836     // Otherwise, there was a store before this load, the load takes its value.
00837     --I;
00838     LI->replaceAllUsesWith(I->second->getOperand(0));
00839     if (AST && isa<PointerType>(LI->getType()))
00840       AST->deleteValue(LI);
00841     LI->eraseFromParent();
00842     LBI.deleteValue(LI);
00843   }
00844 }
00845 
00846 
00847 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
00848 // Alloca returns true if there wasn't already a phi-node for that variable
00849 //
00850 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
00851                                   unsigned &Version,
00852                                   SmallPtrSet<PHINode*, 16> &InsertedPHINodes) {
00853   // Look up the basic-block in question.
00854   PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
00855 
00856   // If the BB already has a phi node added for the i'th alloca then we're done!
00857   if (PN) return false;
00858 
00859   // Create a PhiNode using the dereferenced type... and add the phi-node to the
00860   // BasicBlock.
00861   PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(),
00862                        Allocas[AllocaNo]->getName() + "." +
00863                        utostr(Version++), BB->begin());
00864   ++NumPHIInsert;
00865   PhiToAllocaMap[PN] = AllocaNo;
00866   PN->reserveOperandSpace(getNumPreds(BB));
00867   
00868   InsertedPHINodes.insert(PN);
00869 
00870   if (AST && isa<PointerType>(PN->getType()))
00871     AST->copyValue(PointerAllocaValues[AllocaNo], PN);
00872 
00873   return true;
00874 }
00875 
00876 // RenamePass - Recursively traverse the CFG of the function, renaming loads and
00877 // stores to the allocas which we are promoting.  IncomingVals indicates what
00878 // value each Alloca contains on exit from the predecessor block Pred.
00879 //
00880 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
00881                                 RenamePassData::ValVector &IncomingVals,
00882                                 std::vector<RenamePassData> &Worklist) {
00883 NextIteration:
00884   // If we are inserting any phi nodes into this BB, they will already be in the
00885   // block.
00886   if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
00887     // Pred may have multiple edges to BB.  If so, we want to add N incoming
00888     // values to each PHI we are inserting on the first time we see the edge.
00889     // Check to see if APN already has incoming values from Pred.  This also
00890     // prevents us from modifying PHI nodes that are not currently being
00891     // inserted.
00892     bool HasPredEntries = false;
00893     for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) {
00894       if (APN->getIncomingBlock(i) == Pred) {
00895         HasPredEntries = true;
00896         break;
00897       }
00898     }
00899     
00900     // If we have PHI nodes to update, compute the number of edges from Pred to
00901     // BB.
00902     if (!HasPredEntries) {
00903       // We want to be able to distinguish between PHI nodes being inserted by
00904       // this invocation of mem2reg from those phi nodes that already existed in
00905       // the IR before mem2reg was run.  We determine that APN is being inserted
00906       // because it is missing incoming edges.  All other PHI nodes being
00907       // inserted by this pass of mem2reg will have the same number of incoming
00908       // operands so far.  Remember this count.
00909       unsigned NewPHINumOperands = APN->getNumOperands();
00910       
00911       unsigned NumEdges = 0;
00912       for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
00913         if (*I == BB)
00914           ++NumEdges;
00915       assert(NumEdges && "Must be at least one edge from Pred to BB!");
00916       
00917       // Add entries for all the phis.
00918       BasicBlock::iterator PNI = BB->begin();
00919       do {
00920         unsigned AllocaNo = PhiToAllocaMap[APN];
00921         
00922         // Add N incoming values to the PHI node.
00923         for (unsigned i = 0; i != NumEdges; ++i)
00924           APN->addIncoming(IncomingVals[AllocaNo], Pred);
00925         
00926         // The currently active variable for this block is now the PHI.
00927         IncomingVals[AllocaNo] = APN;
00928         
00929         // Get the next phi node.
00930         ++PNI;
00931         APN = dyn_cast<PHINode>(PNI);
00932         if (APN == 0) break;
00933         
00934         // Verify that it is missing entries.  If not, it is not being inserted
00935         // by this mem2reg invocation so we want to ignore it.
00936       } while (APN->getNumOperands() == NewPHINumOperands);
00937     }
00938   }
00939   
00940   // Don't revisit blocks.
00941   if (!Visited.insert(BB)) return;
00942 
00943   for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
00944     Instruction *I = II++; // get the instruction, increment iterator
00945 
00946     if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
00947       AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
00948       if (!Src) continue;
00949   
00950       std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
00951       if (AI == AllocaLookup.end()) continue;
00952 
00953       Value *V = IncomingVals[AI->second];
00954 
00955       // Anything using the load now uses the current value.
00956       LI->replaceAllUsesWith(V);
00957       if (AST && isa<PointerType>(LI->getType()))
00958         AST->deleteValue(LI);
00959       BB->getInstList().erase(LI);
00960     } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
00961       // Delete this instruction and mark the name as the current holder of the
00962       // value
00963       AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
00964       if (!Dest) continue;
00965       
00966       std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
00967       if (ai == AllocaLookup.end())
00968         continue;
00969       
00970       // what value were we writing?
00971       IncomingVals[ai->second] = SI->getOperand(0);
00972       BB->getInstList().erase(SI);
00973     }
00974   }
00975 
00976   // 'Recurse' t