LLVM API Documentation
00001 //===- TailDuplication.cpp - Simplify CFG through tail duplication --------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass performs a limited form of tail duplication, intended to simplify 00011 // CFGs by removing some unconditional branches. This pass is necessary to 00012 // straighten out loops created by the C front-end, but also is capable of 00013 // making other code nicer. After this pass is run, the CFG simplify pass 00014 // should be run to clean up the mess. 00015 // 00016 // This pass could be enhanced in the future to use profile information to be 00017 // more aggressive. 00018 // 00019 //===----------------------------------------------------------------------===// 00020 00021 #define DEBUG_TYPE "tailduplicate" 00022 #include "llvm/Transforms/Scalar.h" 00023 #include "llvm/Constant.h" 00024 #include "llvm/Function.h" 00025 #include "llvm/Instructions.h" 00026 #include "llvm/IntrinsicInst.h" 00027 #include "llvm/Pass.h" 00028 #include "llvm/Type.h" 00029 #include "llvm/Support/CFG.h" 00030 #include "llvm/Analysis/ConstantFolding.h" 00031 #include "llvm/Transforms/Utils/Local.h" 00032 #include "llvm/Support/CommandLine.h" 00033 #include "llvm/Support/Compiler.h" 00034 #include "llvm/Support/Debug.h" 00035 #include "llvm/ADT/Statistic.h" 00036 #include "llvm/ADT/SmallPtrSet.h" 00037 #include <map> 00038 using namespace llvm; 00039 00040 STATISTIC(NumEliminated, "Number of unconditional branches eliminated"); 00041 00042 static cl::opt<unsigned> 00043 TailDupThreshold("taildup-threshold", 00044 cl::desc("Max block size to tail duplicate"), 00045 cl::init(1), cl::Hidden); 00046 00047 namespace { 00048 class VISIBILITY_HIDDEN TailDup : public FunctionPass { 00049 bool runOnFunction(Function &F); 00050 public: 00051 static char ID; // Pass identification, replacement for typeid 00052 TailDup() : FunctionPass(&ID) {} 00053 00054 private: 00055 inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned); 00056 inline void eliminateUnconditionalBranch(BranchInst *BI); 00057 SmallPtrSet<BasicBlock*, 4> CycleDetector; 00058 }; 00059 } 00060 00061 char TailDup::ID = 0; 00062 static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication"); 00063 00064 // Public interface to the Tail Duplication pass 00065 FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } 00066 00067 /// runOnFunction - Top level algorithm - Loop over each unconditional branch in 00068 /// the function, eliminating it if it looks attractive enough. CycleDetector 00069 /// prevents infinite loops by checking that we aren't redirecting a branch to 00070 /// a place it already pointed to earlier; see PR 2323. 00071 bool TailDup::runOnFunction(Function &F) { 00072 bool Changed = false; 00073 CycleDetector.clear(); 00074 for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { 00075 if (shouldEliminateUnconditionalBranch(I->getTerminator(), 00076 TailDupThreshold)) { 00077 eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator())); 00078 Changed = true; 00079 } else { 00080 ++I; 00081 CycleDetector.clear(); 00082 } 00083 } 00084 return Changed; 00085 } 00086 00087 /// shouldEliminateUnconditionalBranch - Return true if this branch looks 00088 /// attractive to eliminate. We eliminate the branch if the destination basic 00089 /// block has <= 5 instructions in it, not counting PHI nodes. In practice, 00090 /// since one of these is a terminator instruction, this means that we will add 00091 /// up to 4 instructions to the new block. 00092 /// 00093 /// We don't count PHI nodes in the count since they will be removed when the 00094 /// contents of the block are copied over. 00095 /// 00096 bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI, 00097 unsigned Threshold) { 00098 BranchInst *BI = dyn_cast<BranchInst>(TI); 00099 if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch! 00100 00101 BasicBlock *Dest = BI->getSuccessor(0); 00102 if (Dest == BI->getParent()) return false; // Do not loop infinitely! 00103 00104 // Do not inline a block if we will just get another branch to the same block! 00105 TerminatorInst *DTI = Dest->getTerminator(); 00106 if (BranchInst *DBI = dyn_cast<BranchInst>(DTI)) 00107 if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest) 00108 return false; // Do not loop infinitely! 00109 00110 // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack, 00111 // because doing so would require breaking critical edges. This should be 00112 // fixed eventually. 00113 if (!DTI->use_empty()) 00114 return false; 00115 00116 // Do not bother with blocks with only a single predecessor: simplify 00117 // CFG will fold these two blocks together! 00118 pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest); 00119 ++PI; 00120 if (PI == PE) return false; // Exactly one predecessor! 00121 00122 BasicBlock::iterator I = Dest->getFirstNonPHI(); 00123 00124 for (unsigned Size = 0; I != Dest->end(); ++I) { 00125 if (Size == Threshold) return false; // The block is too large. 00126 00127 // Don't tail duplicate call instructions. They are very large compared to 00128 // other instructions. 00129 if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false; 00130 00131 // Allso alloca and malloc. 00132 if (isa<AllocationInst>(I)) return false; 00133 00134 // Some vector instructions can expand into a number of instructions. 00135 if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) || 00136 isa<InsertElementInst>(I)) return false; 00137 00138 // Only count instructions that are not debugger intrinsics. 00139 if (!isa<DbgInfoIntrinsic>(I)) ++Size; 00140 } 00141 00142 // Do not tail duplicate a block that has thousands of successors into a block 00143 // with a single successor if the block has many other predecessors. This can 00144 // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in 00145 // cases that have a large number of indirect gotos. 00146 unsigned NumSuccs = DTI->getNumSuccessors(); 00147 if (NumSuccs > 8) { 00148 unsigned TooMany = 128; 00149 if (NumSuccs >= TooMany) return false; 00150 TooMany = TooMany/NumSuccs; 00151 for (; PI != PE; ++PI) 00152 if (TooMany-- == 0) return false; 00153 } 00154 00155 // If this unconditional branch is a fall-through, be careful about 00156 // tail duplicating it. In particular, we don't want to taildup it if the 00157 // original block will still be there after taildup is completed: doing so 00158 // would eliminate the fall-through, requiring unconditional branches. 00159 Function::iterator DestI = Dest; 00160 if (&*--DestI == BI->getParent()) { 00161 // The uncond branch is a fall-through. Tail duplication of the block is 00162 // will eliminate the fall-through-ness and end up cloning the terminator 00163 // at the end of the Dest block. Since the original Dest block will 00164 // continue to exist, this means that one or the other will not be able to 00165 // fall through. One typical example that this helps with is code like: 00166 // if (a) 00167 // foo(); 00168 // if (b) 00169 // foo(); 00170 // Cloning the 'if b' block into the end of the first foo block is messy. 00171 00172 // The messy case is when the fall-through block falls through to other 00173 // blocks. This is what we would be preventing if we cloned the block. 00174 DestI = Dest; 00175 if (++DestI != Dest->getParent()->end()) { 00176 BasicBlock *DestSucc = DestI; 00177 // If any of Dest's successors are fall-throughs, don't do this xform. 00178 for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest); 00179 SI != SE; ++SI) 00180 if (*SI == DestSucc) 00181 return false; 00182 } 00183 } 00184 00185 // Finally, check that we haven't redirected to this target block earlier; 00186 // there are cases where we loop forever if we don't check this (PR 2323). 00187 if (!CycleDetector.insert(Dest)) 00188 return false; 00189 00190 return true; 00191 } 00192 00193 /// FindObviousSharedDomOf - We know there is a branch from SrcBlock to 00194 /// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we 00195 /// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and 00196 /// DstBlock, return it. 00197 static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock, 00198 BasicBlock *DstBlock) { 00199 // SrcBlock must have a single predecessor. 00200 pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock); 00201 if (PI == PE || ++PI != PE) return 0; 00202 00203 BasicBlock *SrcPred = *pred_begin(SrcBlock); 00204 00205 // Look at the predecessors of DstBlock. One of them will be SrcBlock. If 00206 // there is only one other pred, get it, otherwise we can't handle it. 00207 PI = pred_begin(DstBlock); PE = pred_end(DstBlock); 00208 BasicBlock *DstOtherPred = 0; 00209 if (*PI == SrcBlock) { 00210 if (++PI == PE) return 0; 00211 DstOtherPred = *PI; 00212 if (++PI != PE) return 0; 00213 } else { 00214 DstOtherPred = *PI; 00215 if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0; 00216 } 00217 00218 // We can handle two situations here: "if then" and "if then else" blocks. An 00219 // 'if then' situation is just where DstOtherPred == SrcPred. 00220 if (DstOtherPred == SrcPred) 00221 return SrcPred; 00222 00223 // Check to see if we have an "if then else" situation, which means that 00224 // DstOtherPred will have a single predecessor and it will be SrcPred. 00225 PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred); 00226 if (PI != PE && *PI == SrcPred) { 00227 if (++PI != PE) return 0; // Not a single pred. 00228 return SrcPred; // Otherwise, it's an "if then" situation. Return the if. 00229 } 00230 00231 // Otherwise, this is something we can't handle. 00232 return 0; 00233 } 00234 00235 00236 /// eliminateUnconditionalBranch - Clone the instructions from the destination 00237 /// block into the source block, eliminating the specified unconditional branch. 00238 /// If the destination block defines values used by successors of the dest 00239 /// block, we may need to insert PHI nodes. 00240 /// 00241 void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { 00242 BasicBlock *SourceBlock = Branch->getParent(); 00243 BasicBlock *DestBlock = Branch->getSuccessor(0); 00244 assert(SourceBlock != DestBlock && "Our predicate is broken!"); 00245 00246 DOUT << "TailDuplication[" << SourceBlock->getParent()->getName() 00247 << "]: Eliminating branch: " << *Branch; 00248 00249 // See if we can avoid duplicating code by moving it up to a dominator of both 00250 // blocks. 00251 if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) { 00252 DOUT << "Found shared dominator: " << DomBlock->getName() << "\n"; 00253 00254 // If there are non-phi instructions in DestBlock that have no operands 00255 // defined in DestBlock, and if the instruction has no side effects, we can 00256 // move the instruction to DomBlock instead of duplicating it. 00257 BasicBlock::iterator BBI = DestBlock->getFirstNonPHI(); 00258 while (!isa<TerminatorInst>(BBI)) { 00259 Instruction *I = BBI++; 00260 00261 bool CanHoist = !I->isTrapping() && !I->mayWriteToMemory(); 00262 if (CanHoist) { 00263 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) 00264 if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op))) 00265 if (OpI->getParent() == DestBlock || 00266 (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) { 00267 CanHoist = false; 00268 break; 00269 } 00270 if (CanHoist) { 00271 // Remove from DestBlock, move right before the term in DomBlock. 00272 DestBlock->getInstList().remove(I); 00273 DomBlock->getInstList().insert(DomBlock->getTerminator(), I); 00274 DOUT << "Hoisted: " << *I; 00275 } 00276 } 00277 } 00278 } 00279 00280 // Tail duplication can not update SSA properties correctly if the values 00281 // defined in the duplicated tail are used outside of the tail itself. For 00282 // this reason, we spill all values that are used outside of the tail to the 00283 // stack. 00284 for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I) 00285 if (I->isUsedOutsideOfBlock(DestBlock)) { 00286 // We found a use outside of the tail. Create a new stack slot to 00287 // break this inter-block usage pattern. 00288 DemoteRegToStack(*I); 00289 } 00290 00291 // We are going to have to map operands from the original block B to the new 00292 // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI 00293 // nodes also define part of this mapping. Loop over these PHI nodes, adding 00294 // them to our mapping. 00295 // 00296 std::map<Value*, Value*> ValueMapping; 00297 00298 BasicBlock::iterator BI = DestBlock->begin(); 00299 bool HadPHINodes = isa<PHINode>(BI); 00300 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) 00301 ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock); 00302 00303 // Clone the non-phi instructions of the dest block into the source block, 00304 // keeping track of the mapping... 00305 // 00306 for (; BI != DestBlock->end(); ++BI) { 00307 Instruction *New = BI->clone(); 00308 New->setName(BI->getName()); 00309 SourceBlock->getInstList().push_back(New); 00310 ValueMapping[BI] = New; 00311 } 00312 00313 // Now that we have built the mapping information and cloned all of the 00314 // instructions (giving us a new terminator, among other things), walk the new 00315 // instructions, rewriting references of old instructions to use new 00316 // instructions. 00317 // 00318 BI = Branch; ++BI; // Get an iterator to the first new instruction 00319 for (; BI != SourceBlock->end(); ++BI) 00320 for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) 00321 if (Value *Remapped = ValueMapping[BI->getOperand(i)]) 00322 BI->setOperand(i, Remapped); 00323 00324 // Next we check to see if any of the successors of DestBlock had PHI nodes. 00325 // If so, we need to add entries to the PHI nodes for SourceBlock now. 00326 for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock); 00327 SI != SE; ++SI) { 00328 BasicBlock *Succ = *SI; 00329 for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) { 00330 PHINode *PN = cast<PHINode>(PNI); 00331 // Ok, we have a PHI node. Figure out what the incoming value was for the 00332 // DestBlock. 00333 Value *IV = PN->getIncomingValueForBlock(DestBlock); 00334 00335 // Remap the value if necessary... 00336 if (Value *MappedIV = ValueMapping[IV]) 00337 IV = MappedIV; 00338 PN->addIncoming(IV, SourceBlock); 00339 } 00340 } 00341 00342 // Next, remove the old branch instruction, and any PHI node entries that we 00343 // had. 00344 BI = Branch; ++BI; // Get an iterator to the first new instruction 00345 DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes... 00346 SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch... 00347 00348 // Final step: now that we have finished everything up, walk the cloned 00349 // instructions one last time, constant propagating and DCE'ing them, because 00350 // they may not be needed anymore. 00351 // 00352 if (HadPHINodes) { 00353 while (BI != SourceBlock->end()) { 00354 Instruction *Inst = BI++; 00355 if (isInstructionTriviallyDead(Inst)) 00356 Inst->eraseFromParent(); 00357 else if (Constant *C = ConstantFoldInstruction(Inst)) { 00358 Inst->replaceAllUsesWith(C); 00359 Inst->eraseFromParent(); 00360 } 00361 } 00362 } 00363 00364 ++NumEliminated; // We just killed a branch! 00365 }
This web site is hosted by the Computer Science Department at the University of Illinois at Urbana-Champaign.