LLVM API Documentation
00001 //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements some loop unrolling utilities. It does not define any 00011 // actual pass or policy, but provides a single function to perform loop 00012 // unrolling. 00013 // 00014 // It works best when loops have been canonicalized by the -indvars pass, 00015 // allowing it to determine the trip counts of loops easily. 00016 // 00017 // The process of unrolling can produce extraneous basic blocks linked with 00018 // unconditional branches. This will be corrected in the future. 00019 //===----------------------------------------------------------------------===// 00020 00021 #define DEBUG_TYPE "loop-unroll" 00022 #include "llvm/Transforms/Utils/UnrollLoop.h" 00023 #include "llvm/BasicBlock.h" 00024 #include "llvm/ADT/Statistic.h" 00025 #include "llvm/Analysis/ConstantFolding.h" 00026 #include "llvm/Analysis/LoopPass.h" 00027 #include "llvm/Support/Debug.h" 00028 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 00029 #include "llvm/Transforms/Utils/Cloning.h" 00030 #include "llvm/Transforms/Utils/Local.h" 00031 #include <cstdio> 00032 00033 using namespace llvm; 00034 00035 // TODO: Should these be here or in LoopUnroll? 00036 STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); 00037 STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); 00038 00039 /// RemapInstruction - Convert the instruction operands from referencing the 00040 /// current values into those specified by ValueMap. 00041 static inline void RemapInstruction(Instruction *I, 00042 DenseMap<const Value *, Value*> &ValueMap) { 00043 for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { 00044 Value *Op = I->getOperand(op); 00045 DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op); 00046 if (It != ValueMap.end()) Op = It->second; 00047 I->setOperand(op, Op); 00048 } 00049 } 00050 00051 /// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it 00052 /// only has one predecessor, and that predecessor only has one successor. 00053 /// The LoopInfo Analysis that is passed will be kept consistent. 00054 /// Returns the new combined block. 00055 static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { 00056 // Merge basic blocks into their predecessor if there is only one distinct 00057 // pred, and if there is only one distinct successor of the predecessor, and 00058 // if there are no PHI nodes. 00059 BasicBlock *OnlyPred = BB->getSinglePredecessor(); 00060 if (!OnlyPred) return 0; 00061 00062 if (OnlyPred->getTerminator()->getNumSuccessors() != 1) 00063 return 0; 00064 00065 DOUT << "Merging: " << *BB << "into: " << *OnlyPred; 00066 00067 // Resolve any PHI nodes at the start of the block. They are all 00068 // guaranteed to have exactly one entry if they exist, unless there are 00069 // multiple duplicate (but guaranteed to be equal) entries for the 00070 // incoming edges. This occurs when there are multiple edges from 00071 // OnlyPred to OnlySucc. 00072 FoldSingleEntryPHINodes(BB); 00073 00074 // Delete the unconditional branch from the predecessor... 00075 OnlyPred->getInstList().pop_back(); 00076 00077 // Move all definitions in the successor to the predecessor... 00078 OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); 00079 00080 // Make all PHI nodes that referred to BB now refer to Pred as their 00081 // source... 00082 BB->replaceAllUsesWith(OnlyPred); 00083 00084 std::string OldName = BB->getName(); 00085 00086 // Erase basic block from the function... 00087 LI->removeBlock(BB); 00088 BB->eraseFromParent(); 00089 00090 // Inherit predecessor's name if it exists... 00091 if (!OldName.empty() && !OnlyPred->hasName()) 00092 OnlyPred->setName(OldName); 00093 00094 return OnlyPred; 00095 } 00096 00097 /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true 00098 /// if unrolling was succesful, or false if the loop was unmodified. Unrolling 00099 /// can only fail when the loop's latch block is not terminated by a conditional 00100 /// branch instruction. However, if the trip count (and multiple) are not known, 00101 /// loop unrolling will mostly produce more code that is no faster. 00102 /// 00103 /// The LoopInfo Analysis that is passed will be kept consistent. 00104 /// 00105 /// If a LoopPassManager is passed in, and the loop is fully removed, it will be 00106 /// removed from the LoopPassManager as well. LPM can also be NULL. 00107 bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) { 00108 assert(L->isLCSSAForm()); 00109 00110 BasicBlock *Header = L->getHeader(); 00111 BasicBlock *LatchBlock = L->getLoopLatch(); 00112 BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); 00113 00114 if (!BI || BI->isUnconditional()) { 00115 // The loop-rotate pass can be helpful to avoid this in many cases. 00116 DOUT << " Can't unroll; loop not terminated by a conditional branch.\n"; 00117 return false; 00118 } 00119 00120 // Find trip count 00121 unsigned TripCount = L->getSmallConstantTripCount(); 00122 // Find trip multiple if count is not available 00123 unsigned TripMultiple = 1; 00124 if (TripCount == 0) 00125 TripMultiple = L->getSmallConstantTripMultiple(); 00126 00127 if (TripCount != 0) 00128 DOUT << " Trip Count = " << TripCount << "\n"; 00129 if (TripMultiple != 1) 00130 DOUT << " Trip Multiple = " << TripMultiple << "\n"; 00131 00132 // Effectively "DCE" unrolled iterations that are beyond the tripcount 00133 // and will never be executed. 00134 if (TripCount != 0 && Count > TripCount) 00135 Count = TripCount; 00136 00137 assert(Count > 0); 00138 assert(TripMultiple > 0); 00139 assert(TripCount == 0 || TripCount % TripMultiple == 0); 00140 00141 // Are we eliminating the loop control altogether? 00142 bool CompletelyUnroll = Count == TripCount; 00143 00144 // If we know the trip count, we know the multiple... 00145 unsigned BreakoutTrip = 0; 00146 if (TripCount != 0) { 00147 BreakoutTrip = TripCount % Count; 00148 TripMultiple = 0; 00149 } else { 00150 // Figure out what multiple to use. 00151 BreakoutTrip = TripMultiple = 00152 (unsigned)GreatestCommonDivisor64(Count, TripMultiple); 00153 } 00154 00155 if (CompletelyUnroll) { 00156 DOUT << "COMPLETELY UNROLLING loop %" << Header->getName() 00157 << " with trip count " << TripCount << "!\n"; 00158 } else { 00159 DOUT << "UNROLLING loop %" << Header->getName() 00160 << " by " << Count; 00161 if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { 00162 DOUT << " with a breakout at trip " << BreakoutTrip; 00163 } else if (TripMultiple != 1) { 00164 DOUT << " with " << TripMultiple << " trips per branch"; 00165 } 00166 DOUT << "!\n"; 00167 } 00168 00169 std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); 00170 00171 bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); 00172 BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); 00173 00174 // For the first iteration of the loop, we should use the precloned values for 00175 // PHI nodes. Insert associations now. 00176 typedef DenseMap<const Value*, Value*> ValueMapTy; 00177 ValueMapTy LastValueMap; 00178 std::vector<PHINode*> OrigPHINode; 00179 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { 00180 PHINode *PN = cast<PHINode>(I); 00181 OrigPHINode.push_back(PN); 00182 if (Instruction *I = 00183 dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock))) 00184 if (L->contains(I->getParent())) 00185 LastValueMap[I] = I; 00186 } 00187 00188 std::vector<BasicBlock*> Headers; 00189 std::vector<BasicBlock*> Latches; 00190 Headers.push_back(Header); 00191 Latches.push_back(LatchBlock); 00192 00193 for (unsigned It = 1; It != Count; ++It) { 00194 char SuffixBuffer[100]; 00195 sprintf(SuffixBuffer, ".%d", It); 00196 00197 std::vector<BasicBlock*> NewBlocks; 00198 00199 for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), 00200 E = LoopBlocks.end(); BB != E; ++BB) { 00201 ValueMapTy ValueMap; 00202 BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer); 00203 Header->getParent()->getBasicBlockList().push_back(New); 00204 00205 // Loop over all of the PHI nodes in the block, changing them to use the 00206 // incoming values from the previous block. 00207 if (*BB == Header) 00208 for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { 00209 PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]); 00210 Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); 00211 if (Instruction *InValI = dyn_cast<Instruction>(InVal)) 00212 if (It > 1 && L->contains(InValI->getParent())) 00213 InVal = LastValueMap[InValI]; 00214 ValueMap[OrigPHINode[i]] = InVal; 00215 New->getInstList().erase(NewPHI); 00216 } 00217 00218 // Update our running map of newest clones 00219 LastValueMap[*BB] = New; 00220 for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); 00221 VI != VE; ++VI) 00222 LastValueMap[VI->first] = VI->second; 00223 00224 L->addBasicBlockToLoop(New, LI->getBase()); 00225 00226 // Add phi entries for newly created values to all exit blocks except 00227 // the successor of the latch block. The successor of the exit block will 00228 // be updated specially after unrolling all the way. 00229 if (*BB != LatchBlock) 00230 for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end(); 00231 UI != UE;) { 00232 Instruction *UseInst = cast<Instruction>(*UI); 00233 ++UI; 00234 if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) { 00235 PHINode *phi = cast<PHINode>(UseInst); 00236 Value *Incoming = phi->getIncomingValueForBlock(*BB); 00237 phi->addIncoming(Incoming, New); 00238 } 00239 } 00240 00241 // Keep track of new headers and latches as we create them, so that 00242 // we can insert the proper branches later. 00243 if (*BB == Header) 00244 Headers.push_back(New); 00245 if (*BB == LatchBlock) { 00246 Latches.push_back(New); 00247 00248 // Also, clear out the new latch's back edge so that it doesn't look 00249 // like a new loop, so that it's amenable to being merged with adjacent 00250 // blocks later on. 00251 TerminatorInst *Term = New->getTerminator(); 00252 assert(L->contains(Term->getSuccessor(!ContinueOnTrue))); 00253 assert(Term->getSuccessor(ContinueOnTrue) == LoopExit); 00254 Term->setSuccessor(!ContinueOnTrue, NULL); 00255 } 00256 00257 NewBlocks.push_back(New); 00258 } 00259 00260 // Remap all instructions in the most recent iteration 00261 for (unsigned i = 0; i < NewBlocks.size(); ++i) 00262 for (BasicBlock::iterator I = NewBlocks[i]->begin(), 00263 E = NewBlocks[i]->end(); I != E; ++I) 00264 RemapInstruction(I, LastValueMap); 00265 } 00266 00267 // The latch block exits the loop. If there are any PHI nodes in the 00268 // successor blocks, update them to use the appropriate values computed as the 00269 // last iteration of the loop. 00270 if (Count != 1) { 00271 SmallPtrSet<PHINode*, 8> Users; 00272 for (Value::use_iterator UI = LatchBlock->use_begin(), 00273 UE = LatchBlock->use_end(); UI != UE; ++UI) 00274 if (PHINode *phi = dyn_cast<PHINode>(*UI)) 00275 Users.insert(phi); 00276 00277 BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]); 00278 for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end(); 00279 SI != SE; ++SI) { 00280 PHINode *PN = *SI; 00281 Value *InVal = PN->removeIncomingValue(LatchBlock, false); 00282 // If this value was defined in the loop, take the value defined by the 00283 // last iteration of the loop. 00284 if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { 00285 if (L->contains(InValI->getParent())) 00286 InVal = LastValueMap[InVal]; 00287 } 00288 PN->addIncoming(InVal, LastIterationBB); 00289 } 00290 } 00291 00292 // Now, if we're doing complete unrolling, loop over the PHI nodes in the 00293 // original block, setting them to their incoming values. 00294 if (CompletelyUnroll) { 00295 BasicBlock *Preheader = L->getLoopPreheader(); 00296 for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { 00297 PHINode *PN = OrigPHINode[i]; 00298 PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); 00299 Header->getInstList().erase(PN); 00300 } 00301 } 00302 00303 // Now that all the basic blocks for the unrolled iterations are in place, 00304 // set up the branches to connect them. 00305 for (unsigned i = 0, e = Latches.size(); i != e; ++i) { 00306 // The original branch was replicated in each unrolled iteration. 00307 BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); 00308 00309 // The branch destination. 00310 unsigned j = (i + 1) % e; 00311 BasicBlock *Dest = Headers[j]; 00312 bool NeedConditional = true; 00313 00314 // For a complete unroll, make the last iteration end with a branch 00315 // to the exit block. 00316 if (CompletelyUnroll && j == 0) { 00317 Dest = LoopExit; 00318 NeedConditional = false; 00319 } 00320 00321 // If we know the trip count or a multiple of it, we can safely use an 00322 // unconditional branch for some iterations. 00323 if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { 00324 NeedConditional = false; 00325 } 00326 00327 if (NeedConditional) { 00328 // Update the conditional branch's successor for the following 00329 // iteration. 00330 Term->setSuccessor(!ContinueOnTrue, Dest); 00331 } else { 00332 Term->setUnconditionalDest(Dest); 00333 // Merge adjacent basic blocks, if possible. 00334 if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) { 00335 std::replace(Latches.begin(), Latches.end(), Dest, Fold); 00336 std::replace(Headers.begin(), Headers.end(), Dest, Fold); 00337 } 00338 } 00339 } 00340 00341 // At this point, the code is well formed. We now do a quick sweep over the 00342 // inserted code, doing constant propagation and dead code elimination as we 00343 // go. 00344 const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); 00345 for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), 00346 BBE = NewLoopBlocks.end(); BB != BBE; ++BB) 00347 for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { 00348 Instruction *Inst = I++; 00349 00350 if (isInstructionTriviallyDead(Inst)) 00351 (*BB)->getInstList().erase(Inst); 00352 else if (Constant *C = ConstantFoldInstruction(Inst)) { 00353 Inst->replaceAllUsesWith(C); 00354 (*BB)->getInstList().erase(Inst); 00355 } 00356 } 00357 00358 NumCompletelyUnrolled += CompletelyUnroll; 00359 ++NumUnrolled; 00360 // Remove the loop from the LoopPassManager if it's completely removed. 00361 if (CompletelyUnroll && LPM != NULL) 00362 LPM->deleteLoopFromQueue(L); 00363 00364 // If we didn't completely unroll the loop, it should still be in LCSSA form. 00365 if (!CompletelyUnroll) 00366 assert(L->isLCSSAForm()); 00367 00368 return true; 00369 }
This web site is hosted by the Computer Science Department at the University of Illinois at Urbana-Champaign.