LLVM API Documentation
00001 //===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass eliminates machine instruction PHI nodes by inserting copy 00011 // instructions, using an intelligent copy-folding technique based on 00012 // dominator information. This is technique is derived from: 00013 // 00014 // Budimlic, et al. Fast copy coalescing and live-range identification. 00015 // In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language 00016 // Design and Implementation (Berlin, Germany, June 17 - 19, 2002). 00017 // PLDI '02. ACM, New York, NY, 25-32. 00018 // DOI= http://doi.acm.org/10.1145/512529.512534 00019 // 00020 //===----------------------------------------------------------------------===// 00021 00022 #define DEBUG_TYPE "strongphielim" 00023 #include "llvm/CodeGen/Passes.h" 00024 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 00025 #include "llvm/CodeGen/MachineDominators.h" 00026 #include "llvm/CodeGen/MachineFunctionPass.h" 00027 #include "llvm/CodeGen/MachineInstr.h" 00028 #include "llvm/CodeGen/MachineLoopInfo.h" 00029 #include "llvm/CodeGen/MachineRegisterInfo.h" 00030 #include "llvm/CodeGen/RegisterCoalescer.h" 00031 #include "llvm/Target/TargetInstrInfo.h" 00032 #include "llvm/Target/TargetMachine.h" 00033 #include "llvm/ADT/DepthFirstIterator.h" 00034 #include "llvm/ADT/Statistic.h" 00035 #include "llvm/Support/Compiler.h" 00036 using namespace llvm; 00037 00038 namespace { 00039 struct VISIBILITY_HIDDEN StrongPHIElimination : public MachineFunctionPass { 00040 static char ID; // Pass identification, replacement for typeid 00041 StrongPHIElimination() : MachineFunctionPass((intptr_t)&ID) {} 00042 00043 // Waiting stores, for each MBB, the set of copies that need to 00044 // be inserted into that MBB 00045 DenseMap<MachineBasicBlock*, 00046 std::map<unsigned, unsigned> > Waiting; 00047 00048 // Stacks holds the renaming stack for each register 00049 std::map<unsigned, std::vector<unsigned> > Stacks; 00050 00051 // Registers in UsedByAnother are PHI nodes that are themselves 00052 // used as operands to another another PHI node 00053 std::set<unsigned> UsedByAnother; 00054 00055 // RenameSets are the is a map from a PHI-defined register 00056 // to the input registers to be coalesced along with the 00057 // predecessor block for those input registers. 00058 std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets; 00059 00060 // PhiValueNumber holds the ID numbers of the VNs for each phi that we're 00061 // eliminating, indexed by the register defined by that phi. 00062 std::map<unsigned, unsigned> PhiValueNumber; 00063 00064 // Store the DFS-in number of each block 00065 DenseMap<MachineBasicBlock*, unsigned> preorder; 00066 00067 // Store the DFS-out number of each block 00068 DenseMap<MachineBasicBlock*, unsigned> maxpreorder; 00069 00070 bool runOnMachineFunction(MachineFunction &Fn); 00071 00072 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 00073 AU.addRequired<MachineDominatorTree>(); 00074 AU.addRequired<LiveIntervals>(); 00075 00076 // TODO: Actually make this true. 00077 AU.addPreserved<LiveIntervals>(); 00078 AU.addPreserved<RegisterCoalescer>(); 00079 MachineFunctionPass::getAnalysisUsage(AU); 00080 } 00081 00082 virtual void releaseMemory() { 00083 preorder.clear(); 00084 maxpreorder.clear(); 00085 00086 Waiting.clear(); 00087 Stacks.clear(); 00088 UsedByAnother.clear(); 00089 RenameSets.clear(); 00090 } 00091 00092 private: 00093 00094 /// DomForestNode - Represents a node in the "dominator forest". This is 00095 /// a forest in which the nodes represent registers and the edges 00096 /// represent a dominance relation in the block defining those registers. 00097 struct DomForestNode { 00098 private: 00099 // Store references to our children 00100 std::vector<DomForestNode*> children; 00101 // The register we represent 00102 unsigned reg; 00103 00104 // Add another node as our child 00105 void addChild(DomForestNode* DFN) { children.push_back(DFN); } 00106 00107 public: 00108 typedef std::vector<DomForestNode*>::iterator iterator; 00109 00110 // Create a DomForestNode by providing the register it represents, and 00111 // the node to be its parent. The virtual root node has register 0 00112 // and a null parent. 00113 DomForestNode(unsigned r, DomForestNode* parent) : reg(r) { 00114 if (parent) 00115 parent->addChild(this); 00116 } 00117 00118 ~DomForestNode() { 00119 for (iterator I = begin(), E = end(); I != E; ++I) 00120 delete *I; 00121 } 00122 00123 /// getReg - Return the regiser that this node represents 00124 inline unsigned getReg() { return reg; } 00125 00126 // Provide iterator access to our children 00127 inline DomForestNode::iterator begin() { return children.begin(); } 00128 inline DomForestNode::iterator end() { return children.end(); } 00129 }; 00130 00131 void computeDFS(MachineFunction& MF); 00132 void processBlock(MachineBasicBlock* MBB); 00133 00134 std::vector<DomForestNode*> computeDomForest( 00135 std::map<unsigned, MachineBasicBlock*>& instrs, 00136 MachineRegisterInfo& MRI); 00137 void processPHIUnion(MachineInstr* Inst, 00138 std::map<unsigned, MachineBasicBlock*>& PHIUnion, 00139 std::vector<StrongPHIElimination::DomForestNode*>& DF, 00140 std::vector<std::pair<unsigned, unsigned> >& locals); 00141 void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed); 00142 void InsertCopies(MachineDomTreeNode* MBB, 00143 SmallPtrSet<MachineBasicBlock*, 16>& v); 00144 void mergeLiveIntervals(unsigned primary, unsigned secondary); 00145 }; 00146 } 00147 00148 char StrongPHIElimination::ID = 0; 00149 static RegisterPass<StrongPHIElimination> 00150 X("strong-phi-node-elimination", 00151 "Eliminate PHI nodes for register allocation, intelligently"); 00152 00153 const PassInfo *const llvm::StrongPHIEliminationID = &X; 00154 00155 /// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree 00156 /// of the given MachineFunction. These numbers are then used in other parts 00157 /// of the PHI elimination process. 00158 void StrongPHIElimination::computeDFS(MachineFunction& MF) { 00159 SmallPtrSet<MachineDomTreeNode*, 8> frontier; 00160 SmallPtrSet<MachineDomTreeNode*, 8> visited; 00161 00162 unsigned time = 0; 00163 00164 MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>(); 00165 00166 MachineDomTreeNode* node = DT.getRootNode(); 00167 00168 std::vector<MachineDomTreeNode*> worklist; 00169 worklist.push_back(node); 00170 00171 while (!worklist.empty()) { 00172 MachineDomTreeNode* currNode = worklist.back(); 00173 00174 if (!frontier.count(currNode)) { 00175 frontier.insert(currNode); 00176 ++time; 00177 preorder.insert(std::make_pair(currNode->getBlock(), time)); 00178 } 00179 00180 bool inserted = false; 00181 for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end(); 00182 I != E; ++I) 00183 if (!frontier.count(*I) && !visited.count(*I)) { 00184 worklist.push_back(*I); 00185 inserted = true; 00186 break; 00187 } 00188 00189 if (!inserted) { 00190 frontier.erase(currNode); 00191 visited.insert(currNode); 00192 maxpreorder.insert(std::make_pair(currNode->getBlock(), time)); 00193 00194 worklist.pop_back(); 00195 } 00196 } 00197 } 00198 00199 namespace { 00200 00201 /// PreorderSorter - a helper class that is used to sort registers 00202 /// according to the preorder number of their defining blocks 00203 class PreorderSorter { 00204 private: 00205 DenseMap<MachineBasicBlock*, unsigned>& preorder; 00206 MachineRegisterInfo& MRI; 00207 00208 public: 00209 PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p, 00210 MachineRegisterInfo& M) : preorder(p), MRI(M) { } 00211 00212 bool operator()(unsigned A, unsigned B) { 00213 if (A == B) 00214 return false; 00215 00216 MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent(); 00217 MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent(); 00218 00219 if (preorder[ABlock] < preorder[BBlock]) 00220 return true; 00221 else if (preorder[ABlock] > preorder[BBlock]) 00222 return false; 00223 00224 return false; 00225 } 00226 }; 00227 00228 } 00229 00230 /// computeDomForest - compute the subforest of the DomTree corresponding 00231 /// to the defining blocks of the registers in question 00232 std::vector<StrongPHIElimination::DomForestNode*> 00233 StrongPHIElimination::computeDomForest( 00234 std::map<unsigned, MachineBasicBlock*>& regs, 00235 MachineRegisterInfo& MRI) { 00236 // Begin by creating a virtual root node, since the actual results 00237 // may well be a forest. Assume this node has maximum DFS-out number. 00238 DomForestNode* VirtualRoot = new DomForestNode(0, 0); 00239 maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL)); 00240 00241 // Populate a worklist with the registers 00242 std::vector<unsigned> worklist; 00243 worklist.reserve(regs.size()); 00244 for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(), 00245 E = regs.end(); I != E; ++I) 00246 worklist.push_back(I->first); 00247 00248 // Sort the registers by the DFS-in number of their defining block 00249 PreorderSorter PS(preorder, MRI); 00250 std::sort(worklist.begin(), worklist.end(), PS); 00251 00252 // Create a "current parent" stack, and put the virtual root on top of it 00253 DomForestNode* CurrentParent = VirtualRoot; 00254 std::vector<DomForestNode*> stack; 00255 stack.push_back(VirtualRoot); 00256 00257 // Iterate over all the registers in the previously computed order 00258 for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end(); 00259 I != E; ++I) { 00260 unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()]; 00261 MachineBasicBlock* parentBlock = CurrentParent->getReg() ? 00262 MRI.getVRegDef(CurrentParent->getReg())->getParent() : 00263 0; 00264 00265 // If the DFS-in number of the register is greater than the DFS-out number 00266 // of the current parent, repeatedly pop the parent stack until it isn't. 00267 while (pre > maxpreorder[parentBlock]) { 00268 stack.pop_back(); 00269 CurrentParent = stack.back(); 00270 00271 parentBlock = CurrentParent->getReg() ? 00272 MRI.getVRegDef(CurrentParent->getReg())->getParent() : 00273 0; 00274 } 00275 00276 // Now that we've found the appropriate parent, create a DomForestNode for 00277 // this register and attach it to the forest 00278 DomForestNode* child = new DomForestNode(*I, CurrentParent); 00279 00280 // Push this new node on the "current parent" stack 00281 stack.push_back(child); 00282 CurrentParent = child; 00283 } 00284 00285 // Return a vector containing the children of the virtual root node 00286 std::vector<DomForestNode*> ret; 00287 ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end()); 00288 return ret; 00289 } 00290 00291 /// isLiveIn - helper method that determines, from a regno, if a register 00292 /// is live into a block 00293 static bool isLiveIn(unsigned r, MachineBasicBlock* MBB, 00294 LiveIntervals& LI) { 00295 LiveInterval& I = LI.getOrCreateInterval(r); 00296 unsigned idx = LI.getMBBStartIdx(MBB); 00297 return I.liveBeforeAndAt(idx); 00298 } 00299 00300 /// isLiveOut - help method that determines, from a regno, if a register is 00301 /// live out of a block. 00302 static bool isLiveOut(unsigned r, MachineBasicBlock* MBB, 00303 LiveIntervals& LI) { 00304 for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), 00305 E = MBB->succ_end(); PI != E; ++PI) 00306 if (isLiveIn(r, *PI, LI)) 00307 return true; 00308 00309 return false; 00310 } 00311 00312 /// interferes - checks for local interferences by scanning a block. The only 00313 /// trick parameter is 'mode' which tells it the relationship of the two 00314 /// registers. 0 - defined in the same block, 1 - first properly dominates 00315 /// second, 2 - second properly dominates first 00316 static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan, 00317 LiveIntervals& LV, unsigned mode) { 00318 MachineInstr* def = 0; 00319 MachineInstr* kill = 0; 00320 00321 // The code is still in SSA form at this point, so there is only one 00322 // definition per VReg. Thus we can safely use MRI->getVRegDef(). 00323 const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo(); 00324 00325 bool interference = false; 00326 00327 // Wallk the block, checking for interferences 00328 for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end(); 00329 MBI != MBE; ++MBI) { 00330 MachineInstr* curr = MBI; 00331 00332 // Same defining block... 00333 if (mode == 0) { 00334 if (curr == MRI->getVRegDef(a)) { 00335 // If we find our first definition, save it 00336 if (!def) { 00337 def = curr; 00338 // If there's already an unkilled definition, then 00339 // this is an interference 00340 } else if (!kill) { 00341 interference = true; 00342 break; 00343 // If there's a definition followed by a KillInst, then 00344 // they can't interfere 00345 } else { 00346 interference = false; 00347 break; 00348 } 00349 // Symmetric with the above 00350 } else if (curr == MRI->getVRegDef(b)) { 00351 if (!def) { 00352 def = curr; 00353 } else if (!kill) { 00354 interference = true; 00355 break; 00356 } else { 00357 interference = false; 00358 break; 00359 } 00360 // Store KillInsts if they match up with the definition 00361 } else if (curr->killsRegister(a)) { 00362 if (def == MRI->getVRegDef(a)) { 00363 kill = curr; 00364 } else if (curr->killsRegister(b)) { 00365 if (def == MRI->getVRegDef(b)) { 00366 kill = curr; 00367 } 00368 } 00369 } 00370 // First properly dominates second... 00371 } else if (mode == 1) { 00372 if (curr == MRI->getVRegDef(b)) { 00373 // Definition of second without kill of first is an interference 00374 if (!kill) { 00375 interference = true; 00376 break; 00377 // Definition after a kill is a non-interference 00378 } else { 00379 interference = false; 00380 break; 00381 } 00382 // Save KillInsts of First 00383 } else if (curr->killsRegister(a)) { 00384 kill = curr; 00385 } 00386 // Symmetric with the above 00387 } else if (mode == 2) { 00388 if (curr == MRI->getVRegDef(a)) { 00389 if (!kill) { 00390 interference = true; 00391 break; 00392 } else { 00393 interference = false; 00394 break; 00395 } 00396 } else if (curr->killsRegister(b)) { 00397 kill = curr; 00398 } 00399 } 00400 } 00401 00402 return interference; 00403 } 00404 00405 /// processBlock - Determine how to break up PHIs in the current block. Each 00406 /// PHI is broken up by some combination of renaming its operands and inserting 00407 /// copies. This method is responsible for determining which operands receive 00408 /// which treatment. 00409 void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { 00410 LiveIntervals& LI = getAnalysis<LiveIntervals>(); 00411 MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); 00412 00413 // Holds names that have been added to a set in any PHI within this block 00414 // before the current one. 00415 std::set<unsigned> ProcessedNames; 00416 00417 // Iterate over all the PHI nodes in this block 00418 MachineBasicBlock::iterator P = MBB->begin(); 00419 while (P != MBB->end() && P->getOpcode() == TargetInstrInfo::PHI) { 00420 unsigned DestReg = P->getOperand(0).getReg(); 00421 00422 00423 // Don't both doing PHI elimination for dead PHI's. 00424 if (P->registerDefIsDead(DestReg)) { 00425 ++P; 00426 continue; 00427 } 00428 00429 LiveInterval& PI = LI.getOrCreateInterval(DestReg); 00430 unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P)); 00431 VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno; 00432 PhiValueNumber.insert(std::make_pair(DestReg, PVN->id)); 00433 00434 // PHIUnion is the set of incoming registers to the PHI node that 00435 // are going to be renames rather than having copies inserted. This set 00436 // is refinded over the course of this function. UnionedBlocks is the set 00437 // of corresponding MBBs. 00438 std::map<unsigned, MachineBasicBlock*> PHIUnion; 00439 SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks; 00440 00441 // Iterate over the operands of the PHI node 00442 for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { 00443 unsigned SrcReg = P->getOperand(i-1).getReg(); 00444 00445 // Don't need to try to coalesce a register with itself. 00446 if (SrcReg == DestReg) { 00447 ProcessedNames.insert(SrcReg); 00448 continue; 00449 } 00450 00451 // Check for trivial interferences via liveness information, allowing us 00452 // to avoid extra work later. Any registers that interfere cannot both 00453 // be in the renaming set, so choose one and add copies for it instead. 00454 // The conditions are: 00455 // 1) if the operand is live into the PHI node's block OR 00456 // 2) if the PHI node is live out of the operand's defining block OR 00457 // 3) if the operand is itself a PHI node and the original PHI is 00458 // live into the operand's defining block OR 00459 // 4) if the operand is already being renamed for another PHI node 00460 // in this block OR 00461 // 5) if any two operands are defined in the same block, insert copies 00462 // for one of them 00463 if (isLiveIn(SrcReg, P->getParent(), LI) || 00464 isLiveOut(P->getOperand(0).getReg(), 00465 MRI.getVRegDef(SrcReg)->getParent(), LI) || 00466 ( MRI.getVRegDef(SrcReg)->getOpcode() == TargetInstrInfo::PHI && 00467 isLiveIn(P->getOperand(0).getReg(), 00468 MRI.getVRegDef(SrcReg)->getParent(), LI) ) || 00469 ProcessedNames.count(SrcReg) || 00470 UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) { 00471 00472 // Add a copy for the selected register 00473 MachineBasicBlock* From = P->getOperand(i).getMBB(); 00474 Waiting[From].insert(std::make_pair(SrcReg, DestReg)); 00475 UsedByAnother.insert(SrcReg); 00476 } else { 00477 // Otherwise, add it to the renaming set 00478 PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB())); 00479 UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent()); 00480 } 00481 } 00482 00483 // Compute the dominator forest for the renaming set. This is a forest 00484 // where the nodes are the registers and the edges represent dominance 00485 // relations between the defining blocks of the registers 00486 std::vector<StrongPHIElimination::DomForestNode*> DF = 00487 computeDomForest(PHIUnion, MRI); 00488 00489 // Walk DomForest to resolve interferences at an inter-block level. This 00490 // will remove registers from the renaming set (and insert copies for them) 00491 // if interferences are found. 00492 std::vector<std::pair<unsigned, unsigned> > localInterferences; 00493 processPHIUnion(P, PHIUnion, DF, localInterferences); 00494 00495 // If one of the inputs is defined in the same block as the current PHI 00496 // then we need to check for a local interference between that input and 00497 // the PHI. 00498 for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), 00499 E = PHIUnion.end(); I != E; ++I) 00500 if (MRI.getVRegDef(I->first)->getParent() == P->getParent()) 00501 localInterferences.push_back(std::make_pair(I->first, 00502 P->getOperand(0).getReg())); 00503 00504 // The dominator forest walk may have returned some register pairs whose 00505 // interference cannot be determined from dominator analysis. We now 00506 // examine these pairs for local interferences. 00507 for (std::vector<std::pair<unsigned, unsigned> >::iterator I = 00508 localInterferences.begin(), E = localInterferences.end(); I != E; ++I) { 00509 std::pair<unsigned, unsigned> p = *I; 00510 00511 MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); 00512 00513 // Determine the block we need to scan and the relationship between 00514 // the two registers 00515 MachineBasicBlock* scan = 0; 00516 unsigned mode = 0; 00517 if (MRI.getVRegDef(p.first)->getParent() == 00518 MRI.getVRegDef(p.second)->getParent()) { 00519 scan = MRI.getVRegDef(p.first)->getParent(); 00520 mode = 0; // Same block 00521 } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(), 00522 MRI.getVRegDef(p.second)->getParent())) { 00523 scan = MRI.getVRegDef(p.second)->getParent(); 00524 mode = 1; // First dominates second 00525 } else { 00526 scan = MRI.getVRegDef(p.first)->getParent(); 00527 mode = 2; // Second dominates first 00528 } 00529 00530 // If there's an interference, we need to insert copies 00531 if (interferes(p.first, p.second, scan, LI, mode)) { 00532 // Insert copies for First 00533 for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { 00534 if (P->getOperand(i-1).getReg() == p.first) { 00535 unsigned SrcReg = p.first; 00536 MachineBasicBlock* From = P->getOperand(i).getMBB(); 00537 00538 Waiting[From].insert(std::make_pair(SrcReg, 00539 P->getOperand(0).getReg())); 00540 UsedByAnother.insert(SrcReg); 00541 00542 PHIUnion.erase(SrcReg); 00543 } 00544 } 00545 } 00546 } 00547 00548 // Add the renaming set for this PHI node to our overall renaming information 00549 RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion)); 00550 00551 // Remember which registers are already renamed, so that we don't try to 00552 // rename them for another PHI node in this block 00553 for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), 00554 E = PHIUnion.end(); I != E; ++I) 00555 ProcessedNames.insert(I->first); 00556 00557 ++P; 00558 } 00559 } 00560 00561 /// processPHIUnion - Take a set of candidate registers to be coalesced when 00562 /// decomposing the PHI instruction. Use the DominanceForest to remove the ones 00563 /// that are known to interfere, and flag others that need to be checked for 00564 /// local interferences. 00565 void StrongPHIElimination::processPHIUnion(MachineInstr* Inst, 00566 std::map<unsigned, MachineBasicBlock*>& PHIUnion, 00567 std::vector<StrongPHIElimination::DomForestNode*>& DF, 00568 std::vector<std::pair<unsigned, unsigned> >& locals) { 00569 00570 std::vector<DomForestNode*> worklist(DF.begin(), DF.end()); 00571 SmallPtrSet<DomForestNode*, 4> visited; 00572 00573 // Code is still in SSA form, so we can use MRI::getVRegDef() 00574 MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo(); 00575 00576 LiveIntervals& LI = getAnalysis<LiveIntervals>(); 00577 unsigned DestReg = Inst->getOperand(0).getReg(); 00578 00579 // DF walk on the DomForest 00580 while (!worklist.empty()) { 00581 DomForestNode* DFNode = worklist.back(); 00582 00583 visited.insert(DFNode); 00584 00585 bool inserted = false; 00586 for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end(); 00587 CI != CE; ++CI) { 00588 DomForestNode* child = *CI; 00589 00590 // If the current node is live-out of the defining block of one of its 00591 // children, insert a copy for it. NOTE: The paper actually calls for 00592 // a more elaborate heuristic for determining whether to insert copies 00593 // for the child or the parent. In the interest of simplicity, we're 00594 // just always choosing the parent. 00595 if (isLiveOut(DFNode->getReg(), 00596 MRI.getVRegDef(child->getReg())->getParent(), LI)) { 00597 // Insert copies for parent 00598 for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) { 00599 if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) { 00600 unsigned SrcReg = DFNode->getReg(); 00601 MachineBasicBlock* From = Inst->getOperand(i).getMBB(); 00602 00603 Waiting[From].insert(std::make_pair(SrcReg, DestReg)); 00604 UsedByAnother.insert(SrcReg); 00605 00606 PHIUnion.erase(SrcReg); 00607 } 00608 } 00609 00610 // If a node is live-in to the defining block of one of its children, but 00611 // not live-out, then we need to scan that block for local interferences. 00612 } else if (isLiveIn(DFNode->getReg(), 00613 MRI.getVRegDef(child->getReg())->getParent(), LI) || 00614 MRI.getVRegDef(DFNode->getReg())->getParent() == 00615 MRI.getVRegDef(child->getReg())->getParent()) { 00616 // Add (p, c) to possible local interferences 00617 locals.push_back(std::make_pair(DFNode->getReg(), child->getReg())); 00618 } 00619 00620 if (!visited.count(child)) { 00621 worklist.push_back(child); 00622 inserted = true; 00623 } 00624 } 00625 00626 if (!inserted) worklist.pop_back(); 00627 } 00628 } 00629 00630 /// ScheduleCopies - Insert copies into predecessor blocks, scheduling 00631 /// them properly so as to avoid the 'lost copy' and the 'virtual swap' 00632 /// problems. 00633 /// 00634 /// Based on "Practical Improvements to the Construction and Destruction 00635 /// of Static Single Assignment Form" by Briggs, et al. 00636 void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, 00637 std::set<unsigned>& pushed) { 00638 // FIXME: This function needs to update LiveIntervals 00639 std::map<unsigned, unsigned>& copy_set= Waiting[MBB]; 00640 00641 std::map<unsigned, unsigned> worklist; 00642 std::map<unsigned, unsigned> map; 00643 00644 // Setup worklist of initial copies 00645 for (std::map<unsigned, unsigned>::iterator I = copy_set.begin(), 00646 E = copy_set.end(); I != E; ) { 00647 map.insert(std::make_pair(I->first, I->first)); 00648 map.insert(std::make_pair(I->second, I->second)); 00649 00650 if (!UsedByAnother.count(I->second)) { 00651 worklist.insert(*I); 00652 00653 // Avoid iterator invalidation 00654 unsigned first = I->first; 00655 ++I; 00656 copy_set.erase(first); 00657 } else { 00658 ++I; 00659 } 00660 } 00661 00662 LiveIntervals& LI = getAnalysis<LiveIntervals>(); 00663 MachineFunction* MF = MBB->getParent(); 00664 MachineRegisterInfo& MRI = MF->getRegInfo(); 00665 const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); 00666 00667 SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests; 00668 00669 // Iterate over the worklist, inserting copies 00670 while (!worklist.empty() || !copy_set.empty()) { 00671 while (!worklist.empty()) { 00672 std::pair<unsigned, unsigned> curr = *worklist.begin(); 00673 worklist.erase(curr.first); 00674 00675 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first); 00676 00677 if (isLiveOut(curr.second, MBB, LI)) { 00678 // Create a temporary 00679 unsigned t = MF->getRegInfo().createVirtualRegister(RC); 00680 00681 // Insert copy from curr.second to a temporary at 00682 // the Phi defining curr.second 00683 MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second); 00684 TII->copyRegToReg(*PI->getParent(), PI, t, 00685 curr.second, RC, RC); 00686 00687 // Push temporary on Stacks 00688 Stacks[curr.second].push_back(t); 00689 00690 // Insert curr.second in pushed 00691 pushed.insert(curr.second); 00692 00693 // Create a live interval for this temporary 00694 InsertedPHIDests.push_back(std::make_pair(t, --PI)); 00695 } 00696 00697 // Insert copy from map[curr.first] to curr.second 00698 TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, 00699 map[curr.first], RC, RC); 00700 map[curr.first] = curr.second; 00701 00702 // Push this copy onto InsertedPHICopies so we can 00703 // update LiveIntervals with it. 00704 MachineBasicBlock::iterator MI = MBB->getFirstTerminator(); 00705 InsertedPHIDests.push_back(std::make_pair(curr.second, --MI)); 00706 00707 // If curr.first is a destination in copy_set... 00708 for (std::map<unsigned, unsigned>::iterator I = copy_set.begin(), 00709 E = copy_set.end(); I != E; ) 00710 if (curr.first == I->second) { 00711 std::pair<unsigned, unsigned> temp = *I; 00712 00713 // Avoid iterator invalidation 00714 ++I; 00715 copy_set.erase(temp.first); 00716 worklist.insert(temp); 00717 00718 break; 00719 } else { 00720 ++I; 00721 } 00722 } 00723 00724 if (!copy_set.empty()) { 00725 std::pair<unsigned, unsigned> curr = *copy_set.begin(); 00726 copy_set.erase(curr.first); 00727 worklist.insert(curr); 00728 00729 LiveInterval& I = LI.getInterval(curr.second); 00730 MachineBasicBlock::iterator term = MBB->getFirstTerminator(); 00731 unsigned endIdx = 0; 00732 if (term != MBB->end()) 00733 endIdx = LI.getInstructionIndex(term); 00734 else 00735 endIdx = LI.getMBBEndIdx(MBB); 00736 00737 if (I.liveAt(endIdx)) { 00738 const TargetRegisterClass *RC = 00739 MF->getRegInfo().getRegClass(curr.first); 00740 00741 // Insert a copy from dest to a new temporary t at the end of b 00742 unsigned t = MF->getRegInfo().createVirtualRegister(RC); 00743 TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t, 00744 curr.second, RC, RC); 00745 map[curr.second] = t; 00746 00747 MachineBasicBlock::iterator TI = MBB->getFirstTerminator(); 00748 InsertedPHIDests.push_back(std::make_pair(t, --TI)); 00749 } 00750 } 00751 } 00752 00753 // Renumber the instructions so that we can perform the index computations 00754 // needed to create new live intervals. 00755 LI.computeNumbering(); 00756 00757 // For copies that we inserted at the ends of predecessors, we construct 00758 // live intervals. This is pretty easy, since we know that the destination 00759 // register cannot have be in live at that point previously. We just have 00760 // to make sure that, for registers that serve as inputs to more than one 00761 // PHI, we don't create multiple overlapping live intervals. 00762 std::set<unsigned> RegHandled; 00763 for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I = 00764 InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) { 00765 if (RegHandled.insert(I->first).second && 00766 !LI.getOrCreateInterval(I->first).liveAt( 00767 LI.getMBBEndIdx(I->second->getParent()))) 00768 LI.addLiveRangeToEndOfBlock(I->first, I->second); 00769 } 00770 } 00771 00772 /// InsertCopies - insert copies into MBB and all of its successors 00773 void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, 00774 SmallPtrSet<MachineBasicBlock*, 16>& visited) { 00775 MachineBasicBlock* MBB = MDTN->getBlock(); 00776 visited.insert(MBB); 00777 00778 std::set<unsigned> pushed; 00779 00780 LiveIntervals& LI = getAnalysis<LiveIntervals>(); 00781 // Rewrite register uses from Stacks 00782 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); 00783 I != E; ++I) { 00784 if (I->getOpcode() == TargetInstrInfo::PHI) 00785 continue; 00786 00787 for (unsigned i = 0; i < I->getNumOperands(); ++i) 00788 if (I->getOperand(i).isRegister() && 00789 Stacks[I->getOperand(i).getReg()].size()) { 00790 // Remove the live range for the old vreg. 00791 LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg()); 00792 LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining( 00793 LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); 00794 if (OldLR != OldInt.end()) 00795 OldInt.removeRange(*OldLR, true); 00796 00797 // Change the register 00798 I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back()); 00799 00800 // Add a live range for the new vreg 00801 LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); 00802 VNInfo* FirstVN = *Int.vni_begin(); 00803 FirstVN->hasPHIKill = false; 00804 if (I->getOperand(i).isKill()) 00805 FirstVN->kills.push_back( 00806 LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); 00807 00808 LiveRange LR (LI.getMBBStartIdx(I->getParent()), 00809 LiveIntervals::getUseIndex(LI.getInstructionIndex(I)), 00810 FirstVN); 00811 00812 Int.addRange(LR); 00813 } 00814 } 00815 00816 // Schedule the copies for this block 00817 ScheduleCopies(MBB, pushed); 00818 00819 // Recur down the dominator tree. 00820 for (MachineDomTreeNode::iterator I = MDTN->begin(), 00821 E = MDTN->end(); I != E; ++I) 00822 if (!visited.count((*I)->getBlock())) 00823 InsertCopies(*I, visited); 00824 00825 // As we exit this block, pop the names we pushed while processing it 00826 for (std::set<unsigned>::iterator I = pushed.begin(), 00827 E = pushed.end(); I != E; ++I) 00828 Stacks[*I].pop_back(); 00829 } 00830 00831 void StrongPHIElimination::mergeLiveIntervals(unsigned primary, 00832 unsigned secondary) { 00833 00834 LiveIntervals& LI = getAnalysis<LiveIntervals>(); 00835 LiveInterval& LHS = LI.getOrCreateInterval(primary); 00836 LiveInterval& RHS = LI.getOrCreateInterval(secondary); 00837 00838 LI.computeNumbering(); 00839 00840 SmallVector<VNInfo*, 4> VNSet (RHS.vni_begin(), RHS.vni_end()); 00841 DenseMap<VNInfo*, VNInfo*> VNMap; 00842 for (SmallVector<VNInfo*, 4>::iterator VI = VNSet.begin(), 00843 VE = VNSet.end(); VI != VE; ++VI) { 00844 VNInfo* NewVN = LHS.getNextValue((*VI)->def, 00845 (*VI)->copy, 00846 LI.getVNInfoAllocator()); 00847 LHS.MergeValueInAsValue(RHS, *VI, NewVN); 00848 RHS.removeValNo(*VI); 00849 } 00850 00851 if (RHS.empty()) 00852 LI.removeInterval(RHS.reg); 00853 } 00854 00855 bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { 00856 LiveIntervals& LI = getAnalysis<LiveIntervals>(); 00857 00858 // Compute DFS numbers of each block 00859 computeDFS(Fn); 00860 00861 // Determine which phi node operands need copies 00862 for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) 00863 if (!I->empty() && 00864 I->begin()->getOpcode() == TargetInstrInfo::PHI) 00865 processBlock(I); 00866 00867 // Break interferences where two different phis want to coalesce 00868 // in the same register. 00869 std::set<unsigned> seen; 00870 typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > 00871 RenameSetType; 00872 for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); 00873 I != E; ++I) { 00874 for (std::map<unsigned, MachineBasicBlock*>::iterator 00875 OI = I->second.begin(), OE = I->second.end(); OI != OE; ) { 00876 if (!seen.count(OI->first)) { 00877 seen.insert(OI->first); 00878 ++OI; 00879 } else { 00880 Waiting[OI->second].insert(std::make_pair(OI->first, I->first)); 00881 unsigned reg = OI->first; 00882 ++OI; 00883 I->second.erase(reg); 00884 } 00885 } 00886 } 00887 00888 // Insert copies 00889 // FIXME: This process should probably preserve LiveIntervals 00890 SmallPtrSet<MachineBasicBlock*, 16> visited; 00891 MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); 00892 InsertCopies(MDT.getRootNode(), visited); 00893 00894 // Perform renaming 00895 for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); 00896 I != E; ++I) 00897 while (I->second.size()) { 00898 std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin(); 00899 00900 if (SI->first != I->first) { 00901 mergeLiveIntervals(I->first, SI->first); 00902 Fn.getRegInfo().replaceRegWith(SI->first, I->first); 00903 00904 if (RenameSets.count(SI->first)) { 00905 I->second.insert(RenameSets[SI->first].begin(), 00906 RenameSets[SI->first].end()); 00907 RenameSets.erase(SI->first); 00908 } 00909 } 00910 00911 I->second.erase(SI->first); 00912 } 00913 00914 // FIXME: Insert last-minute copies 00915 00916 // Remove PHIs 00917 std::vector<MachineInstr*> phis; 00918 for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { 00919 for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end(); 00920 BI != BE; ++BI) 00921 if (BI->getOpcode() == TargetInstrInfo::PHI) 00922 phis.push_back(BI); 00923 } 00924 00925 for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end(); 00926 I != E; ) { 00927 MachineInstr* PInstr = *(I++); 00928 00929 // If this is a dead PHI node, then remove it from LiveIntervals. 00930 unsigned DestReg = PInstr->getOperand(0).getReg(); 00931 LiveInterval& PI = LI.getInterval(DestReg); 00932 if (PInstr->registerDefIsDead(DestReg)) { 00933 if (PI.containsOneValue()) { 00934 LI.removeInterval(DestReg); 00935 } else { 00936 unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); 00937 PI.removeRange(*PI.getLiveRangeContaining(idx), true); 00938 } 00939 } else { 00940 // Trim live intervals of input registers. They are no longer live into 00941 // this block if they died after the PHI. If they lived after it, don't 00942 // trim them because they might have other legitimate uses. 00943 for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) { 00944 unsigned reg = PInstr->getOperand(i).getReg(); 00945 00946 MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB(); 00947 LiveInterval& InputI = LI.getInterval(reg); 00948 if (MBB != PInstr->getParent() && 00949 InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) && 00950 InputI.expiredAt(LI.getInstructionIndex(PInstr) + 00951 LiveIntervals::InstrSlots::NUM)) 00952 InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()), 00953 LI.getInstructionIndex(PInstr), 00954 true); 00955 } 00956 00957 // If the PHI is not dead, then the valno defined by the PHI 00958 // now has an unknown def. 00959 unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); 00960 const LiveRange* PLR = PI.getLiveRangeContaining(idx); 00961 PLR->valno->def = ~0U; 00962 LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), 00963 PLR->start, PLR->valno); 00964 PI.addRange(R); 00965 } 00966 00967 LI.RemoveMachineInstrFromMaps(PInstr); 00968 PInstr->eraseFromParent(); 00969 } 00970 00971 LI.computeNumbering(); 00972 00973 return true; 00974 }