LLVM API Documentation
00001 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines a DAG pattern matching instruction selector for X86, 00011 // converting from a legalized dag to a X86 dag. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #define DEBUG_TYPE "x86-isel" 00016 #include "X86.h" 00017 #include "X86InstrBuilder.h" 00018 #include "X86ISelLowering.h" 00019 #include "X86MachineFunctionInfo.h" 00020 #include "X86RegisterInfo.h" 00021 #include "X86Subtarget.h" 00022 #include "X86TargetMachine.h" 00023 #include "llvm/GlobalValue.h" 00024 #include "llvm/Instructions.h" 00025 #include "llvm/Intrinsics.h" 00026 #include "llvm/Support/CFG.h" 00027 #include "llvm/Type.h" 00028 #include "llvm/CodeGen/MachineConstantPool.h" 00029 #include "llvm/CodeGen/MachineFunction.h" 00030 #include "llvm/CodeGen/MachineFrameInfo.h" 00031 #include "llvm/CodeGen/MachineInstrBuilder.h" 00032 #include "llvm/CodeGen/MachineRegisterInfo.h" 00033 #include "llvm/CodeGen/SelectionDAGISel.h" 00034 #include "llvm/Target/TargetMachine.h" 00035 #include "llvm/Target/TargetOptions.h" 00036 #include "llvm/Support/Compiler.h" 00037 #include "llvm/Support/Debug.h" 00038 #include "llvm/Support/MathExtras.h" 00039 #include "llvm/Support/Streams.h" 00040 #include "llvm/ADT/SmallPtrSet.h" 00041 #include "llvm/ADT/Statistic.h" 00042 using namespace llvm; 00043 00044 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 00045 00046 //===----------------------------------------------------------------------===// 00047 // Pattern Matcher Implementation 00048 //===----------------------------------------------------------------------===// 00049 00050 namespace { 00051 /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses 00052 /// SDValue's instead of register numbers for the leaves of the matched 00053 /// tree. 00054 struct X86ISelAddressMode { 00055 enum { 00056 RegBase, 00057 FrameIndexBase 00058 } BaseType; 00059 00060 struct { // This is really a union, discriminated by BaseType! 00061 SDValue Reg; 00062 int FrameIndex; 00063 } Base; 00064 00065 bool isRIPRel; // RIP as base? 00066 unsigned Scale; 00067 SDValue IndexReg; 00068 int32_t Disp; 00069 GlobalValue *GV; 00070 Constant *CP; 00071 const char *ES; 00072 int JT; 00073 unsigned Align; // CP alignment. 00074 00075 X86ISelAddressMode() 00076 : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0), 00077 GV(0), CP(0), ES(0), JT(-1), Align(0) { 00078 } 00079 void dump() { 00080 cerr << "X86ISelAddressMode " << this << "\n"; 00081 cerr << "Base.Reg "; 00082 if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); 00083 else cerr << "nul"; 00084 cerr << " Base.FrameIndex " << Base.FrameIndex << "\n"; 00085 cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n"; 00086 cerr << "IndexReg "; 00087 if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); 00088 else cerr << "nul"; 00089 cerr << " Disp " << Disp << "\n"; 00090 cerr << "GV "; if (GV) GV->dump(); 00091 else cerr << "nul"; 00092 cerr << " CP "; if (CP) CP->dump(); 00093 else cerr << "nul"; 00094 cerr << "\n"; 00095 cerr << "ES "; if (ES) cerr << ES; else cerr << "nul"; 00096 cerr << " JT" << JT << " Align" << Align << "\n"; 00097 } 00098 }; 00099 } 00100 00101 namespace { 00102 //===--------------------------------------------------------------------===// 00103 /// ISel - X86 specific code to select X86 machine instructions for 00104 /// SelectionDAG operations. 00105 /// 00106 class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel { 00107 /// TM - Keep a reference to X86TargetMachine. 00108 /// 00109 X86TargetMachine &TM; 00110 00111 /// X86Lowering - This object fully describes how to lower LLVM code to an 00112 /// X86-specific SelectionDAG. 00113 X86TargetLowering &X86Lowering; 00114 00115 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 00116 /// make the right decision when generating code for different targets. 00117 const X86Subtarget *Subtarget; 00118 00119 /// CurBB - Current BB being isel'd. 00120 /// 00121 MachineBasicBlock *CurBB; 00122 00123 /// OptForSize - If true, selector should try to optimize for code size 00124 /// instead of performance. 00125 bool OptForSize; 00126 00127 public: 00128 X86DAGToDAGISel(X86TargetMachine &tm, bool fast) 00129 : SelectionDAGISel(*tm.getTargetLowering(), fast), 00130 TM(tm), X86Lowering(*TM.getTargetLowering()), 00131 Subtarget(&TM.getSubtarget<X86Subtarget>()), 00132 OptForSize(false) {} 00133 00134 virtual const char *getPassName() const { 00135 return "X86 DAG->DAG Instruction Selection"; 00136 } 00137 00138 /// InstructionSelect - This callback is invoked by 00139 /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 00140 virtual void InstructionSelect(); 00141 00142 virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); 00143 00144 virtual 00145 bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; 00146 00147 // Include the pieces autogenerated from the target description. 00148 #include "X86GenDAGISel.inc" 00149 00150 private: 00151 SDNode *Select(SDValue N); 00152 SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); 00153 00154 bool MatchAddress(SDValue N, X86ISelAddressMode &AM, 00155 bool isRoot = true, unsigned Depth = 0); 00156 bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM, 00157 bool isRoot, unsigned Depth); 00158 bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, 00159 SDValue &Scale, SDValue &Index, SDValue &Disp); 00160 bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, 00161 SDValue &Scale, SDValue &Index, SDValue &Disp); 00162 bool SelectScalarSSELoad(SDValue Op, SDValue Pred, 00163 SDValue N, SDValue &Base, SDValue &Scale, 00164 SDValue &Index, SDValue &Disp, 00165 SDValue &InChain, SDValue &OutChain); 00166 bool TryFoldLoad(SDValue P, SDValue N, 00167 SDValue &Base, SDValue &Scale, 00168 SDValue &Index, SDValue &Disp); 00169 void PreprocessForRMW(); 00170 void PreprocessForFPConvert(); 00171 00172 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 00173 /// inline asm expressions. 00174 virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, 00175 char ConstraintCode, 00176 std::vector<SDValue> &OutOps); 00177 00178 void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); 00179 00180 inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, 00181 SDValue &Scale, SDValue &Index, 00182 SDValue &Disp) { 00183 Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? 00184 CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : 00185 AM.Base.Reg; 00186 Scale = getI8Imm(AM.Scale); 00187 Index = AM.IndexReg; 00188 // These are 32-bit even in 64-bit mode since RIP relative offset 00189 // is 32-bit. 00190 if (AM.GV) 00191 Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp); 00192 else if (AM.CP) 00193 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, 00194 AM.Align, AM.Disp); 00195 else if (AM.ES) 00196 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32); 00197 else if (AM.JT != -1) 00198 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32); 00199 else 00200 Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); 00201 } 00202 00203 /// getI8Imm - Return a target constant with the specified value, of type 00204 /// i8. 00205 inline SDValue getI8Imm(unsigned Imm) { 00206 return CurDAG->getTargetConstant(Imm, MVT::i8); 00207 } 00208 00209 /// getI16Imm - Return a target constant with the specified value, of type 00210 /// i16. 00211 inline SDValue getI16Imm(unsigned Imm) { 00212 return CurDAG->getTargetConstant(Imm, MVT::i16); 00213 } 00214 00215 /// getI32Imm - Return a target constant with the specified value, of type 00216 /// i32. 00217 inline SDValue getI32Imm(unsigned Imm) { 00218 return CurDAG->getTargetConstant(Imm, MVT::i32); 00219 } 00220 00221 /// getGlobalBaseReg - Return an SDNode that returns the value of 00222 /// the global base register. Output instructions required to 00223 /// initialize the global base register, if necessary. 00224 /// 00225 SDNode *getGlobalBaseReg(); 00226 00227 /// getTruncateTo8Bit - return an SDNode that implements a subreg based 00228 /// truncate of the specified operand to i8. This can be done with tablegen, 00229 /// except that this code uses MVT::Flag in a tricky way that happens to 00230 /// improve scheduling in some cases. 00231 SDNode *getTruncateTo8Bit(SDValue N0); 00232 00233 #ifndef NDEBUG 00234 unsigned Indent; 00235 #endif 00236 }; 00237 } 00238 00239 /// findFlagUse - Return use of MVT::Flag value produced by the specified 00240 /// SDNode. 00241 /// 00242 static SDNode *findFlagUse(SDNode *N) { 00243 unsigned FlagResNo = N->getNumValues()-1; 00244 for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { 00245 SDNode *User = *I; 00246 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 00247 SDValue Op = User->getOperand(i); 00248 if (Op.getNode() == N && Op.getResNo() == FlagResNo) 00249 return User; 00250 } 00251 } 00252 return NULL; 00253 } 00254 00255 /// findNonImmUse - Return true by reference in "found" if "Use" is an 00256 /// non-immediate use of "Def". This function recursively traversing 00257 /// up the operand chain ignoring certain nodes. 00258 static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, 00259 SDNode *Root, bool &found, 00260 SmallPtrSet<SDNode*, 16> &Visited) { 00261 if (found || 00262 Use->getNodeId() < Def->getNodeId() || 00263 !Visited.insert(Use)) 00264 return; 00265 00266 for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) { 00267 SDNode *N = Use->getOperand(i).getNode(); 00268 if (N == Def) { 00269 if (Use == ImmedUse || Use == Root) 00270 continue; // We are not looking for immediate use. 00271 assert(N != Root); 00272 found = true; 00273 break; 00274 } 00275 00276 // Traverse up the operand chain. 00277 findNonImmUse(N, Def, ImmedUse, Root, found, Visited); 00278 } 00279 } 00280 00281 /// isNonImmUse - Start searching from Root up the DAG to check is Def can 00282 /// be reached. Return true if that's the case. However, ignore direct uses 00283 /// by ImmedUse (which would be U in the example illustrated in 00284 /// IsLegalAndProfitableToFold) and by Root (which can happen in the store 00285 /// case). 00286 /// FIXME: to be really generic, we should allow direct use by any node 00287 /// that is being folded. But realisticly since we only fold loads which 00288 /// have one non-chain use, we only need to watch out for load/op/store 00289 /// and load/op/cmp case where the root (store / cmp) may reach the load via 00290 /// its chain operand. 00291 static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) { 00292 SmallPtrSet<SDNode*, 16> Visited; 00293 bool found = false; 00294 findNonImmUse(Root, Def, ImmedUse, Root, found, Visited); 00295 return found; 00296 } 00297 00298 00299 bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, 00300 SDNode *Root) const { 00301 if (Fast) return false; 00302 00303 if (U == Root) 00304 switch (U->getOpcode()) { 00305 default: break; 00306 case ISD::ADD: 00307 case ISD::ADDC: 00308 case ISD::ADDE: 00309 case ISD::AND: 00310 case ISD::OR: 00311 case ISD::XOR: { 00312 // If the other operand is a 8-bit immediate we should fold the immediate 00313 // instead. This reduces code size. 00314 // e.g. 00315 // movl 4(%esp), %eax 00316 // addl $4, %eax 00317 // vs. 00318 // movl $4, %eax 00319 // addl 4(%esp), %eax 00320 // The former is 2 bytes shorter. In case where the increment is 1, then 00321 // the saving can be 4 bytes (by using incl %eax). 00322 ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(U->getOperand(1)); 00323 if (Imm) { 00324 if (U->getValueType(0) == MVT::i64) { 00325 if ((int32_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue()) 00326 return false; 00327 } else { 00328 if ((int8_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue()) 00329 return false; 00330 } 00331 } 00332 } 00333 } 00334 00335 // If Root use can somehow reach N through a path that that doesn't contain 00336 // U then folding N would create a cycle. e.g. In the following 00337 // diagram, Root can reach N through X. If N is folded into into Root, then 00338 // X is both a predecessor and a successor of U. 00339 // 00340 // [N*] // 00341 // ^ ^ // 00342 // / \ // 00343 // [U*] [X]? // 00344 // ^ ^ // 00345 // \ / // 00346 // \ / // 00347 // [Root*] // 00348 // 00349 // * indicates nodes to be folded together. 00350 // 00351 // If Root produces a flag, then it gets (even more) interesting. Since it 00352 // will be "glued" together with its flag use in the scheduler, we need to 00353 // check if it might reach N. 00354 // 00355 // [N*] // 00356 // ^ ^ // 00357 // / \ // 00358 // [U*] [X]? // 00359 // ^ ^ // 00360 // \ \ // 00361 // \ | // 00362 // [Root*] | // 00363 // ^ | // 00364 // f | // 00365 // | / // 00366 // [Y] / // 00367 // ^ / // 00368 // f / // 00369 // | / // 00370 // [FU] // 00371 // 00372 // If FU (flag use) indirectly reaches N (the load), and Root folds N 00373 // (call it Fold), then X is a predecessor of FU and a successor of 00374 // Fold. But since Fold and FU are flagged together, this will create 00375 // a cycle in the scheduling graph. 00376 00377 MVT VT = Root->getValueType(Root->getNumValues()-1); 00378 while (VT == MVT::Flag) { 00379 SDNode *FU = findFlagUse(Root); 00380 if (FU == NULL) 00381 break; 00382 Root = FU; 00383 VT = Root->getValueType(Root->getNumValues()-1); 00384 } 00385 00386 return !isNonImmUse(Root, N, U); 00387 } 00388 00389 /// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand 00390 /// and move load below the TokenFactor. Replace store's chain operand with 00391 /// load's chain result. 00392 static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, 00393 SDValue Store, SDValue TF) { 00394 SmallVector<SDValue, 4> Ops; 00395 for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) 00396 if (Load.getNode() == TF.getOperand(i).getNode()) 00397 Ops.push_back(Load.getOperand(0)); 00398 else 00399 Ops.push_back(TF.getOperand(i)); 00400 CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); 00401 CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2)); 00402 CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1), 00403 Store.getOperand(2), Store.getOperand(3)); 00404 } 00405 00406 /// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. 00407 /// 00408 static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, 00409 SDValue &Load) { 00410 if (N.getOpcode() == ISD::BIT_CONVERT) 00411 N = N.getOperand(0); 00412 00413 LoadSDNode *LD = dyn_cast<LoadSDNode>(N); 00414 if (!LD || LD->isVolatile()) 00415 return false; 00416 if (LD->getAddressingMode() != ISD::UNINDEXED) 00417 return false; 00418 00419 ISD::LoadExtType ExtType = LD->getExtensionType(); 00420 if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) 00421 return false; 00422 00423 if (N.hasOneUse() && 00424 N.getOperand(1) == Address && 00425 N.getNode()->isOperandOf(Chain.getNode())) { 00426 Load = N; 00427 return true; 00428 } 00429 return false; 00430 } 00431 00432 /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain 00433 /// operand and move load below the call's chain operand. 00434 static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, 00435 SDValue Call, SDValue Chain) { 00436 SmallVector<SDValue, 8> Ops; 00437 for (unsigned i = 0, e = Chain.getNode()->getNumOperands(); i != e; ++i) 00438 if (Load.getNode() == Chain.getOperand(i).getNode()) 00439 Ops.push_back(Load.getOperand(0)); 00440 else 00441 Ops.push_back(Chain.getOperand(i)); 00442 CurDAG->UpdateNodeOperands(Chain, &Ops[0], Ops.size()); 00443 CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), 00444 Load.getOperand(1), Load.getOperand(2)); 00445 Ops.clear(); 00446 Ops.push_back(SDValue(Load.getNode(), 1)); 00447 for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) 00448 Ops.push_back(Call.getOperand(i)); 00449 CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); 00450 } 00451 00452 /// isCalleeLoad - Return true if call address is a load and it can be 00453 /// moved below CALLSEQ_START and the chains leading up to the call. 00454 /// Return the CALLSEQ_START by reference as a second output. 00455 static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { 00456 if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) 00457 return false; 00458 LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); 00459 if (!LD || 00460 LD->isVolatile() || 00461 LD->getAddressingMode() != ISD::UNINDEXED || 00462 LD->getExtensionType() != ISD::NON_EXTLOAD) 00463 return false; 00464 00465 // Now let's find the callseq_start. 00466 while (Chain.getOpcode() != ISD::CALLSEQ_START) { 00467 if (!Chain.hasOneUse()) 00468 return false; 00469 Chain = Chain.getOperand(0); 00470 } 00471 return Chain.getOperand(0).getNode() == Callee.getNode(); 00472 } 00473 00474 00475 /// PreprocessForRMW - Preprocess the DAG to make instruction selection better. 00476 /// This is only run if not in -fast mode (aka -O0). 00477 /// This allows the instruction selector to pick more read-modify-write 00478 /// instructions. This is a common case: 00479 /// 00480 /// [Load chain] 00481 /// ^ 00482 /// | 00483 /// [Load] 00484 /// ^ ^ 00485 /// | | 00486 /// / \- 00487 /// / | 00488 /// [TokenFactor] [Op] 00489 /// ^ ^ 00490 /// | | 00491 /// \ / 00492 /// \ / 00493 /// [Store] 00494 /// 00495 /// The fact the store's chain operand != load's chain will prevent the 00496 /// (store (op (load))) instruction from being selected. We can transform it to: 00497 /// 00498 /// [Load chain] 00499 /// ^ 00500 /// | 00501 /// [TokenFactor] 00502 /// ^ 00503 /// | 00504 /// [Load] 00505 /// ^ ^ 00506 /// | | 00507 /// | \- 00508 /// | | 00509 /// | [Op] 00510 /// | ^ 00511 /// | | 00512 /// \ / 00513 /// \ / 00514 /// [Store] 00515 void X86DAGToDAGISel::PreprocessForRMW() { 00516 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 00517 E = CurDAG->allnodes_end(); I != E; ++I) { 00518 if (I->getOpcode() == X86ISD::CALL) { 00519 /// Also try moving call address load from outside callseq_start to just 00520 /// before the call to allow it to be folded. 00521 /// 00522 /// [Load chain] 00523 /// ^ 00524 /// | 00525 /// [Load] 00526 /// ^ ^ 00527 /// | | 00528 /// / \-- 00529 /// / | 00530 ///[CALLSEQ_START] | 00531 /// ^ | 00532 /// | | 00533 /// [LOAD/C2Reg] | 00534 /// | | 00535 /// \ / 00536 /// \ / 00537 /// [CALL] 00538 SDValue Chain = I->getOperand(0); 00539 SDValue Load = I->getOperand(1); 00540 if (!isCalleeLoad(Load, Chain)) 00541 continue; 00542 MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); 00543 ++NumLoadMoved; 00544 continue; 00545 } 00546 00547 if (!ISD::isNON_TRUNCStore(I)) 00548 continue; 00549 SDValue Chain = I->getOperand(0); 00550 00551 if (Chain.getNode()->getOpcode() != ISD::TokenFactor) 00552 continue; 00553 00554 SDValue N1 = I->getOperand(1); 00555 SDValue N2 = I->getOperand(2); 00556 if ((N1.getValueType().isFloatingPoint() && 00557 !N1.getValueType().isVector()) || 00558 !N1.hasOneUse()) 00559 continue; 00560 00561 bool RModW = false; 00562 SDValue Load; 00563 unsigned Opcode = N1.getNode()->getOpcode(); 00564 switch (Opcode) { 00565 case ISD::ADD: 00566 case ISD::MUL: 00567 case ISD::AND: 00568 case ISD::OR: 00569 case ISD::XOR: 00570 case ISD::ADDC: 00571 case ISD::ADDE: 00572 case ISD::VECTOR_SHUFFLE: { 00573 SDValue N10 = N1.getOperand(0); 00574 SDValue N11 = N1.getOperand(1); 00575 RModW = isRMWLoad(N10, Chain, N2, Load); 00576 if (!RModW) 00577 RModW = isRMWLoad(N11, Chain, N2, Load); 00578 break; 00579 } 00580 case ISD::SUB: 00581 case ISD::SHL: 00582 case ISD::SRA: 00583 case ISD::SRL: 00584 case ISD::ROTL: 00585 case ISD::ROTR: 00586 case ISD::SUBC: 00587 case ISD::SUBE: 00588 case X86ISD::SHLD: 00589 case X86ISD::SHRD: { 00590 SDValue N10 = N1.getOperand(0); 00591 RModW = isRMWLoad(N10, Chain, N2, Load); 00592 break; 00593 } 00594 } 00595 00596 if (RModW) { 00597 MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); 00598 ++NumLoadMoved; 00599 } 00600 } 00601 } 00602 00603 00604 /// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend 00605 /// nodes that target the FP stack to be store and load to the stack. This is a 00606 /// gross hack. We would like to simply mark these as being illegal, but when 00607 /// we do that, legalize produces these when it expands calls, then expands 00608 /// these in the same legalize pass. We would like dag combine to be able to 00609 /// hack on these between the call expansion and the node legalization. As such 00610 /// this pass basically does "really late" legalization of these inline with the 00611 /// X86 isel pass. 00612 void X86DAGToDAGISel::PreprocessForFPConvert() { 00613 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 00614 E = CurDAG->allnodes_end(); I != E; ) { 00615 SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. 00616 if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) 00617 continue; 00618 00619 // If the source and destination are SSE registers, then this is a legal 00620 // conversion that should not be lowered. 00621 MVT SrcVT = N->getOperand(0).getValueType(); 00622 MVT DstVT = N->getValueType(0); 00623 bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); 00624 bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); 00625 if (SrcIsSSE && DstIsSSE) 00626 continue; 00627 00628 if (!SrcIsSSE && !DstIsSSE) { 00629 // If this is an FPStack extension, it is a noop. 00630 if (N->getOpcode() == ISD::FP_EXTEND) 00631 continue; 00632 // If this is a value-preserving FPStack truncation, it is a noop. 00633 if (N->getConstantOperandVal(1)) 00634 continue; 00635 } 00636 00637 // Here we could have an FP stack truncation or an FPStack <-> SSE convert. 00638 // FPStack has extload and truncstore. SSE can fold direct loads into other 00639 // operations. Based on this, decide what we want to do. 00640 MVT MemVT; 00641 if (N->getOpcode() == ISD::FP_ROUND) 00642 MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. 00643 else 00644 MemVT = SrcIsSSE ? SrcVT : DstVT; 00645 00646 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); 00647 00648 // FIXME: optimize the case where the src/dest is a load or store? 00649 SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), 00650 N->getOperand(0), 00651 MemTmp, NULL, 0, MemVT); 00652 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp, 00653 NULL, 0, MemVT); 00654 00655 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the 00656 // extload we created. This will cause general havok on the dag because 00657 // anything below the conversion could be folded into other existing nodes. 00658 // To avoid invalidating 'I', back it up to the convert node. 00659 --I; 00660 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 00661 00662 // Now that we did that, the node is dead. Increment the iterator to the 00663 // next node to process, then delete N. 00664 ++I; 00665 CurDAG->DeleteNode(N); 00666 } 00667 } 00668 00669 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel 00670 /// when it has created a SelectionDAG for us to codegen. 00671 void X86DAGToDAGISel::InstructionSelect() { 00672 CurBB = BB; // BB can change as result of isel. 00673 const Function *F = CurDAG->getMachineFunction().getFunction(); 00674 OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); 00675 00676 DEBUG(BB->dump()); 00677 if (!Fast) 00678 PreprocessForRMW(); 00679 00680 // FIXME: This should only happen when not -fast. 00681 PreprocessForFPConvert(); 00682 00683 // Codegen the basic block. 00684 #ifndef NDEBUG 00685 DOUT << "===== Instruction selection begins:\n"; 00686 Indent = 0; 00687 #endif 00688 SelectRoot(*CurDAG); 00689 #ifndef NDEBUG 00690 DOUT << "===== Instruction selection ends:\n"; 00691 #endif 00692 00693 CurDAG->RemoveDeadNodes(); 00694 } 00695 00696 /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in 00697 /// the main function. 00698 void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, 00699 MachineFrameInfo *MFI) { 00700 const TargetInstrInfo *TII = TM.getInstrInfo(); 00701 if (Subtarget->isTargetCygMing()) 00702 BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); 00703 } 00704 00705 void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { 00706 // If this is main, emit special code for main. 00707 MachineBasicBlock *BB = MF.begin(); 00708 if (Fn.hasExternalLinkage() && Fn.getName() == "main") 00709 EmitSpecialCodeForMain(BB, MF.getFrameInfo()); 00710 } 00711 00712 /// MatchAddress - Add the specified node to the specified addressing mode, 00713 /// returning true if it cannot be done. This just pattern matches for the 00714 /// addressing mode. 00715 bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, 00716 bool isRoot, unsigned Depth) { 00717 bool is64Bit = Subtarget->is64Bit(); 00718 DOUT << "MatchAddress: "; DEBUG(AM.dump()); 00719 // Limit recursion. 00720 if (Depth > 5) 00721 return MatchAddressBase(N, AM, isRoot, Depth); 00722 00723 // RIP relative addressing: %rip + 32-bit displacement! 00724 if (AM.isRIPRel) { 00725 if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) { 00726 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); 00727 if (!is64Bit || isInt32(AM.Disp + Val)) { 00728 AM.Disp += Val; 00729 return false; 00730 } 00731 } 00732 return true; 00733 } 00734 00735 switch (N.getOpcode()) { 00736 default: break; 00737 case ISD::Constant: { 00738 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); 00739 if (!is64Bit || isInt32(AM.Disp + Val)) { 00740 AM.Disp += Val; 00741 return false; 00742 } 00743 break; 00744 } 00745 00746 case X86ISD::Wrapper: { 00747 DOUT << "Wrapper: 64bit " << is64Bit; 00748 DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n"; 00749 // Under X86-64 non-small code model, GV (and friends) are 64-bits. 00750 // Also, base and index reg must be 0 in order to use rip as base. 00751 if (is64Bit && (TM.getCodeModel() != CodeModel::Small || 00752 AM.Base.Reg.getNode() || AM.IndexReg.getNode())) 00753 break; 00754 if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1) 00755 break; 00756 // If value is available in a register both base and index components have 00757 // been picked, we can't fit the result available in the register in the 00758 // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement. 00759 { 00760 SDValue N0 = N.getOperand(0); 00761 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 00762 uint64_t Offset = G->getOffset(); 00763 if (!is64Bit || isInt32(AM.Disp + Offset)) { 00764 GlobalValue *GV = G->getGlobal(); 00765 AM.GV = GV; 00766 AM.Disp += Offset; 00767 AM.isRIPRel = TM.symbolicAddressesAreRIPRel(); 00768 return false; 00769 } 00770 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 00771 uint64_t Offset = CP->getOffset(); 00772 if (!is64Bit || isInt32(AM.Disp + Offset)) { 00773 AM.CP = CP->getConstVal(); 00774 AM.Align = CP->getAlignment(); 00775 AM.Disp += Offset; 00776 AM.isRIPRel = TM.symbolicAddressesAreRIPRel(); 00777 return false; 00778 } 00779 } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) { 00780 AM.ES = S->getSymbol(); 00781 AM.isRIPRel = TM.symbolicAddressesAreRIPRel(); 00782 return false; 00783 } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 00784 AM.JT = J->getIndex(); 00785 AM.isRIPRel = TM.symbolicAddressesAreRIPRel(); 00786 return false; 00787 } 00788 } 00789 break; 00790 } 00791 00792 case ISD::FrameIndex: 00793 if (AM.BaseType == X86ISelAddressMode::RegBase 00794 && AM.Base.Reg.getNode() == 0) { 00795 AM.BaseType = X86ISelAddressMode::FrameIndexBase; 00796 AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); 00797 return false; 00798 } 00799 break; 00800 00801 case ISD::SHL: 00802 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel) 00803 break; 00804 00805 if (ConstantSDNode 00806 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { 00807 unsigned Val = CN->getZExtValue(); 00808 if (Val == 1 || Val == 2 || Val == 3) { 00809 AM.Scale = 1 << Val; 00810 SDValue ShVal = N.getNode()->getOperand(0); 00811 00812 // Okay, we know that we have a scale by now. However, if the scaled 00813 // value is an add of something and a constant, we can fold the 00814 // constant into the disp field here. 00815 if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() && 00816 isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) { 00817 AM.IndexReg = ShVal.getNode()->getOperand(0); 00818 ConstantSDNode *AddVal = 00819 cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); 00820 uint64_t Disp = AM.Disp + (AddVal->getZExtValue() << Val); 00821 if (!is64Bit || isInt32(Disp)) 00822 AM.Disp = Disp; 00823 else 00824 AM.IndexReg = ShVal; 00825 } else { 00826 AM.IndexReg = ShVal; 00827 } 00828 return false; 00829 } 00830 break; 00831 } 00832 00833 case ISD::SMUL_LOHI: 00834 case ISD::UMUL_LOHI: 00835 // A mul_lohi where we need the low part can be folded as a plain multiply. 00836 if (N.getResNo() != 0) break; 00837 // FALL THROUGH 00838 case ISD::MUL: 00839 // X*[3,5,9] -> X+X*[2,4,8] 00840 if (AM.BaseType == X86ISelAddressMode::RegBase && 00841 AM.Base.Reg.getNode() == 0 && 00842 AM.IndexReg.getNode() == 0 && 00843 !AM.isRIPRel) { 00844 if (ConstantSDNode 00845 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) 00846 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || 00847 CN->getZExtValue() == 9) { 00848 AM.Scale = unsigned(CN->getZExtValue())-1; 00849 00850 SDValue MulVal = N.getNode()->getOperand(0); 00851 SDValue Reg; 00852 00853 // Okay, we know that we have a scale by now. However, if the scaled 00854 // value is an add of something and a constant, we can fold the 00855 // constant into the disp field here. 00856 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && 00857 isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) { 00858 Reg = MulVal.getNode()->getOperand(0); 00859 ConstantSDNode *AddVal = 00860 cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); 00861 uint64_t Disp = AM.Disp + AddVal->getZExtValue() * 00862 CN->getZExtValue(); 00863 if (!is64Bit || isInt32(Disp)) 00864 AM.Disp = Disp; 00865 else 00866 Reg = N.getNode()->getOperand(0); 00867 } else { 00868 Reg = N.getNode()->getOperand(0); 00869 } 00870 00871 AM.IndexReg = AM.Base.Reg = Reg; 00872 return false; 00873 } 00874 } 00875 break; 00876 00877 case ISD::ADD: 00878 { 00879 X86ISelAddressMode Backup = AM; 00880 if (!MatchAddress(N.