LLVM API Documentation

X86ISelDAGToDAG.cpp

Go to the documentation of this file.
00001 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines a DAG pattern matching instruction selector for X86,
00011 // converting from a legalized dag to a X86 dag.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #define DEBUG_TYPE "x86-isel"
00016 #include "X86.h"
00017 #include "X86InstrBuilder.h"
00018 #include "X86ISelLowering.h"
00019 #include "X86MachineFunctionInfo.h"
00020 #include "X86RegisterInfo.h"
00021 #include "X86Subtarget.h"
00022 #include "X86TargetMachine.h"
00023 #include "llvm/GlobalValue.h"
00024 #include "llvm/Instructions.h"
00025 #include "llvm/Intrinsics.h"
00026 #include "llvm/Support/CFG.h"
00027 #include "llvm/Type.h"
00028 #include "llvm/CodeGen/MachineConstantPool.h"
00029 #include "llvm/CodeGen/MachineFunction.h"
00030 #include "llvm/CodeGen/MachineFrameInfo.h"
00031 #include "llvm/CodeGen/MachineInstrBuilder.h"
00032 #include "llvm/CodeGen/MachineRegisterInfo.h"
00033 #include "llvm/CodeGen/SelectionDAGISel.h"
00034 #include "llvm/Target/TargetMachine.h"
00035 #include "llvm/Target/TargetOptions.h"
00036 #include "llvm/Support/Compiler.h"
00037 #include "llvm/Support/Debug.h"
00038 #include "llvm/Support/MathExtras.h"
00039 #include "llvm/Support/Streams.h"
00040 #include "llvm/ADT/SmallPtrSet.h"
00041 #include "llvm/ADT/Statistic.h"
00042 using namespace llvm;
00043 
00044 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
00045 
00046 //===----------------------------------------------------------------------===//
00047 //                      Pattern Matcher Implementation
00048 //===----------------------------------------------------------------------===//
00049 
00050 namespace {
00051   /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
00052   /// SDValue's instead of register numbers for the leaves of the matched
00053   /// tree.
00054   struct X86ISelAddressMode {
00055     enum {
00056       RegBase,
00057       FrameIndexBase
00058     } BaseType;
00059 
00060     struct {            // This is really a union, discriminated by BaseType!
00061       SDValue Reg;
00062       int FrameIndex;
00063     } Base;
00064 
00065     bool isRIPRel;     // RIP as base?
00066     unsigned Scale;
00067     SDValue IndexReg; 
00068     int32_t Disp;
00069     GlobalValue *GV;
00070     Constant *CP;
00071     const char *ES;
00072     int JT;
00073     unsigned Align;    // CP alignment.
00074 
00075     X86ISelAddressMode()
00076       : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
00077         GV(0), CP(0), ES(0), JT(-1), Align(0) {
00078     }
00079     void dump() {
00080       cerr << "X86ISelAddressMode " << this << "\n";
00081       cerr << "Base.Reg ";
00082               if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); 
00083               else cerr << "nul";
00084       cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
00085       cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n";
00086       cerr << "IndexReg ";
00087               if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
00088               else cerr << "nul"; 
00089       cerr << " Disp " << Disp << "\n";
00090       cerr << "GV "; if (GV) GV->dump(); 
00091                      else cerr << "nul";
00092       cerr << " CP "; if (CP) CP->dump(); 
00093                      else cerr << "nul";
00094       cerr << "\n";
00095       cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
00096       cerr  << " JT" << JT << " Align" << Align << "\n";
00097     }
00098   };
00099 }
00100 
00101 namespace {
00102   //===--------------------------------------------------------------------===//
00103   /// ISel - X86 specific code to select X86 machine instructions for
00104   /// SelectionDAG operations.
00105   ///
00106   class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
00107     /// TM - Keep a reference to X86TargetMachine.
00108     ///
00109     X86TargetMachine &TM;
00110 
00111     /// X86Lowering - This object fully describes how to lower LLVM code to an
00112     /// X86-specific SelectionDAG.
00113     X86TargetLowering &X86Lowering;
00114 
00115     /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
00116     /// make the right decision when generating code for different targets.
00117     const X86Subtarget *Subtarget;
00118 
00119     /// CurBB - Current BB being isel'd.
00120     ///
00121     MachineBasicBlock *CurBB;
00122 
00123     /// OptForSize - If true, selector should try to optimize for code size
00124     /// instead of performance.
00125     bool OptForSize;
00126 
00127   public:
00128     X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
00129       : SelectionDAGISel(*tm.getTargetLowering(), fast),
00130         TM(tm), X86Lowering(*TM.getTargetLowering()),
00131         Subtarget(&TM.getSubtarget<X86Subtarget>()),
00132         OptForSize(false) {}
00133 
00134     virtual const char *getPassName() const {
00135       return "X86 DAG->DAG Instruction Selection";
00136     }
00137 
00138     /// InstructionSelect - This callback is invoked by
00139     /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
00140     virtual void InstructionSelect();
00141 
00142     virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
00143 
00144     virtual
00145       bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
00146 
00147 // Include the pieces autogenerated from the target description.
00148 #include "X86GenDAGISel.inc"
00149 
00150   private:
00151     SDNode *Select(SDValue N);
00152     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
00153 
00154     bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
00155                       bool isRoot = true, unsigned Depth = 0);
00156     bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM,
00157                           bool isRoot, unsigned Depth);
00158     bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
00159                     SDValue &Scale, SDValue &Index, SDValue &Disp);
00160     bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
00161                        SDValue &Scale, SDValue &Index, SDValue &Disp);
00162     bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
00163                              SDValue N, SDValue &Base, SDValue &Scale,
00164                              SDValue &Index, SDValue &Disp,
00165                              SDValue &InChain, SDValue &OutChain);
00166     bool TryFoldLoad(SDValue P, SDValue N,
00167                      SDValue &Base, SDValue &Scale,
00168                      SDValue &Index, SDValue &Disp);
00169     void PreprocessForRMW();
00170     void PreprocessForFPConvert();
00171 
00172     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
00173     /// inline asm expressions.
00174     virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
00175                                               char ConstraintCode,
00176                                               std::vector<SDValue> &OutOps);
00177     
00178     void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
00179 
00180     inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, 
00181                                    SDValue &Scale, SDValue &Index,
00182                                    SDValue &Disp) {
00183       Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
00184         CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
00185         AM.Base.Reg;
00186       Scale = getI8Imm(AM.Scale);
00187       Index = AM.IndexReg;
00188       // These are 32-bit even in 64-bit mode since RIP relative offset
00189       // is 32-bit.
00190       if (AM.GV)
00191         Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
00192       else if (AM.CP)
00193         Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
00194                                              AM.Align, AM.Disp);
00195       else if (AM.ES)
00196         Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
00197       else if (AM.JT != -1)
00198         Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
00199       else
00200         Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
00201     }
00202 
00203     /// getI8Imm - Return a target constant with the specified value, of type
00204     /// i8.
00205     inline SDValue getI8Imm(unsigned Imm) {
00206       return CurDAG->getTargetConstant(Imm, MVT::i8);
00207     }
00208 
00209     /// getI16Imm - Return a target constant with the specified value, of type
00210     /// i16.
00211     inline SDValue getI16Imm(unsigned Imm) {
00212       return CurDAG->getTargetConstant(Imm, MVT::i16);
00213     }
00214 
00215     /// getI32Imm - Return a target constant with the specified value, of type
00216     /// i32.
00217     inline SDValue getI32Imm(unsigned Imm) {
00218       return CurDAG->getTargetConstant(Imm, MVT::i32);
00219     }
00220 
00221     /// getGlobalBaseReg - Return an SDNode that returns the value of
00222     /// the global base register. Output instructions required to
00223     /// initialize the global base register, if necessary.
00224     ///
00225     SDNode *getGlobalBaseReg();
00226 
00227     /// getTruncateTo8Bit - return an SDNode that implements a subreg based
00228     /// truncate of the specified operand to i8. This can be done with tablegen,
00229     /// except that this code uses MVT::Flag in a tricky way that happens to
00230     /// improve scheduling in some cases.
00231     SDNode *getTruncateTo8Bit(SDValue N0);
00232 
00233 #ifndef NDEBUG
00234     unsigned Indent;
00235 #endif
00236   };
00237 }
00238 
00239 /// findFlagUse - Return use of MVT::Flag value produced by the specified
00240 /// SDNode.
00241 ///
00242 static SDNode *findFlagUse(SDNode *N) {
00243   unsigned FlagResNo = N->getNumValues()-1;
00244   for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
00245     SDNode *User = *I;
00246     for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
00247       SDValue Op = User->getOperand(i);
00248       if (Op.getNode() == N && Op.getResNo() == FlagResNo)
00249         return User;
00250     }
00251   }
00252   return NULL;
00253 }
00254 
00255 /// findNonImmUse - Return true by reference in "found" if "Use" is an
00256 /// non-immediate use of "Def". This function recursively traversing
00257 /// up the operand chain ignoring certain nodes.
00258 static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
00259                           SDNode *Root, bool &found,
00260                           SmallPtrSet<SDNode*, 16> &Visited) {
00261   if (found ||
00262       Use->getNodeId() < Def->getNodeId() ||
00263       !Visited.insert(Use))
00264     return;
00265   
00266   for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
00267     SDNode *N = Use->getOperand(i).getNode();
00268     if (N == Def) {
00269       if (Use == ImmedUse || Use == Root)
00270         continue;  // We are not looking for immediate use.
00271       assert(N != Root);
00272       found = true;
00273       break;
00274     }
00275 
00276     // Traverse up the operand chain.
00277     findNonImmUse(N, Def, ImmedUse, Root, found, Visited);
00278   }
00279 }
00280 
00281 /// isNonImmUse - Start searching from Root up the DAG to check is Def can
00282 /// be reached. Return true if that's the case. However, ignore direct uses
00283 /// by ImmedUse (which would be U in the example illustrated in
00284 /// IsLegalAndProfitableToFold) and by Root (which can happen in the store
00285 /// case).
00286 /// FIXME: to be really generic, we should allow direct use by any node
00287 /// that is being folded. But realisticly since we only fold loads which
00288 /// have one non-chain use, we only need to watch out for load/op/store
00289 /// and load/op/cmp case where the root (store / cmp) may reach the load via
00290 /// its chain operand.
00291 static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
00292   SmallPtrSet<SDNode*, 16> Visited;
00293   bool found = false;
00294   findNonImmUse(Root, Def, ImmedUse, Root, found, Visited);
00295   return found;
00296 }
00297 
00298 
00299 bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
00300                                                  SDNode *Root) const {
00301   if (Fast) return false;
00302 
00303   if (U == Root)
00304     switch (U->getOpcode()) {
00305     default: break;
00306     case ISD::ADD:
00307     case ISD::ADDC:
00308     case ISD::ADDE:
00309     case ISD::AND:
00310     case ISD::OR:
00311     case ISD::XOR: {
00312       // If the other operand is a 8-bit immediate we should fold the immediate
00313       // instead. This reduces code size.
00314       // e.g.
00315       // movl 4(%esp), %eax
00316       // addl $4, %eax
00317       // vs.
00318       // movl $4, %eax
00319       // addl 4(%esp), %eax
00320       // The former is 2 bytes shorter. In case where the increment is 1, then
00321       // the saving can be 4 bytes (by using incl %eax).
00322       ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(U->getOperand(1));
00323       if (Imm) {
00324         if (U->getValueType(0) == MVT::i64) {
00325           if ((int32_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue())
00326             return false;
00327         } else {
00328           if ((int8_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue())
00329             return false;
00330         }
00331       }
00332     }
00333     }
00334 
00335   // If Root use can somehow reach N through a path that that doesn't contain
00336   // U then folding N would create a cycle. e.g. In the following
00337   // diagram, Root can reach N through X. If N is folded into into Root, then
00338   // X is both a predecessor and a successor of U.
00339   //
00340   //          [N*]           //
00341   //         ^   ^           //
00342   //        /     \          //
00343   //      [U*]    [X]?       //
00344   //        ^     ^          //
00345   //         \   /           //
00346   //          \ /            //
00347   //         [Root*]         //
00348   //
00349   // * indicates nodes to be folded together.
00350   //
00351   // If Root produces a flag, then it gets (even more) interesting. Since it
00352   // will be "glued" together with its flag use in the scheduler, we need to
00353   // check if it might reach N.
00354   //
00355   //          [N*]           //
00356   //         ^   ^           //
00357   //        /     \          //
00358   //      [U*]    [X]?       //
00359   //        ^       ^        //
00360   //         \       \       //
00361   //          \      |       //
00362   //         [Root*] |       //
00363   //          ^      |       //
00364   //          f      |       //
00365   //          |      /       //
00366   //         [Y]    /        //
00367   //           ^   /         //
00368   //           f  /          //
00369   //           | /           //
00370   //          [FU]           //
00371   //
00372   // If FU (flag use) indirectly reaches N (the load), and Root folds N
00373   // (call it Fold), then X is a predecessor of FU and a successor of
00374   // Fold. But since Fold and FU are flagged together, this will create
00375   // a cycle in the scheduling graph.
00376 
00377   MVT VT = Root->getValueType(Root->getNumValues()-1);
00378   while (VT == MVT::Flag) {
00379     SDNode *FU = findFlagUse(Root);
00380     if (FU == NULL)
00381       break;
00382     Root = FU;
00383     VT = Root->getValueType(Root->getNumValues()-1);
00384   }
00385 
00386   return !isNonImmUse(Root, N, U);
00387 }
00388 
00389 /// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
00390 /// and move load below the TokenFactor. Replace store's chain operand with
00391 /// load's chain result.
00392 static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
00393                                  SDValue Store, SDValue TF) {
00394   SmallVector<SDValue, 4> Ops;
00395   for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
00396     if (Load.getNode() == TF.getOperand(i).getNode())
00397       Ops.push_back(Load.getOperand(0));
00398     else
00399       Ops.push_back(TF.getOperand(i));
00400   CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
00401   CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
00402   CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
00403                              Store.getOperand(2), Store.getOperand(3));
00404 }
00405 
00406 /// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
00407 /// 
00408 static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
00409                       SDValue &Load) {
00410   if (N.getOpcode() == ISD::BIT_CONVERT)
00411     N = N.getOperand(0);
00412 
00413   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
00414   if (!LD || LD->isVolatile())
00415     return false;
00416   if (LD->getAddressingMode() != ISD::UNINDEXED)
00417     return false;
00418 
00419   ISD::LoadExtType ExtType = LD->getExtensionType();
00420   if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
00421     return false;
00422 
00423   if (N.hasOneUse() &&
00424       N.getOperand(1) == Address &&
00425       N.getNode()->isOperandOf(Chain.getNode())) {
00426     Load = N;
00427     return true;
00428   }
00429   return false;
00430 }
00431 
00432 /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
00433 /// operand and move load below the call's chain operand.
00434 static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
00435                            SDValue Call, SDValue Chain) {
00436   SmallVector<SDValue, 8> Ops;
00437   for (unsigned i = 0, e = Chain.getNode()->getNumOperands(); i != e; ++i)
00438     if (Load.getNode() == Chain.getOperand(i).getNode())
00439       Ops.push_back(Load.getOperand(0));
00440     else
00441       Ops.push_back(Chain.getOperand(i));
00442   CurDAG->UpdateNodeOperands(Chain, &Ops[0], Ops.size());
00443   CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
00444                              Load.getOperand(1), Load.getOperand(2));
00445   Ops.clear();
00446   Ops.push_back(SDValue(Load.getNode(), 1));
00447   for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
00448     Ops.push_back(Call.getOperand(i));
00449   CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
00450 }
00451 
00452 /// isCalleeLoad - Return true if call address is a load and it can be
00453 /// moved below CALLSEQ_START and the chains leading up to the call.
00454 /// Return the CALLSEQ_START by reference as a second output.
00455 static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
00456   if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
00457     return false;
00458   LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
00459   if (!LD ||
00460       LD->isVolatile() ||
00461       LD->getAddressingMode() != ISD::UNINDEXED ||
00462       LD->getExtensionType() != ISD::NON_EXTLOAD)
00463     return false;
00464 
00465   // Now let's find the callseq_start.
00466   while (Chain.getOpcode() != ISD::CALLSEQ_START) {
00467     if (!Chain.hasOneUse())
00468       return false;
00469     Chain = Chain.getOperand(0);
00470   }
00471   return Chain.getOperand(0).getNode() == Callee.getNode();
00472 }
00473 
00474 
00475 /// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
00476 /// This is only run if not in -fast mode (aka -O0).
00477 /// This allows the instruction selector to pick more read-modify-write
00478 /// instructions. This is a common case:
00479 ///
00480 ///     [Load chain]
00481 ///         ^
00482 ///         |
00483 ///       [Load]
00484 ///       ^    ^
00485 ///       |    |
00486 ///      /      \-
00487 ///     /         |
00488 /// [TokenFactor] [Op]
00489 ///     ^          ^
00490 ///     |          |
00491 ///      \        /
00492 ///       \      /
00493 ///       [Store]
00494 ///
00495 /// The fact the store's chain operand != load's chain will prevent the
00496 /// (store (op (load))) instruction from being selected. We can transform it to:
00497 ///
00498 ///     [Load chain]
00499 ///         ^
00500 ///         |
00501 ///    [TokenFactor]
00502 ///         ^
00503 ///         |
00504 ///       [Load]
00505 ///       ^    ^
00506 ///       |    |
00507 ///       |     \- 
00508 ///       |       | 
00509 ///       |     [Op]
00510 ///       |       ^
00511 ///       |       |
00512 ///       \      /
00513 ///        \    /
00514 ///       [Store]
00515 void X86DAGToDAGISel::PreprocessForRMW() {
00516   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
00517          E = CurDAG->allnodes_end(); I != E; ++I) {
00518     if (I->getOpcode() == X86ISD::CALL) {
00519       /// Also try moving call address load from outside callseq_start to just
00520       /// before the call to allow it to be folded.
00521       ///
00522       ///     [Load chain]
00523       ///         ^
00524       ///         |
00525       ///       [Load]
00526       ///       ^    ^
00527       ///       |    |
00528       ///      /      \--
00529       ///     /          |
00530       ///[CALLSEQ_START] |
00531       ///     ^          |
00532       ///     |          |
00533       /// [LOAD/C2Reg]   |
00534       ///     |          |
00535       ///      \        /
00536       ///       \      /
00537       ///       [CALL]
00538       SDValue Chain = I->getOperand(0);
00539       SDValue Load  = I->getOperand(1);
00540       if (!isCalleeLoad(Load, Chain))
00541         continue;
00542       MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
00543       ++NumLoadMoved;
00544       continue;
00545     }
00546 
00547     if (!ISD::isNON_TRUNCStore(I))
00548       continue;
00549     SDValue Chain = I->getOperand(0);
00550 
00551     if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
00552       continue;
00553 
00554     SDValue N1 = I->getOperand(1);
00555     SDValue N2 = I->getOperand(2);
00556     if ((N1.getValueType().isFloatingPoint() &&
00557          !N1.getValueType().isVector()) ||
00558         !N1.hasOneUse())
00559       continue;
00560 
00561     bool RModW = false;
00562     SDValue Load;
00563     unsigned Opcode = N1.getNode()->getOpcode();
00564     switch (Opcode) {
00565     case ISD::ADD:
00566     case ISD::MUL:
00567     case ISD::AND:
00568     case ISD::OR:
00569     case ISD::XOR:
00570     case ISD::ADDC:
00571     case ISD::ADDE:
00572     case ISD::VECTOR_SHUFFLE: {
00573       SDValue N10 = N1.getOperand(0);
00574       SDValue N11 = N1.getOperand(1);
00575       RModW = isRMWLoad(N10, Chain, N2, Load);
00576       if (!RModW)
00577         RModW = isRMWLoad(N11, Chain, N2, Load);
00578       break;
00579     }
00580     case ISD::SUB:
00581     case ISD::SHL:
00582     case ISD::SRA:
00583     case ISD::SRL:
00584     case ISD::ROTL:
00585     case ISD::ROTR:
00586     case ISD::SUBC:
00587     case ISD::SUBE:
00588     case X86ISD::SHLD:
00589     case X86ISD::SHRD: {
00590       SDValue N10 = N1.getOperand(0);
00591       RModW = isRMWLoad(N10, Chain, N2, Load);
00592       break;
00593     }
00594     }
00595 
00596     if (RModW) {
00597       MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
00598       ++NumLoadMoved;
00599     }
00600   }
00601 }
00602 
00603 
00604 /// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
00605 /// nodes that target the FP stack to be store and load to the stack.  This is a
00606 /// gross hack.  We would like to simply mark these as being illegal, but when
00607 /// we do that, legalize produces these when it expands calls, then expands
00608 /// these in the same legalize pass.  We would like dag combine to be able to
00609 /// hack on these between the call expansion and the node legalization.  As such
00610 /// this pass basically does "really late" legalization of these inline with the
00611 /// X86 isel pass.
00612 void X86DAGToDAGISel::PreprocessForFPConvert() {
00613   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
00614        E = CurDAG->allnodes_end(); I != E; ) {
00615     SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
00616     if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
00617       continue;
00618     
00619     // If the source and destination are SSE registers, then this is a legal
00620     // conversion that should not be lowered.
00621     MVT SrcVT = N->getOperand(0).getValueType();
00622     MVT DstVT = N->getValueType(0);
00623     bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
00624     bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
00625     if (SrcIsSSE && DstIsSSE)
00626       continue;
00627 
00628     if (!SrcIsSSE && !DstIsSSE) {
00629       // If this is an FPStack extension, it is a noop.
00630       if (N->getOpcode() == ISD::FP_EXTEND)
00631         continue;
00632       // If this is a value-preserving FPStack truncation, it is a noop.
00633       if (N->getConstantOperandVal(1))
00634         continue;
00635     }
00636    
00637     // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
00638     // FPStack has extload and truncstore.  SSE can fold direct loads into other
00639     // operations.  Based on this, decide what we want to do.
00640     MVT MemVT;
00641     if (N->getOpcode() == ISD::FP_ROUND)
00642       MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
00643     else
00644       MemVT = SrcIsSSE ? SrcVT : DstVT;
00645     
00646     SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
00647     
00648     // FIXME: optimize the case where the src/dest is a load or store?
00649     SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(),
00650                                           N->getOperand(0),
00651                                           MemTmp, NULL, 0, MemVT);
00652     SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
00653                                         NULL, 0, MemVT);
00654 
00655     // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
00656     // extload we created.  This will cause general havok on the dag because
00657     // anything below the conversion could be folded into other existing nodes.
00658     // To avoid invalidating 'I', back it up to the convert node.
00659     --I;
00660     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
00661     
00662     // Now that we did that, the node is dead.  Increment the iterator to the
00663     // next node to process, then delete N.
00664     ++I;
00665     CurDAG->DeleteNode(N);
00666   }  
00667 }
00668 
00669 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
00670 /// when it has created a SelectionDAG for us to codegen.
00671 void X86DAGToDAGISel::InstructionSelect() {
00672   CurBB = BB;  // BB can change as result of isel.
00673   const Function *F = CurDAG->getMachineFunction().getFunction();
00674   OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
00675 
00676   DEBUG(BB->dump());
00677   if (!Fast)
00678     PreprocessForRMW();
00679 
00680   // FIXME: This should only happen when not -fast.
00681   PreprocessForFPConvert();
00682 
00683   // Codegen the basic block.
00684 #ifndef NDEBUG
00685   DOUT << "===== Instruction selection begins:\n";
00686   Indent = 0;
00687 #endif
00688   SelectRoot(*CurDAG);
00689 #ifndef NDEBUG
00690   DOUT << "===== Instruction selection ends:\n";
00691 #endif
00692 
00693   CurDAG->RemoveDeadNodes();
00694 }
00695 
00696 /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
00697 /// the main function.
00698 void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
00699                                              MachineFrameInfo *MFI) {
00700   const TargetInstrInfo *TII = TM.getInstrInfo();
00701   if (Subtarget->isTargetCygMing())
00702     BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
00703 }
00704 
00705 void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
00706   // If this is main, emit special code for main.
00707   MachineBasicBlock *BB = MF.begin();
00708   if (Fn.hasExternalLinkage() && Fn.getName() == "main")
00709     EmitSpecialCodeForMain(BB, MF.getFrameInfo());
00710 }
00711 
00712 /// MatchAddress - Add the specified node to the specified addressing mode,
00713 /// returning true if it cannot be done.  This just pattern matches for the
00714 /// addressing mode.
00715 bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
00716                                    bool isRoot, unsigned Depth) {
00717   bool is64Bit = Subtarget->is64Bit();
00718   DOUT << "MatchAddress: "; DEBUG(AM.dump());
00719   // Limit recursion.
00720   if (Depth > 5)
00721     return MatchAddressBase(N, AM, isRoot, Depth);
00722   
00723   // RIP relative addressing: %rip + 32-bit displacement!
00724   if (AM.isRIPRel) {
00725     if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
00726       uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
00727       if (!is64Bit || isInt32(AM.Disp + Val)) {
00728         AM.Disp += Val;
00729         return false;
00730       }
00731     }
00732     return true;
00733   }
00734 
00735   switch (N.getOpcode()) {
00736   default: break;
00737   case ISD::Constant: {
00738     uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
00739     if (!is64Bit || isInt32(AM.Disp + Val)) {
00740       AM.Disp += Val;
00741       return false;
00742     }
00743     break;
00744   }
00745 
00746   case X86ISD::Wrapper: {
00747     DOUT << "Wrapper: 64bit " << is64Bit;
00748     DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n";
00749     // Under X86-64 non-small code model, GV (and friends) are 64-bits.
00750     // Also, base and index reg must be 0 in order to use rip as base.
00751     if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
00752                     AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
00753       break;
00754     if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
00755       break;
00756     // If value is available in a register both base and index components have
00757     // been picked, we can't fit the result available in the register in the
00758     // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
00759     {
00760       SDValue N0 = N.getOperand(0);
00761       if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
00762         uint64_t Offset = G->getOffset();
00763         if (!is64Bit || isInt32(AM.Disp + Offset)) {
00764           GlobalValue *GV = G->getGlobal();
00765           AM.GV = GV;
00766           AM.Disp += Offset;
00767           AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
00768           return false;
00769         }
00770       } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
00771         uint64_t Offset = CP->getOffset();
00772         if (!is64Bit || isInt32(AM.Disp + Offset)) {
00773           AM.CP = CP->getConstVal();
00774           AM.Align = CP->getAlignment();
00775           AM.Disp += Offset;
00776           AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
00777           return false;
00778         }
00779       } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
00780         AM.ES = S->getSymbol();
00781         AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
00782         return false;
00783       } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
00784         AM.JT = J->getIndex();
00785         AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
00786         return false;
00787       }
00788     }
00789     break;
00790   }
00791 
00792   case ISD::FrameIndex:
00793     if (AM.BaseType == X86ISelAddressMode::RegBase
00794         && AM.Base.Reg.getNode() == 0) {
00795       AM.BaseType = X86ISelAddressMode::FrameIndexBase;
00796       AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
00797       return false;
00798     }
00799     break;
00800 
00801   case ISD::SHL:
00802     if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel)
00803       break;
00804       
00805     if (ConstantSDNode
00806           *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
00807       unsigned Val = CN->getZExtValue();
00808       if (Val == 1 || Val == 2 || Val == 3) {
00809         AM.Scale = 1 << Val;
00810         SDValue ShVal = N.getNode()->getOperand(0);
00811 
00812         // Okay, we know that we have a scale by now.  However, if the scaled
00813         // value is an add of something and a constant, we can fold the
00814         // constant into the disp field here.
00815         if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
00816             isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
00817           AM.IndexReg = ShVal.getNode()->getOperand(0);
00818           ConstantSDNode *AddVal =
00819             cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
00820           uint64_t Disp = AM.Disp + (AddVal->getZExtValue() << Val);
00821           if (!is64Bit || isInt32(Disp))
00822             AM.Disp = Disp;
00823           else
00824             AM.IndexReg = ShVal;
00825         } else {
00826           AM.IndexReg = ShVal;
00827         }
00828         return false;
00829       }
00830     break;
00831     }
00832 
00833   case ISD::SMUL_LOHI:
00834   case ISD::UMUL_LOHI:
00835     // A mul_lohi where we need the low part can be folded as a plain multiply.
00836     if (N.getResNo() != 0) break;
00837     // FALL THROUGH
00838   case ISD::MUL:
00839     // X*[3,5,9] -> X+X*[2,4,8]
00840     if (AM.BaseType == X86ISelAddressMode::RegBase &&
00841         AM.Base.Reg.getNode() == 0 &&
00842         AM.IndexReg.getNode() == 0 &&
00843         !AM.isRIPRel) {
00844       if (ConstantSDNode
00845             *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
00846         if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
00847             CN->getZExtValue() == 9) {
00848           AM.Scale = unsigned(CN->getZExtValue())-1;
00849 
00850           SDValue MulVal = N.getNode()->getOperand(0);
00851           SDValue Reg;
00852 
00853           // Okay, we know that we have a scale by now.  However, if the scaled
00854           // value is an add of something and a constant, we can fold the
00855           // constant into the disp field here.
00856           if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
00857               isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
00858             Reg = MulVal.getNode()->getOperand(0);
00859             ConstantSDNode *AddVal =
00860               cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
00861             uint64_t Disp = AM.Disp + AddVal->getZExtValue() *
00862                                       CN->getZExtValue();
00863             if (!is64Bit || isInt32(Disp))
00864               AM.Disp = Disp;
00865             else
00866               Reg = N.getNode()->getOperand(0);
00867           } else {
00868             Reg = N.getNode()->getOperand(0);
00869           }
00870 
00871           AM.IndexReg = AM.Base.Reg = Reg;
00872           return false;
00873         }
00874     }
00875     break;
00876 
00877   case ISD::ADD:
00878     {
00879       X86ISelAddressMode Backup = AM;
00880       if (!MatchAddress(N.