LLVM API Documentation

X86FloatingPoint.cpp

Go to the documentation of this file.
00001 //===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the pass which converts floating point instructions from
00011 // virtual registers into register stack instructions.  This pass uses live
00012 // variable information to indicate where the FPn registers are used and their
00013 // lifetimes.
00014 //
00015 // This pass is hampered by the lack of decent CFG manipulation routines for
00016 // machine code.  In particular, this wants to be able to split critical edges
00017 // as necessary, traverse the machine basic block CFG in depth-first order, and
00018 // allow there to be multiple machine basic blocks for each LLVM basicblock
00019 // (needed for critical edge splitting).
00020 //
00021 // In particular, this pass currently barfs on critical edges.  Because of this,
00022 // it requires the instruction selector to insert FP_REG_KILL instructions on
00023 // the exits of any basic block that has critical edges going from it, or which
00024 // branch to a critical basic block.
00025 //
00026 // FIXME: this is not implemented yet.  The stackifier pass only works on local
00027 // basic blocks.
00028 //
00029 //===----------------------------------------------------------------------===//
00030 
00031 #define DEBUG_TYPE "x86-codegen"
00032 #include "X86.h"
00033 #include "X86InstrInfo.h"
00034 #include "llvm/CodeGen/MachineFunctionPass.h"
00035 #include "llvm/CodeGen/MachineInstrBuilder.h"
00036 #include "llvm/CodeGen/MachineRegisterInfo.h"
00037 #include "llvm/CodeGen/Passes.h"
00038 #include "llvm/Target/TargetInstrInfo.h"
00039 #include "llvm/Target/TargetMachine.h"
00040 #include "llvm/Support/Debug.h"
00041 #include "llvm/Support/Compiler.h"
00042 #include "llvm/ADT/DepthFirstIterator.h"
00043 #include "llvm/ADT/SmallPtrSet.h"
00044 #include "llvm/ADT/SmallVector.h"
00045 #include "llvm/ADT/Statistic.h"
00046 #include "llvm/ADT/STLExtras.h"
00047 #include <algorithm>
00048 using namespace llvm;
00049 
00050 STATISTIC(NumFXCH, "Number of fxch instructions inserted");
00051 STATISTIC(NumFP  , "Number of floating point instructions");
00052 
00053 namespace {
00054   struct VISIBILITY_HIDDEN FPS : public MachineFunctionPass {
          // FPS - Machine function pass that rewrites FP<n> pseudo-register
          // instructions into register-stack form.  While a block is processed,
          // Stack[], RegMap[] and StackTop mirror the FP stack so the pass knows
          // which ST(i) currently holds each FP<n>.
00055     static char ID;
00056     FPS() : MachineFunctionPass(&ID) {}
00057 
          // Declare MachineLoopInfo and MachineDominators as preserved, then let
          // the base class add its own analysis usage.
00058     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
00059       AU.addPreservedID(MachineLoopInfoID);
00060       AU.addPreservedID(MachineDominatorsID);
00061       MachineFunctionPass::getAnalysisUsage(AU);
00062     }
00063 
00064     virtual bool runOnMachineFunction(MachineFunction &MF);
00065 
00066     virtual const char *getPassName() const { return "X86 FP Stackifier"; }
00067 
00068   private:
00069     const TargetInstrInfo *TII; // Machine instruction info.
00070     MachineBasicBlock *MBB;     // Current basic block
00071     unsigned Stack[8];          // FP<n> Registers in each stack slot...
00072     unsigned RegMap[8];         // Track which stack slot contains each register
00073     unsigned StackTop;          // Number of live slots; Stack[StackTop-1] is ST(0).
00074 
          // dumpStack - Print the FP<n> register held in each live slot to cerr,
          // starting from slot 0 (the bottom of the stack), asserting along the
          // way that RegMap[] agrees with Stack[].
00075     void dumpStack() const {
00076       cerr << "Stack contents:";
00077       for (unsigned i = 0; i != StackTop; ++i) {
00078         cerr << " FP" << Stack[i];
00079         assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
00080       }
00081       cerr << "\n";
00082     }
00083   private:
00084     /// isStackEmpty - Return true if the FP stack is empty.
00085     bool isStackEmpty() const {
00086       return StackTop == 0;
00087     }
00088     
00089     // getSlot - Return the stack slot number a particular register number is
00090     // in.  RegNo is the zero-based FP<n> index and must be below 8.
00091     unsigned getSlot(unsigned RegNo) const {
00092       assert(RegNo < 8 && "Regno out of range!");
00093       return RegMap[RegNo];
00094     }
00095 
00096     // getStackEntry - Return the X86::FP<n> register in register ST(i).
          // ST(0) is the most recently pushed slot, so the index counts down from
          // StackTop-1.
00097     unsigned getStackEntry(unsigned STi) const {
00098       assert(STi < StackTop && "Access past stack top!");
00099       return Stack[StackTop-1-STi];
00100     }
00101 
00102     // getSTReg - Return the X86::ST(i) register which contains the specified
00103     // FP<RegNo> register.  The result is X86::ST0 plus the register's distance
          // from the top of the tracked stack.
00104     unsigned getSTReg(unsigned RegNo) const {
00105       return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
00106     }
00107 
00108     // pushReg - Push the specified FP<n> register onto the stack.
          // Only the tracked state is updated; no instruction is emitted here.
00109     void pushReg(unsigned Reg) {
00110       assert(Reg < 8 && "Register number out of range!");
00111       assert(StackTop < 8 && "Stack overflow!");
00112       Stack[StackTop] = Reg;
00113       RegMap[Reg] = StackTop++;
00114     }
00115 
          // isAtTop - Return true if FP<RegNo> occupies the top stack slot.
00116     bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
          // moveToTop - Make sure FP<RegNo> is in ST(0).  If it is not there
          // already, swap it with the current top in the tracked state and insert
          // an XCH_F on its ST(i) register before I.
00117     void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
00118       if (isAtTop(RegNo)) return;
00119       
            // Capture both values before the tracked state is modified below.
00120       unsigned STReg = getSTReg(RegNo);
00121       unsigned RegOnTop = getStackEntry(0);
00122 
00123       // Swap the slots the regs are in.
00124       std::swap(RegMap[RegNo], RegMap[RegOnTop]);
00125 
00126       // Swap stack slot contents.
00127       assert(RegMap[RegOnTop] < StackTop);
00128       std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
00129 
00130       // Emit an fxch to update the runtime processor's version of the state.
00131       BuildMI(*MBB, I, TII->get(X86::XCH_F)).addReg(STReg);
00132       NumFXCH++;
00133     }
00134 
          // duplicateToTop - Push a copy of FP<RegNo> onto the stack, tracked as
          // FP<AsReg>, by inserting an LD_Frr of its ST(i) register before I.
00135     void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
            // STReg must be computed first: pushReg changes StackTop, which
            // getSTReg depends on.
00136       unsigned STReg = getSTReg(RegNo);
00137       pushReg(AsReg);   // New register on top of stack
00138 
00139       BuildMI(*MBB, I, TII->get(X86::LD_Frr)).addReg(STReg);
00140     }
00141 
00142     // popStackAfter - Pop the current value off of the top of the FP stack
00143     // after the specified instruction.
00144     void popStackAfter(MachineBasicBlock::iterator &I);
00145 
00146     // freeStackSlotAfter - Free the specified register from the register stack,
00147     // so that it is no longer in a register.  If the register is currently at
00148     // the top of the stack, we just pop the current instruction, otherwise we
00149     // store the current top-of-stack into the specified slot, then pop the top
00150     // of stack.
00151     void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
00152 
          // processBasicBlock - Rewrite every FP instruction in one basic block.
00153     bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
00154 
          // One handler per X86II FP instruction class; processBasicBlock
          // dispatches to these based on the instruction's TSFlags.
00155     void handleZeroArgFP(MachineBasicBlock::iterator &I);
00156     void handleOneArgFP(MachineBasicBlock::iterator &I);
00157     void handleOneArgFPRW(MachineBasicBlock::iterator &I);
00158     void handleTwoArgFP(MachineBasicBlock::iterator &I);
00159     void handleCompareFP(MachineBasicBlock::iterator &I);
00160     void handleCondMovFP(MachineBasicBlock::iterator &I);
00161     void handleSpecialFP(MachineBasicBlock::iterator &I);
00162   };
00163   char FPS::ID = 0;
00164 }
00165 
00166 FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
00167 
00168 /// getFPReg - Return the X86::FPx register number for the specified operand.
00169 /// For example, this returns 3 for X86::FP3.
00170 static unsigned getFPReg(const MachineOperand &MO) {
00171   assert(MO.isReg() && "Expected an FP register!");
00172   unsigned Reg = MO.getReg();
00173   assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
00174   return Reg - X86::FP0;
00175 }
00176 
00177 
00178 /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
00179 /// register references into FP stack references.
00180 ///
00181 bool FPS::runOnMachineFunction(MachineFunction &MF) {
00182   // We only need to run this pass if there are any FP registers used in this
00183   // function.  If it is all integer, there is nothing for us to do!
00184   bool FPIsUsed = false;
00185 
00186   assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
00187   for (unsigned i = 0; i <= 6; ++i)
00188     if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
00189       FPIsUsed = true;
00190       break;
00191     }
00192 
00193   // Early exit.
00194   if (!FPIsUsed) return false;
00195 
00196   TII = MF.getTarget().getInstrInfo();
00197   StackTop = 0;
00198 
00199   // Process the function in depth first order so that we process at least one
00200   // of the predecessors for every reachable block in the function.
00201   SmallPtrSet<MachineBasicBlock*, 8> Processed;
00202   MachineBasicBlock *Entry = MF.begin();
00203 
00204   bool Changed = false;
00205   for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
00206          I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
00207        I != E; ++I)
00208     Changed |= processBasicBlock(MF, **I);
00209 
00210   return Changed;
00211 }
00212 
00213 /// processBasicBlock - Loop over all of the instructions in the basic block,
00214 /// transforming FP instructions into their stack form.
      /// Returns true if at least one FP instruction was processed.  The tracked
      /// FP stack is asserted to be empty when the end of the block is reached.
00215 ///
00216 bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
00217   bool Changed = false;
00218   MBB = &BB;
00219 
00220   for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
00221     MachineInstr *MI = I;
00222     unsigned Flags = MI->getDesc().TSFlags;
00223     
          // The FP instruction class is read from the target-specific flags;
          // inline asm is always routed to the special-case handler.
00224     unsigned FPInstClass = Flags & X86II::FPTypeMask;
00225     if (MI->getOpcode() == TargetInstrInfo::INLINEASM)
00226       FPInstClass = X86II::SpecialFP;
00227     
00228     if (FPInstClass == X86II::NotFP)
00229       continue;  // Efficiently ignore non-fp insts!
00230 
          // Remember the preceding instruction (null at the start of the block)
          // so the debug dump below can locate where the expansion begins.
00231     MachineInstr *PrevMI = 0;
00232     if (I != BB.begin())
00233       PrevMI = prior(I);
00234 
00235     ++NumFP;  // Keep track of # of pseudo instrs
00236     DOUT << "\nFPInst:\t" << *MI;
00237 
00238     // Get dead variables list now because the MI pointer may be deleted as part
00239     // of processing!
00240     SmallVector<unsigned, 8> DeadRegs;
00241     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
00242       const MachineOperand &MO = MI->getOperand(i);
00243       if (MO.isReg() && MO.isDead())
00244         DeadRegs.push_back(MO.getReg());
00245     }
00246 
          // Dispatch on the instruction class.  The handlers take I by reference
          // and may leave it pointing at a different instruction.
00247     switch (FPInstClass) {
00248     case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
00249     case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
00250     case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
00251     case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
00252     case X86II::CompareFP:  handleCompareFP(I); break;
00253     case X86II::CondMovFP:  handleCondMovFP(I); break;
00254     case X86II::SpecialFP:  handleSpecialFP(I); break;
00255     default: assert(0 && "Unknown FP Type!");
00256     }
00257 
00258     // Check to see if any of the values defined by this instruction are dead
00259     // after definition.  If so, pop them.
          // Only registers in the FP0..FP6 range are freed; other dead registers
          // collected above are ignored.
00260     for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
00261       unsigned Reg = DeadRegs[i];
00262       if (Reg >= X86::FP0 && Reg <= X86::FP6) {
00263         DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n";
00264         freeStackSlotAfter(I, Reg-X86::FP0);
00265       }
00266     }
00267 
00268     // Print out all of the instructions expanded to if -debug
          // NOTE(review): only the first instruction of the expansion is printed;
          // the trailing while loop advances Start without printing anything.
          // Confirm whether that is intended.
00269     DEBUG(
00270       MachineBasicBlock::iterator PrevI(PrevMI);
00271       if (I == PrevI) {
00272         cerr << "Just deleted pseudo instruction\n";
00273       } else {
00274         MachineBasicBlock::iterator Start = I;
00275         // Rewind to first instruction newly inserted.
00276         while (Start != BB.begin() && prior(Start) != PrevI) --Start;
00277         cerr << "Inserted instructions:\n\t";
00278         Start->print(*cerr.stream(), &MF.getTarget());
00279         while (++Start != next(I)) {}
00280       }
00281       dumpStack();
00282     );
00283 
00284     Changed = true;
00285   }
00286 
00287   assert(isStackEmpty() && "Stack not empty at end of basic block?");
00288   return Changed;
00289 }
00290 
00291 //===----------------------------------------------------------------------===//
00292 // Efficient Lookup Table Support
00293 //===----------------------------------------------------------------------===//
00294 
namespace {
  /// TableEntry - One row of a sorted opcode-translation table, mapping the
  /// opcode `from` to the opcode `to`.  All three comparison operators order
  /// by `from` alone; the mixed TableEntry/unsigned overloads let
  /// std::lower_bound search a table directly for a bare opcode value.
  struct TableEntry {
    unsigned from;
    unsigned to;
    bool operator<(const TableEntry &TE) const { return from < TE.from; }
    friend bool operator<(const TableEntry &TE, unsigned V) {
      return TE.from < V;
    }
    friend bool operator<(unsigned V, const TableEntry &TE) {
      return V < TE.from;
    }
  };
}

#ifndef NDEBUG
/// TableIsSorted - Return true if the `from` fields of the first NumEntries
/// rows of Table are strictly increasing (so duplicate keys are rejected).
/// Empty and single-entry tables are trivially sorted.  Only compiled into
/// assert-enabled builds.
static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
  // Compare each row with its predecessor.  Starting at 1 and testing
  // i < NumEntries avoids the unsigned wrap-around that NumEntries-1 would
  // produce for an empty table.
  for (unsigned i = 1; i < NumEntries; ++i)
    if (!(Table[i-1] < Table[i])) return false;
  return true;
}
#endif
00316 
00317 static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
00318   const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
00319   if (I != Table+N && I->from == Opcode)
00320     return I->to;
00321   return -1;
00322 }
00323 
00324 #ifdef NDEBUG
      // ASSERT_SORTED(TABLE) - With NDEBUG defined the check expands to nothing.
00325 #define ASSERT_SORTED(TABLE)
00326 #else
      // Otherwise each expansion site checks once that TABLE passes
      // TableIsSorted; a static flag per expansion records that the check has
      // already run so later executions skip it.
00327 #define ASSERT_SORTED(TABLE)                                              \
00328   { static bool TABLE##Checked = false;                                   \
00329     if (!TABLE##Checked) {                                                \
00330        assert(TableIsSorted(TABLE, array_lengthof(TABLE)) &&              \
00331               "All lookup tables must be sorted for efficient access!");  \
00332        TABLE##Checked = true;                                             \
00333     }                                                                     \
00334   }
00335 #endif
00336 
00337 //===----------------------------------------------------------------------===//
00338 // Register File -> Register Stack Mapping Methods
00339 //===----------------------------------------------------------------------===//
00340 
00341 // OpcodeTable - Sorted map of register instructions to their stack version.
00342 // The first element is a register file pseudo instruction, the second is the
00343 // concrete X86 instruction which uses the register stack.
      // getConcreteOpcode searches this table with Lookup and, in assert-enabled
      // builds, checks via ASSERT_SORTED that the rows are sorted by their first
      // element, so new rows must be inserted in opcode order.
00344 //
00345 static const TableEntry OpcodeTable[] = {
00346   { X86::ABS_Fp32     , X86::ABS_F     },
00347   { X86::ABS_Fp64     , X86::ABS_F     },
00348   { X86::ABS_Fp80     , X86::ABS_F     },
00349   { X86::ADD_Fp32m    , X86::ADD_F32m  },
00350   { X86::ADD_Fp64m    , X86::ADD_F64m  },
00351   { X86::ADD_Fp64m32  , X86::ADD_F32m  },
00352   { X86::ADD_Fp80m32  , X86::ADD_F32m  },
00353   { X86::ADD_Fp80m64  , X86::ADD_F64m  },
00354   { X86::ADD_FpI16m32 , X86::ADD_FI16m },
00355   { X86::ADD_FpI16m64 , X86::ADD_FI16m },
00356   { X86::ADD_FpI16m80 , X86::ADD_FI16m },
00357   { X86::ADD_FpI32m32 , X86::ADD_FI32m },
00358   { X86::ADD_FpI32m64 , X86::ADD_FI32m },
00359   { X86::ADD_FpI32m80 , X86::ADD_FI32m },
00360   { X86::CHS_Fp32     , X86::CHS_F     },
00361   { X86::CHS_Fp64     , X86::CHS_F     },
00362   { X86::CHS_Fp80     , X86::CHS_F     },
00363   { X86::CMOVBE_Fp32  , X86::CMOVBE_F  },
00364   { X86::CMOVBE_Fp64  , X86::CMOVBE_F  },
00365   { X86::CMOVBE_Fp80  , X86::CMOVBE_F  },
00366   { X86::CMOVB_Fp32   , X86::CMOVB_F   },
00367   { X86::CMOVB_Fp64   , X86::CMOVB_F  },
00368   { X86::CMOVB_Fp80   , X86::CMOVB_F  },
00369   { X86::CMOVE_Fp32   , X86::CMOVE_F  },
00370   { X86::CMOVE_Fp64   , X86::CMOVE_F   },
00371   { X86::CMOVE_Fp80   , X86::CMOVE_F   },
00372   { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
00373   { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
00374   { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
00375   { X86::CMOVNB_Fp32  , X86::CMOVNB_F  },
00376   { X86::CMOVNB_Fp64  , X86::CMOVNB_F  },
00377   { X86::CMOVNB_Fp80  , X86::CMOVNB_F  },
00378   { X86::CMOVNE_Fp32  , X86::CMOVNE_F  },
00379   { X86::CMOVNE_Fp64  , X86::CMOVNE_F  },
00380   { X86::CMOVNE_Fp80  , X86::CMOVNE_F  },
00381   { X86::CMOVNP_Fp32  , X86::CMOVNP_F  },
00382   { X86::CMOVNP_Fp64  , X86::CMOVNP_F  },
00383   { X86::CMOVNP_Fp80  , X86::CMOVNP_F  },
00384   { X86::CMOVP_Fp32   , X86::CMOVP_F   },
00385   { X86::CMOVP_Fp64   , X86::CMOVP_F   },
00386   { X86::CMOVP_Fp80   , X86::CMOVP_F   },
00387   { X86::COS_Fp32     , X86::COS_F     },
00388   { X86::COS_Fp64     , X86::COS_F     },
00389   { X86::COS_Fp80     , X86::COS_F     },
00390   { X86::DIVR_Fp32m   , X86::DIVR_F32m },
00391   { X86::DIVR_Fp64m   , X86::DIVR_F64m },
00392   { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
00393   { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
00394   { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
00395   { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
00396   { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
00397   { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
00398   { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
00399   { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
00400   { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
00401   { X86::DIV_Fp32m    , X86::DIV_F32m  },
00402   { X86::DIV_Fp64m    , X86::DIV_F64m  },
00403   { X86::DIV_Fp64m32  , X86::DIV_F32m  },
00404   { X86::DIV_Fp80m32  , X86::DIV_F32m  },
00405   { X86::DIV_Fp80m64  , X86::DIV_F64m  },
00406   { X86::DIV_FpI16m32 , X86::DIV_FI16m },
00407   { X86::DIV_FpI16m64 , X86::DIV_FI16m },
00408   { X86::DIV_FpI16m80 , X86::DIV_FI16m },
00409   { X86::DIV_FpI32m32 , X86::DIV_FI32m },
00410   { X86::DIV_FpI32m64 , X86::DIV_FI32m },
00411   { X86::DIV_FpI32m80 , X86::DIV_FI32m },
00412   { X86::ILD_Fp16m32  , X86::ILD_F16m  },
00413   { X86::ILD_Fp16m64  , X86::ILD_F16m  },
00414   { X86::ILD_Fp16m80  , X86::ILD_F16m  },
00415   { X86::ILD_Fp32m32  , X86::ILD_F32m  },
00416   { X86::ILD_Fp32m64  , X86::ILD_F32m  },
00417   { X86::ILD_Fp32m80  , X86::ILD_F32m  },
00418   { X86::ILD_Fp64m32  , X86::ILD_F64m  },
00419   { X86::ILD_Fp64m64  , X86::ILD_F64m  },
00420   { X86::ILD_Fp64m80  , X86::ILD_F64m  },
00421   { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
00422   { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
00423   { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
00424   { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
00425   { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
00426   { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
00427   { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
00428   { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
00429   { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
00430   { X86::IST_Fp16m32  , X86::IST_F16m  },
00431   { X86::IST_Fp16m64  , X86::IST_F16m  },
00432   { X86::IST_Fp16m80  , X86::IST_F16m  },
00433   { X86::IST_Fp32m32  , X86::IST_F32m  },
00434   { X86::IST_Fp32m64  , X86::IST_F32m  },
00435   { X86::IST_Fp32m80  , X86::IST_F32m  },
00436   { X86::IST_Fp64m32  , X86::IST_FP64m },
00437   { X86::IST_Fp64m64  , X86::IST_FP64m },
00438   { X86::IST_Fp64m80  , X86::IST_FP64m },
00439   { X86::LD_Fp032     , X86::LD_F0     },
00440   { X86::LD_Fp064     , X86::LD_F0     },
00441   { X86::LD_Fp080     , X86::LD_F0     },
00442   { X86::LD_Fp132     , X86::LD_F1     },
00443   { X86::LD_Fp164     , X86::LD_F1     },
00444   { X86::LD_Fp180     , X86::LD_F1     },
00445   { X86::LD_Fp32m     , X86::LD_F32m   },
00446   { X86::LD_Fp32m64   , X86::LD_F32m   },
00447   { X86::LD_Fp32m80   , X86::LD_F32m   },
00448   { X86::LD_Fp64m     , X86::LD_F64m   },
00449   { X86::LD_Fp64m80   , X86::LD_F64m   },
00450   { X86::LD_Fp80m     , X86::LD_F80m   },
00451   { X86::MUL_Fp32m    , X86::MUL_F32m  },
00452   { X86::MUL_Fp64m    , X86::MUL_F64m  },
00453   { X86::MUL_Fp64m32  , X86::MUL_F32m  },
00454   { X86::MUL_Fp80m32  , X86::MUL_F32m  },
00455   { X86::MUL_Fp80m64  , X86::MUL_F64m  },
00456   { X86::MUL_FpI16m32 , X86::MUL_FI16m },
00457   { X86::MUL_FpI16m64 , X86::MUL_FI16m },
00458   { X86::MUL_FpI16m80 , X86::MUL_FI16m },
00459   { X86::MUL_FpI32m32 , X86::MUL_FI32m },
00460   { X86::MUL_FpI32m64 , X86::MUL_FI32m },
00461   { X86::MUL_FpI32m80 , X86::MUL_FI32m },
00462   { X86::SIN_Fp32     , X86::SIN_F     },
00463   { X86::SIN_Fp64     , X86::SIN_F     },
00464   { X86::SIN_Fp80     , X86::SIN_F     },
00465   { X86::SQRT_Fp32    , X86::SQRT_F    },
00466   { X86::SQRT_Fp64    , X86::SQRT_F    },
00467   { X86::SQRT_Fp80    , X86::SQRT_F    },
00468   { X86::ST_Fp32m     , X86::ST_F32m   },
00469   { X86::ST_Fp64m     , X86::ST_F64m   },
00470   { X86::ST_Fp64m32   , X86::ST_F32m   },
00471   { X86::ST_Fp80m32   , X86::ST_F32m   },
00472   { X86::ST_Fp80m64   , X86::ST_F64m   },
00473   { X86::ST_FpP80m    , X86::ST_FP80m  },
00474   { X86::SUBR_Fp32m   , X86::SUBR_F32m },
00475   { X86::SUBR_Fp64m   , X86::SUBR_F64m },
00476   { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
00477   { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
00478   { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
00479   { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
00480   { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
00481   { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
00482   { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
00483   { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
00484   { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
00485   { X86::SUB_Fp32m    , X86::SUB_F32m  },
00486   { X86::SUB_Fp64m    , X86::SUB_F64m  },
00487   { X86::SUB_Fp64m32  , X86::SUB_F32m  },
00488   { X86::SUB_Fp80m32  , X86::SUB_F32m  },
00489   { X86::SUB_Fp80m64  , X86::SUB_F64m  },
00490   { X86::SUB_FpI16m32 , X86::SUB_FI16m },
00491   { X86::SUB_FpI16m64 , X86::SUB_FI16m },
00492   { X86::SUB_FpI16m80 , X86::SUB_FI16m },
00493   { X86::SUB_FpI32m32 , X86::SUB_FI32m },
00494   { X86::SUB_FpI32m64 , X86::SUB_FI32m },
00495   { X86::SUB_FpI32m80 , X86::SUB_FI32m },
00496   { X86::TST_Fp32     , X86::TST_F     },
00497   { X86::TST_Fp64     , X86::TST_F     },
00498   { X86::TST_Fp80     , X86::TST_F     },
00499   { X86::UCOM_FpIr32  , X86::UCOM_FIr  },
00500   { X86::UCOM_FpIr64  , X86::UCOM_FIr  },
00501   { X86::UCOM_FpIr80  , X86::UCOM_FIr  },
00502   { X86::UCOM_Fpr32   , X86::UCOM_Fr   },
00503   { X86::UCOM_Fpr64   , X86::UCOM_Fr   },
00504   { X86::UCOM_Fpr80   , X86::UCOM_Fr   },
00505 };
00506 
00507 static unsigned getConcreteOpcode(unsigned Opcode) {
00508   ASSERT_SORTED(OpcodeTable);
00509   int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
00510   assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
00511   return Opc;
00512 }
00513 
00514 //===----------------------------------------------------------------------===//
00515 // Helper Methods
00516 //===----------------------------------------------------------------------===//
00517 
00518 // PopTable - Sorted map of instructions to their popping version.  The first
00519 // element is an instruction, the second is the version which pops.
      // popStackAfter searches this table with Lookup and, in assert-enabled
      // builds, checks via ASSERT_SORTED that the rows are sorted by their first
      // element.  An instruction with no row here gets an explicit pop instead.
00520 //
00521 static const TableEntry PopTable[] = {
00522   { X86::ADD_FrST0 , X86::ADD_FPrST0  },
00523 
00524   { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
00525   { X86::DIV_FrST0 , X86::DIV_FPrST0  },
00526 
00527   { X86::IST_F16m  , X86::IST_FP16m   },
00528   { X86::IST_F32m  , X86::IST_FP32m   },
00529 
00530   { X86::MUL_FrST0 , X86::MUL_FPrST0  },
00531 
00532   { X86::ST_F32m   , X86::ST_FP32m    },
00533   { X86::ST_F64m   , X86::ST_FP64m    },
00534   { X86::ST_Frr    , X86::ST_FPrr     },
00535 
00536   { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
00537   { X86::SUB_FrST0 , X86::SUB_FPrST0  },
00538 
00539   { X86::UCOM_FIr  , X86::UCOM_FIPr   },
00540 
00541   { X86::UCOM_FPr  , X86::UCOM_FPPr   },
00542   { X86::UCOM_Fr   , X86::UCOM_FPr    },
00543 };
00544 
00545 /// popStackAfter - Pop the current value off of the top of the FP stack after
00546 /// the specified instruction.  This attempts to be sneaky and combine the pop
00547 /// into the instruction itself if possible.  The iterator is left pointing to
00548 /// the last instruction, be it a new pop instruction inserted, or the old
00549 /// instruction if it was modified in place.
00550 ///
00551 void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
00552   ASSERT_SORTED(PopTable);
00553   assert(StackTop > 0 && "Cannot pop empty stack!");
        // Shrink the tracked stack and mark the popped register's slot as invalid.
00554   RegMap[Stack[--StackTop]] = ~0;     // Update state
00555 
00556   // Check to see if there is a popping version of this instruction...
00557   int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
00558   if (Opcode != -1) {
          // Rewrite the instruction in place to its popping form.
00559     I->setDesc(TII->get(Opcode));
          // UCOM_FPPr additionally has its operand 0 removed; presumably that
          // form takes no explicit register operand (verify against its
          // instruction definition).
00560     if (Opcode == X86::UCOM_FPPr)
00561       I->RemoveOperand(0);
00562   } else {    // Insert an explicit pop
          // A popping store of ST0 is inserted after the instruction, and I is
          // updated to point at it.
00563     I = BuildMI(*MBB, ++I, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
00564   }
00565 }
00566 
00567 /// freeStackSlotAfter - Free the specified register from the register stack, so
00568 /// that it is no longer in a register.  If the register is currently at the top
00569 /// of the stack, we just pop the current instruction, otherwise we store the
00570 /// current top-of-stack into the specified slot, then pop the top of stack.
      /// FPRegNo is the zero-based FP<n> index.  On return I points at the last
      /// instruction emitted or modified.
00571 void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
00572   if (getStackEntry(0) == FPRegNo) {  // already at the top of stack? easy.
00573     popStackAfter(I);
00574     return;
00575   }
00576 
00577   // Otherwise, store the top of stack into the dead slot, killing the operand
00578   // without having to add in an explicit xchg then pop.
00579   //
        // STReg and OldSlot are read before any tracked state changes, since both
        // depend on the current RegMap/StackTop values.
00580   unsigned STReg    = getSTReg(FPRegNo);
00581   unsigned OldSlot  = getSlot(FPRegNo);
00582   unsigned TopReg   = Stack[StackTop-1];
        // The former top register now lives in the dead register's slot; the dead
        // register and the vacated top slot are marked invalid.
00583   Stack[OldSlot]    = TopReg;
00584   RegMap[TopReg]    = OldSlot;
00585   RegMap[FPRegNo]   = ~0;
00586   Stack[--StackTop] = ~0;
        // Emit the popping store into the dead register's ST(i) after the
        // instruction and leave I pointing at it.
00587   I = BuildMI(*MBB, ++I, TII->get(X86::ST_FPrr)).addReg(STReg);
00588 }
00589 
00590 
00591 //===----------------------------------------------------------------------===//
00592 // Instruction transformation implementation
00593 //===----------------------------------------------------------------------===//
00594 
00595 /// handleZeroArgFP - ST(0) = fld0    ST(0) = flds <mem>
      /// Handles instructions whose only FP operand is the destination (operand
      /// 0): the pseudo is converted in place and the result is tracked as a new
      /// top-of-stack entry.
00596 ///
00597 void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
00598   MachineInstr *MI = I;
        // Read the destination before the operand is removed below.
00599   unsigned DestReg = getFPReg(MI->getOperand(0));
00600 
00601   // Change from the pseudo instruction to the concrete instruction.
        // getConcreteOpcode is evaluated on the pseudo opcode, i.e. before
        // setDesc replaces the descriptor.
00602   MI->RemoveOperand(0);   // Remove the explicit ST(0) operand
00603   MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
00604   
00605   // Result gets pushed on the stack.
00606   pushReg(DestReg);
00607 }
00608 
00609 /// handleOneArgFP - fst <mem>, ST(0)
      /// Handles instructions that read one FP register, which is their last
      /// declared operand.  The source is brought to the top of the stack (or
      /// duplicated there), the pseudo is converted in place, and the value is
      /// popped when the instruction pops it or this was its last use.
00610 ///
00611 void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
00612   MachineInstr *MI = I;
00613   unsigned NumOps = MI->getDesc().getNumOperands();
00614   assert((NumOps == 5 || NumOps == 1) &&
00615          "Can only handle fst* & ftst instructions!");
00616 
00617   // Is this the last use of the source register?
00618   unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
00619   bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
00620 
00621   // FISTP64m is strange because there isn't a non-popping version.
00622   // If we have one _and_ we don't want to pop the operand, duplicate the value
00623   // on the stack instead of moving it.  This ensures that popping the value is
00624   // always ok.
00625   // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
00626   //
00627   if (!KillsSrc &&
00628       (MI->getOpcode() == X86::IST_Fp64m32 ||
00629        MI->getOpcode() == X86::ISTT_Fp16m32 ||
00630        MI->getOpcode() == X86::ISTT_Fp32m32 ||
00631        MI->getOpcode() == X86::ISTT_Fp64m32 ||
00632        MI->getOpcode() == X86::IST_Fp64m64 ||
00633        MI->getOpcode() == X86::ISTT_Fp16m64 ||
00634        MI->getOpcode() == X86::ISTT_Fp32m64 ||
00635        MI->getOpcode() == X86::ISTT_Fp64m64 ||
00636        MI->getOpcode() == X86::IST_Fp64m80 ||
00637        MI->getOpcode() == X86::ISTT_Fp16m80 ||
00638        MI->getOpcode() == X86::ISTT_Fp32m80 ||
00639        MI->getOpcode() == X86::ISTT_Fp64m80 ||
00640        MI->getOpcode() == X86::ST_FpP80m)) {
          // The copy is tracked under register number 7, which lies outside the
          // FP0..FP6 range accepted by getFPReg.
00641     duplicateToTop(Reg, 7 /*temp register*/, I);
00642   } else {
00643     moveToTop(Reg, I);            // Move to the top of the stack...
00644   }
00645   
00646   // Convert from the pseudo instruction to the concrete instruction.
00647   MI->RemoveOperand(NumOps-1);    // Remove explicit ST(0) operand
00648   MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
00649 
        // These concrete opcodes pop as part of the instruction itself, so only
        // the tracked stack depth is adjusted and no pop instruction is emitted.
00650   if (MI->getOpcode() == X86::IST_FP64m ||
00651       MI->getOpcode() == X86::ISTT_FP16m ||
00652       MI->getOpcode() == X86::ISTT_FP32m ||
00653       MI->getOpcode() == X86::ISTT_FP64m ||
00654       MI->getOpcode() == X86::ST_FP80m) {
00655     assert(StackTop > 0 && "Stack empty??");
00656     --StackTop;
00657   } else if (KillsSrc) { // Last use of operand?
00658     popStackAfter(I);
00659   }
00660 }
00661 
00662 
00663 /// handleOneArgFPRW: Handle instructions that read from the top of stack and
00664 /// replace the value with a newly computed value.  These instructions may have
00665 /// non-fp operands after their FP operands.
00666 ///
00667 ///  Examples:
00668 ///     R1 = fchs R2
00669 ///     R1 = fadd R2, [mem]
      ///
      /// Operand 0 is the FP destination and operand 1 the FP source; both are
      /// removed once the stack state has been updated.
00670 ///
00671 void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
00672   MachineInstr *MI = I;
      // NumOps is only needed by the assertion, so it is compiled out together
      // with it in NDEBUG builds.
00673 #ifndef NDEBUG
00674   unsigned NumOps = MI->getDesc().getNumOperands();
00675   assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
00676 #endif
00677 
00678   // Is this the last use of the source register?
00679   unsigned Reg = getFPReg(MI->getOperand(1));
00680   bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
00681 
00682   if (KillsSrc) {
00683     // If this is the last use of the source register, just make sure it's on
00684     // the top of the stack.
00685     moveToTop(Reg, I);
00686     assert(StackTop > 0 && "Stack cannot be empty!");
          // Retire the source and reuse its top slot for the destination register.
00687     --StackTop;
00688     pushReg(getFPReg(MI->getOperand(0)));
00689   } else {
00690     // If this is not the last use of the source register, _copy_ it to the top
00691     // of the stack.
00692     duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
00693   }
00694 
00695   // Change from the pseudo instruction to the concrete instruction.
        // Operand 1 is removed before operand 0 so that both indices still refer
        // to the original FP operands.
00696   MI->RemoveOperand(1);   // Drop the source operand.
00697   MI->RemoveOperand(0);   // Drop the destination operand.
00698   MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
00699 }
00700 
00701 
00702 //===----------------------------------------------------------------------===//
00703 // Define tables of various ways to map pseudo instructions
00704 //
00705 
00706 // ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
      // Each row pairs a two-operand pseudo opcode with the stack opcode whose
      // result lands in ST(0).  The code that consults this table is outside this
      // part of the file; presumably the rows must stay sorted by the pseudo
      // opcode, like the other TableEntry tables.
00707 static const TableEntry ForwardST0Table[] = {
00708   { X86::ADD_Fp32  , X86::ADD_FST0r },
00709   { X86::ADD_Fp64  , X86::ADD_FST0r },
00710   { X86::ADD_Fp80  , X86::ADD_FST0r },
00711   { X86::DIV_Fp32  , X86::DIV_FST0r },
00712   { X86::DIV_Fp64  , X86::DIV_FST0r },
00713   { X86::DIV_Fp80  , X86::DIV_FST0r },
00714   { X86::MUL_Fp32  , X86::MUL_FST0r },
00715   { X86::MUL_Fp64  , X86::MUL_FST0r },
00716   { X86::MUL_Fp80  , X86::MUL_FST0r },
00717   { X86::SUB_Fp32  , X86::SUB_FST0r },
00718   { X86::SUB_Fp64  , X86::SUB_FST0r },
00719   { X86::SUB_Fp80  , X86::SUB_FST0r },
00720 };
00721 
00722 // ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
      // Same pseudo opcodes as ForwardST0Table with the operand order swapped:
      // the non-commutative DIV and SUB rows map to the reversed DIVR/SUBR stack
      // opcodes, while ADD and MUL keep the same opcode.
00723 static const TableEntry ReverseST0Table[] = {
00724   { X86::ADD_Fp32  , X86::ADD_FST0r  },   // commutative
00725   { X86::ADD_Fp64  , X86::ADD_FST0r  },   // commutative
00726   { X86::ADD_Fp80  , X86::ADD_FST0r  },   // commutative
00727   { X86::DIV_Fp32  , X86::DIVR_FST0r },
00728   { X86::DIV_Fp64  , X86::DIVR_FST0r },
00729   { X86::DIV_Fp80  , X86::DIVR_FST0r },
00730   { X86::MUL_Fp32  , X86::MUL_FST0r  },   // commutative
00731   { X86::MUL_Fp64  , X86::MUL_FST0r  },   // commutative
00732   { X86::MUL_Fp80  , X86::MUL_FST0r  },   // commutative
00733   { X86::SUB_Fp32  , X86::SUBR_FST0r },
00734   { X86::SUB_Fp64  , X86::SUBR_FST0r },
00735   { X86::SUB_Fp80  , X86::SUBR_FST0r },
00736 };
00737 
00738 // ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
00739 static const TableEntry ForwardSTiTable[] = {
00740   { X86::ADD_Fp32  , X86::ADD_FrST0  },   // commutative
00741   { X86::ADD_Fp64  , X86::ADD_FrST0  },   // commutative
00742   { X86::ADD_Fp80  , X86::ADD_FrST0  },   // commutative
00743   { X86::DIV_Fp32  , X86::DIVR_FrST0 },
00744   { X86::DIV_Fp64  , X86::DIVR_FrST0 },
00745   { X86::DIV_Fp80  , X86::DIVR_FrST0 },
00746   { X86::MUL_Fp32  , X86::MUL_FrST0  },   // commutative
00747   { X86::MUL_Fp64  , X86::MUL_FrST0  },   // commutative
00748   { X86::MUL_Fp80  , X86::MUL_FrST0  },   // commutative
00749   { X86::SUB_Fp32  , X86::SUBR_FrST0 },
00750   { X86::SUB_Fp64  , X86::SUBR_FrST0 },
00751   { X86::SUB_Fp80  , X86::SUBR_FrST0 },
00752 };
00753 
00754 // ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
00755 static const TableEntry ReverseSTiTable[] = {
00756   { X86::ADD_Fp32  , X86::ADD_FrST0 },
00757   { X86::ADD_Fp64  , X86::ADD_FrST0 },
00758   { X86::ADD_Fp80  , X86::ADD_FrST0 },
00759   { X86::DIV_Fp32  , X86::DIV_FrST0 },
00760   { X86::DIV_Fp64  , X86::DIV_FrST0 },
00761   { X86::DIV_Fp80  , X86::DIV_FrST0 },
00762   { X86::MUL_Fp32  , X86::MUL_FrST0 },
00763   { X86::MUL_Fp64  , X86::MUL_FrST0 },
00764   { X86::MUL_Fp80  , X86::MUL_FrST0 },
00765   { X86::SUB_Fp32  , X86::SUB_FrST0 },
00766   { X86::SUB_Fp64  , X86::SUB_FrST0 },
00767   { X86::SUB_Fp80  , X86::SUB_FrST0 },
00768 };
00769 
00770 
00771 /// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
00772 /// instructions which need to be simplified and possibly transformed.
00773 ///
00774 /// Result: ST(0) = fsub  ST(0), ST(i)
00775 ///         ST(i) = fsub  ST(0), ST(i)
00776 ///         ST(0) = fsubr ST(0), ST(i)
00777 ///         ST(i) = fsubr ST(0), ST(i)
00778 ///
00779 void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
00780   ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
00781   ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
00782   MachineInstr *MI = I;
00783 
00784   unsigned NumOperands = MI->getDesc().getNumOperands();
00785   assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
00786   unsigned Dest = getFPReg(MI->getOperand(0));
00787   unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
00788   unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
00789   bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
00790   bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
00791 
00792   unsigned TOS = getStackEntry(0);
00793 
00794   // One of our operands must be on the top of the stack.  If neither is yet, we
00795   // need to move one.
00796   if (Op0 != TOS && Op1 != TOS) {   // No operand at TOS?
00797     // We can choose to move either operand to the top of the stack.  If one of
00798     // the operands is killed by this instruction, we want that one so that we
00799     // can update right on top of the old version.
00800     if (KillsOp0) {
00801       moveToTop(Op0, I);         // Move dead operand to TOS.
00802       TOS = Op0;
00803     } else if (KillsOp1) {
00804       moveToTop(Op1, I);
00805       TOS = Op1;
00806     } else {
00807       // All of the operands are live after this instruction executes, so we
00808       // cannot update on top of any operand.  Because of this, we must
00809       // duplicate one of the stack elements to the top.  It doesn't matter
00810       // which one we pick.
00811       //
00812       duplicateToTop(Op0, Dest, I);
00813       Op0 = TOS = Dest;
00814       KillsOp0 = true;
00815     }
00816   } else if (!KillsOp0 && !KillsOp1) {
00817     // If we DO have one of our operands at the top of the stack, but we don't
00818     // have a dead operand, we must duplicate one of the operands to a new slot
00819     // on the stack.
00820     duplicateToTop(Op0, Dest, I);
00821     Op0 = TOS = Dest;
00822     KillsOp0 = true;
00823   }
00824 
00825   // Now we know that one of our operands is on the top of the stack, and at
00826   // least one of our operands is killed by this instruction.
00827   assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
00828          "Stack conditions not set up right!");
00829 
00830   // We decide which form to use based on what is on the top of the stack, and
00831   // which operand is killed by this instruction.
00832   const TableEntry *InstTable;
00833   bool isForward = TOS == Op0;
00834   bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
00835   if (updateST0) {
00836     if (isForward)
00837       InstTable = ForwardST0Table;
00838     else
00839       InstTable = ReverseST0Table;
00840   } else {
00841     if (isForward)
00842       InstTable = ForwardSTiTable;
00843     else
00844       InstTable = ReverseSTiTable;
00845   }
00846 
00847   int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
00848                       MI->getOpcode());
00849   assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
00850 
00851   // NotTOS - The register which is not on the top of stack...
00852   unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
00853 
00854   // Replace the old instruction with a new instruction
00855   MBB->remove(I++);
00856   I = BuildMI(*MBB, I, TII->get(Opcode)).addReg(getSTReg(NotTOS));
00857 
00858   // If both operands are killed, pop one off of the stack in addition to
00859   // overwriting the other one.
00860   if (KillsOp0 && KillsOp1 && Op0 != Op1) {
00861     assert(!updateST0 && "Should have updated other operand!");
00862     popStackAfter(I);   // Pop the top of stack
00863   }
00864 
00865   // Update stack information so that we know the destination register is now on
00866   // the stack.
00867   unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
00868   assert(UpdatedSlot < StackTop && Dest < 7);
00869   Stack[UpdatedSlot]   = Dest;
00870   RegMap[Dest]         = UpdatedSlot;
00871   MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
00872 }
00873 
00874 /// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
00875 /// register arguments and no explicit destinations.
00876 ///
00877 void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
00878   ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
00879   ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
00880   MachineInstr *MI = I;
00881 
00882   unsigned NumOperands = MI->getDesc().getNumOperands();
00883   assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
00884   unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
00885   unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
00886   bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
00887   bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
00888 
00889   // Make sure the first operand is on the top of stack, the other one can be
00890   // anywhere.
00891   moveToTop(Op0, I);
00892 
00893   // Change from the pseudo instruction to the concrete instruction.
00894   MI->getOperand(0).setReg(getSTReg(Op1));
00895   MI->RemoveOperand(1);
00896   MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
00897 
00898   // If any of the operands are killed by this instruction, free them.
00899   if (KillsOp0) freeStackSlotAfter(I, Op0);
00900   if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
00901 }
00902 
00903 /// handleCondMovFP - Handle two address conditional move instructions.  These
00904 /// instructions move a st(i) register to st(0) iff a condition is true.  These
00905 /// instructions require that the first operand is at the top of the stack, but
00906 /// otherwise don't modify the stack at all.
00907 void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
00908   MachineInstr *MI = I;
00909 
00910   unsigned Op0 = getFPReg(MI->getOperand(0));
00911   unsigned Op1 = getFPReg(MI->getOperand(2));
00912   bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
00913 
00914   // The first operand *must* be on the top of the stack.
00915   moveToTop(Op0, I);
00916 
00917   // Change the second operand to the stack register that the operand is in.
00918   // Change from the pseudo instruction to the concrete instruction.
00919   MI->RemoveOperand(0);
00920   MI->RemoveOperand(1);
00921   MI->getOperand(0).setReg(getSTReg(Op1));
00922   MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
00923   
00924   // If we kill the second operand, make sure to pop it from the stack.
00925   if (Op0 != Op1 && KillsOp1) {
00926     // Get this value off of the register stack.
00927     freeStackSlotAfter(I, Op1);
00928   }
00929 }
00930 
00931 
00932 /// handleSpecialFP - Handle special instructions which behave unlike other
00933 /// floating point instructions.  This is primarily intended for use by pseudo
00934 /// instructions.
00935 ///
00936 void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
00937   MachineInstr *MI = I;
00938   switch (MI->getOpcode()) {
00939   default: assert(0 && "Unknown SpecialFP instruction!");
00940   case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
00941   case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
00942   case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
00943     assert(StackTop == 0 && "Stack should be empty after a call!");
00944     pushReg(getFPReg(MI->getOperand(0)));
00945     break;
00946   case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type!
00947   case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type!
00948   case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type!
00949     // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm.
00950     // The pattern we expect is:
00951     //  CALL
00952     //  FP1 = FpGET_ST0
00953     //  FP4 = FpGET_ST1
00954     //
00955     // At this point, we've pushed FP1 on the top of stack, so it should be
00956     // present if it isn't dead.  If it was dead, we already emitted a pop to
00957     // remove it from the stack and StackTop = 0.
00958     
00959     // Push FP4 as top of stack next.
00960     pushReg(getFPReg(MI->getOperand(0)));
00961 
00962     // If StackTop was 0 before we pushed our operand, then ST(0) must have been
00963     // dead.  In this case, the ST(1) value is the only thing that is live, so
00964     // it should be on the TOS (after the pop that was emitted) and is.  Just
00965     // continue in this case.
00966     if (StackTop == 1)
00967       break;
00968     
00969     // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top
00970     // elements so that our accounting is correct.
00971     unsigned RegOnTop = getStackEntry(0);
00972     unsigned RegNo = getStackEntry(1);
00973     
00974     // Swap the slots the regs are in.
00975     std::swap(RegMap[RegNo], RegMap[RegOnTop]);
00976     
00977     // Swap stack slot contents.
00978     assert(RegMap[RegOnTop] < StackTop);
00979     std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
00980     break;
00981   }
00982   case X86::FpSET_ST0_32:
00983   case X86::FpSET_ST0_64:
00984   case X86::FpSET_ST0_80:
00985     assert(StackTop == 1 && "Stack should have one element on it to return!");
00986     --StackTop;   // "Forget" we have something on the top of stack!
00987     break;
00988   case X86::MOV_Fp3232:
00989   case X86::MOV_Fp3264:
00990   case X86::MOV_Fp6432:
00991   case X86::MOV_Fp6464: 
00992   case X86::MOV_Fp3280:
00993   case X86::MOV_Fp6480:
00994   case X86::MOV_Fp8032:
00995   case X86::MOV_Fp8064: 
00996   case X86::MOV_Fp8080: {
00997     unsigned SrcReg = getFPReg(MI->getOperand(1));
00998     unsigned DestReg = getFPReg(MI->getOperand(0));
00999 
01000     if (MI->killsRegister(X86::FP0+SrcReg)) {
01001       // If the input operand is killed, we can just change the owner of the
01002       // incoming stack slot into the result.
01003       unsigned Slot = getSlot(SrcReg);
01004       assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
01005       Stack[Slot] = DestReg;
01006       RegMap[DestReg] = Slot;
01007 
01008     } else {
01009       // For FMOV we just duplicate the specified value to a new stack slot.
01010       // This could be made better, but would require substantial changes.
01011       duplicateToTop(SrcReg, DestReg, I);
01012     }
01013     }
01014     break;
01015   case TargetInstrInfo::INLINEASM: {
01016     // The inline asm MachineInstr currently only *uses* FP registers for the
01017     // 'f' constraint.  These should be turned into the current ST(x) register
01018     // in the machine instr.  Also, any kills should be explicitly popped after
01019     // the inline asm.
01020     unsigned Kills[7];
01021     unsigned NumKills = 0;
01022     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
01023       MachineOperand &Op = MI->getOperand(i);
01024       if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
01025         continue;
01026       assert(Op.isUse() && "Only handle inline asm uses right now");
01027       
01028       unsigned FPReg = getFPReg(Op);
01029       Op.setReg(getSTReg(FPReg));
01030       
01031       // If we kill this operand, make sure to pop it from the stack after the
01032       // asm.  We just remember it for now, and pop them all off at the end in
01033       // a batch.
01034       if (Op.isKill())
01035         Kills[NumKills++] = FPReg;
01036     }
01037 
01038     // If this asm kills any FP registers (is the last use of them) we must
01039     // explicitly emit pop instructions for them.  Do this now after the asm has
01040     // executed so that the ST(x) numbers are not off (which would happen if we
01041     // did this inline with operand rewriting).
01042     //
01043     // Note: this might be a non-optimal pop sequence.  We might be able to do
01044     // better by trying to pop in stack order or something.
01045     MachineBasicBlock::iterator InsertPt = MI;
01046     while (NumKills)
01047       freeStackSlotAfter(InsertPt, Kills[--NumKills]);
01048 
01049     // Don't delete the inline asm!
01050     return;
01051   }
01052       
01053   case X86::RET:
01054   case X86::RETI:
01055     // If RET has an FP register use operand, pass the first one in ST(0) and
01056     // the second one in ST(1).
01057     if (isStackEmpty()) return;  // Quick check to see if any are possible.
01058     
01059     // Find the register operands.
01060     unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
01061     
01062     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
01063       MachineOperand &Op = MI->getOperand(i);
01064       if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
01065         continue;
01066       // FP Register uses must be kills unless there are two uses of the same
01067       // register, in which case only one will be a kill.
01068       assert(Op.isUse() &&
01069              (Op.isKill() ||                        // Marked kill.
01070               getFPReg(Op) == FirstFPRegOp ||       // Second instance.
01071               MI->killsRegister(Op.getReg())) &&    // Later use is marked kill.
01072              "Ret only defs operands, and values aren't live beyond it");
01073 
01074       if (FirstFPRegOp == ~0U)
01075         FirstFPRegOp = getFPReg(Op);
01076       else {
01077         assert(SecondFPRegOp == ~0U && "More than two fp operands!");
01078         SecondFPRegOp = getFPReg(Op);
01079       }
01080 
01081       // Remove the operand so that later passes don't see it.
01082       MI->RemoveOperand(i);
01083       --i, --e;
01084     }
01085     
01086     // There are only four possibilities here:
01087     // 1) we are returning a single FP value.  In this case, it has to be in
01088     //    ST(0) already, so just declare success by removing the value from the
01089     //    FP Stack.
01090     if (SecondFPRegOp == ~0U) {
01091       // Assert that the top of stack contains the right FP register.
01092       assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
01093              "Top of stack not the right register for RET!");
01094       
01095       // Ok, everything is good, mark the value as not being on the stack