LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

LoopStrengthReduce.cpp

Go to the documentation of this file.
00001 //===- LoopStrengthReduce.cpp - Strength Reduce GEPs in Loops -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass performs a strength reduction on array references inside loops that
00011 // have as one or more of their components the loop induction variable.  This is
00012 // accomplished by creating a new Value to hold the initial value of the array
00013 // access for the first iteration, and then creating a new GEP instruction in
00014 // the loop to increment the value by the appropriate amount.
00015 //
00016 //===----------------------------------------------------------------------===//
00017 
00018 #define DEBUG_TYPE "loop-reduce"
00019 #include "llvm/Transforms/Scalar.h"
00020 #include "llvm/Constants.h"
00021 #include "llvm/Instructions.h"
00022 #include "llvm/IntrinsicInst.h"
00023 #include "llvm/Type.h"
00024 #include "llvm/DerivedTypes.h"
00025 #include "llvm/Analysis/Dominators.h"
00026 #include "llvm/Analysis/LoopInfo.h"
00027 #include "llvm/Analysis/LoopPass.h"
00028 #include "llvm/Analysis/ScalarEvolutionExpander.h"
00029 #include "llvm/Support/CFG.h"
00030 #include "llvm/Support/GetElementPtrTypeIterator.h"
00031 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00032 #include "llvm/Transforms/Utils/Local.h"
00033 #include "llvm/Target/TargetData.h"
00034 #include "llvm/ADT/SetVector.h"
00035 #include "llvm/ADT/SmallPtrSet.h"
00036 #include "llvm/ADT/Statistic.h"
00037 #include "llvm/Support/Debug.h"
00038 #include "llvm/Support/Compiler.h"
00039 #include "llvm/Target/TargetLowering.h"
00040 #include <algorithm>
00041 #include <set>
00042 using namespace llvm;
00043 
00044 STATISTIC(NumReduced ,    "Number of GEPs strength reduced");
00045 STATISTIC(NumInserted,    "Number of PHIs inserted");
00046 STATISTIC(NumVariable,    "Number of PHIs with variable strides");
00047 STATISTIC(NumEliminated,  "Number of strides eliminated");
00048 STATISTIC(NumShadow,      "Number of Shadow IVs optimized");
00049 
00050 namespace {
00051 
00052   struct BasedUser;
00053 
00054   /// IVStrideUse - Keep track of one use of a strided induction variable, where
00055   /// the stride is stored externally.  The Offset member keeps track of the 
00056   /// offset from the IV, User is the actual user of the operand, and
00057   /// 'OperandValToReplace' is the operand of the User that is the use.
00058   struct VISIBILITY_HIDDEN IVStrideUse {
00059     SCEVHandle Offset;
00060     Instruction *User;
00061     Value *OperandValToReplace;
00062 
00063     // isUseOfPostIncrementedValue - True if this should use the
00064     // post-incremented version of this IV, not the preincremented version.
00065     // This can only be set in special cases, such as the terminating setcc
00066     // instruction for a loop or uses dominated by the loop.
00067     bool isUseOfPostIncrementedValue;
00068     
00069     IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O)
00070       : Offset(Offs), User(U), OperandValToReplace(O),
00071         isUseOfPostIncrementedValue(false) {}
00072   };
00073   
00074   /// IVUsersOfOneStride - This structure keeps track of all instructions that
00075   /// have an operand that is based on the trip count multiplied by some stride.
00076   /// The stride for all of these users is common and kept external to this
00077   /// structure.
00078   struct VISIBILITY_HIDDEN IVUsersOfOneStride {
00079     /// Users - Keep track of all of the users of this stride as well as the
00080     /// initial value and the operand that uses the IV.
00081     std::vector<IVStrideUse> Users;
00082     
00083     void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) {
00084       Users.push_back(IVStrideUse(Offset, User, Operand));
00085     }
00086   };
00087 
00088   /// IVInfo - This structure keeps track of one IV expression inserted during
00089   /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
00090   /// well as the PHI node and increment value created for rewrite.
00091   struct VISIBILITY_HIDDEN IVExpr {
00092     SCEVHandle  Stride;
00093     SCEVHandle  Base;
00094     PHINode    *PHI;
00095     Value      *IncV;
00096 
00097     IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi,
00098            Value *incv)
00099       : Stride(stride), Base(base), PHI(phi), IncV(incv) {}
00100   };
00101 
00102   /// IVsOfOneStride - This structure keeps track of all IV expression inserted
00103   /// during StrengthReduceStridedIVUsers for a particular stride of the IV.
00104   struct VISIBILITY_HIDDEN IVsOfOneStride {
00105     std::vector<IVExpr> IVs;
00106 
00107     void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI,
00108                Value *IncV) {
00109       IVs.push_back(IVExpr(Stride, Base, PHI, IncV));
00110     }
00111   };
00112 
00113   class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
00114     LoopInfo *LI;
00115     DominatorTree *DT;
00116     ScalarEvolution *SE;
00117     const TargetData *TD;
00118     const Type *UIntPtrTy;
00119     bool Changed;
00120 
00121     /// IVUsesByStride - Keep track of all uses of induction variables that we
00122     /// are interested in.  The key of the map is the stride of the access.
00123     std::map<SCEVHandle, IVUsersOfOneStride> IVUsesByStride;
00124 
00125     /// IVsByStride - Keep track of all IVs that have been inserted for a
00126     /// particular stride.
00127     std::map<SCEVHandle, IVsOfOneStride> IVsByStride;
00128 
00129     /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
00130     /// We use this to iterate over the IVUsesByStride collection without being
00131     /// dependent on random ordering of pointers in the process.
00132     SmallVector<SCEVHandle, 16> StrideOrder;
00133 
00134     /// CastedValues - As we need to cast values to uintptr_t, this keeps track
00135     /// of the casted version of each value.  This is accessed by
00136     /// getCastedVersionOf.
00137     DenseMap<Value*, Value*> CastedPointers;
00138 
00139     /// DeadInsts - Keep track of instructions we may have made dead, so that
00140     /// we can remove them after we are done working.
00141     SetVector<Instruction*> DeadInsts;
00142 
00143     /// TLI - Keep a pointer of a TargetLowering to consult for determining
00144     /// transformation profitability.
00145     const TargetLowering *TLI;
00146 
00147   public:
00148     static char ID; // Pass ID, replacement for typeid
00149     explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : 
00150       LoopPass((intptr_t)&ID), TLI(tli) {
00151     }
00152 
00153     bool runOnLoop(Loop *L, LPPassManager &LPM);
00154 
00155     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
00156       // We split critical edges, so we change the CFG.  However, we do update
00157       // many analyses if they are around.
00158       AU.addPreservedID(LoopSimplifyID);
00159       AU.addPreserved<LoopInfo>();
00160       AU.addPreserved<DominanceFrontier>();
00161       AU.addPreserved<DominatorTree>();
00162 
00163       AU.addRequiredID(LoopSimplifyID);
00164       AU.addRequired<LoopInfo>();
00165       AU.addRequired<DominatorTree>();
00166       AU.addRequired<TargetData>();
00167       AU.addRequired<ScalarEvolution>();
00168       AU.addPreserved<ScalarEvolution>();
00169     }
00170     
00171     /// getCastedVersionOf - Return the specified value casted to uintptr_t.
00172     ///
00173     Value *getCastedVersionOf(Instruction::CastOps opcode, Value *V);
00174 private:
00175     bool AddUsersIfInteresting(Instruction *I, Loop *L,
00176                                SmallPtrSet<Instruction*,16> &Processed);
00177     SCEVHandle GetExpressionSCEV(Instruction *E);
00178     ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
00179                                   IVStrideUse* &CondUse,
00180                                   const SCEVHandle* &CondStride);
00181     void OptimizeIndvars(Loop *L);
00182 
00183     /// OptimizeShadowIV - If IV is used in a int-to-float cast
00184     /// inside the loop then try to eliminate the cast opeation.
00185     void OptimizeShadowIV(Loop *L);
00186 
00187     bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
00188                            const SCEVHandle *&CondStride);
00189     bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
00190     unsigned CheckForIVReuse(bool, bool, const SCEVHandle&,
00191                              IVExpr&, const Type*,
00192                              const std::vector<BasedUser>& UsersToProcess);
00193     bool ValidStride(bool, int64_t,
00194                      const std::vector<BasedUser>& UsersToProcess);
00195     SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
00196                               IVUsersOfOneStride &Uses,
00197                               Loop *L,
00198                               bool &AllUsesAreAddresses,
00199                               std::vector<BasedUser> &UsersToProcess);
00200     void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
00201                                       IVUsersOfOneStride &Uses,
00202                                       Loop *L, bool isOnlyStride);
00203     void DeleteTriviallyDeadInstructions(SetVector<Instruction*> &Insts);
00204   };
00205 }
00206 
00207 char LoopStrengthReduce::ID = 0;
00208 static RegisterPass<LoopStrengthReduce>
00209 X("loop-reduce", "Loop Strength Reduction");
00210 
00211 LoopPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
00212   return new LoopStrengthReduce(TLI);
00213 }
00214 
00215 /// getCastedVersionOf - Return the specified value casted to uintptr_t. This
00216 /// assumes that the Value* V is of integer or pointer type only.
00217 ///
00218 Value *LoopStrengthReduce::getCastedVersionOf(Instruction::CastOps opcode, 
00219                                               Value *V) {
00220   if (V->getType() == UIntPtrTy) return V;
00221   if (Constant *CB = dyn_cast<Constant>(V))
00222     return ConstantExpr::getCast(opcode, CB, UIntPtrTy);
00223 
00224   Value *&New = CastedPointers[V];
00225   if (New) return New;
00226   
00227   New = SCEVExpander::InsertCastOfTo(opcode, V, UIntPtrTy);
00228   DeadInsts.insert(cast<Instruction>(New));
00229   return New;
00230 }
00231 
00232 
00233 /// DeleteTriviallyDeadInstructions - If any of the instructions is the
00234 /// specified set are trivially dead, delete them and see if this makes any of
00235 /// their operands subsequently dead.
00236 void LoopStrengthReduce::
00237 DeleteTriviallyDeadInstructions(SetVector<Instruction*> &Insts) {
00238   while (!Insts.empty()) {
00239     Instruction *I = Insts.back();
00240     Insts.pop_back();
00241 
00242     if (PHINode *PN = dyn_cast<PHINode>(I)) {
00243       // If all incoming values to the Phi are the same, we can replace the Phi
00244       // with that value.
00245       if (Value *PNV = PN->hasConstantValue()) {
00246         if (Instruction *U = dyn_cast<Instruction>(PNV))
00247           Insts.insert(U);
00248         SE->deleteValueFromRecords(PN);
00249         PN->replaceAllUsesWith(PNV);
00250         PN->eraseFromParent();
00251         Changed = true;
00252         continue;
00253       }
00254     }
00255 
00256     if (isInstructionTriviallyDead(I)) {
00257       for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
00258         if (Instruction *U = dyn_cast<Instruction>(*i))
00259           Insts.insert(U);
00260       SE->deleteValueFromRecords(I);
00261       I->eraseFromParent();
00262       Changed = true;
00263     }
00264   }
00265 }
00266 
00267 
00268 /// GetExpressionSCEV - Compute and return the SCEV for the specified
00269 /// instruction.
00270 SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp) {
00271   // Pointer to pointer bitcast instructions return the same value as their
00272   // operand.
00273   if (BitCastInst *BCI = dyn_cast<BitCastInst>(Exp)) {
00274     if (SE->hasSCEV(BCI) || !isa<Instruction>(BCI->getOperand(0)))
00275       return SE->getSCEV(BCI);
00276     SCEVHandle R = GetExpressionSCEV(cast<Instruction>(BCI->getOperand(0)));
00277     SE->setSCEV(BCI, R);
00278     return R;
00279   }
00280 
00281   // Scalar Evolutions doesn't know how to compute SCEV's for GEP instructions.
00282   // If this is a GEP that SE doesn't know about, compute it now and insert it.
00283   // If this is not a GEP, or if we have already done this computation, just let
00284   // SE figure it out.
00285   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Exp);
00286   if (!GEP || SE->hasSCEV(GEP))
00287     return SE->getSCEV(Exp);
00288     
00289   // Analyze all of the subscripts of this getelementptr instruction, looking
00290   // for uses that are determined by the trip count of the loop.  First, skip
00291   // all operands the are not dependent on the IV.
00292 
00293   // Build up the base expression.  Insert an LLVM cast of the pointer to
00294   // uintptr_t first.
00295   SCEVHandle GEPVal = SE->getUnknown(
00296       getCastedVersionOf(Instruction::PtrToInt, GEP->getOperand(0)));
00297 
00298   gep_type_iterator GTI = gep_type_begin(GEP);
00299   
00300   for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
00301        i != e; ++i, ++GTI) {
00302     // If this is a use of a recurrence that we can analyze, and it comes before
00303     // Op does in the GEP operand list, we will handle this when we process this
00304     // operand.
00305     if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
00306       const StructLayout *SL = TD->getStructLayout(STy);
00307       unsigned Idx = cast<ConstantInt>(*i)->getZExtValue();
00308       uint64_t Offset = SL->getElementOffset(Idx);
00309       GEPVal = SE->getAddExpr(GEPVal,
00310                              SE->getIntegerSCEV(Offset, UIntPtrTy));
00311     } else {
00312       unsigned GEPOpiBits = 
00313         (*i)->getType()->getPrimitiveSizeInBits();
00314       unsigned IntPtrBits = UIntPtrTy->getPrimitiveSizeInBits();
00315       Instruction::CastOps opcode = (GEPOpiBits < IntPtrBits ? 
00316           Instruction::SExt : (GEPOpiBits > IntPtrBits ? Instruction::Trunc :
00317             Instruction::BitCast));
00318       Value *OpVal = getCastedVersionOf(opcode, *i);
00319       SCEVHandle Idx = SE->getSCEV(OpVal);
00320 
00321       uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType());
00322       if (TypeSize != 1)
00323         Idx = SE->getMulExpr(Idx,
00324                             SE->getConstant(ConstantInt::get(UIntPtrTy,
00325                                                              TypeSize)));
00326       GEPVal = SE->getAddExpr(GEPVal, Idx);
00327     }
00328   }
00329 
00330   SE->setSCEV(GEP, GEPVal);
00331   return GEPVal;
00332 }
00333 
00334 /// getSCEVStartAndStride - Compute the start and stride of this expression,
00335 /// returning false if the expression is not a start/stride pair, or true if it
00336 /// is.  The stride must be a loop invariant expression, but the start may be
00337 /// a mix of loop invariant and loop variant expressions.
00338 static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L,
00339                                   SCEVHandle &Start, SCEVHandle &Stride,
00340                                   ScalarEvolution *SE) {
00341   SCEVHandle TheAddRec = Start;   // Initialize to zero.
00342 
00343   // If the outer level is an AddExpr, the operands are all start values except
00344   // for a nested AddRecExpr.
00345   if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
00346     for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
00347       if (SCEVAddRecExpr *AddRec =
00348              dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
00349         if (AddRec->getLoop() == L)
00350           TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
00351         else
00352           return false;  // Nested IV of some sort?
00353       } else {
00354         Start = SE->getAddExpr(Start, AE->getOperand(i));
00355       }
00356         
00357   } else if (isa<SCEVAddRecExpr>(SH)) {
00358     TheAddRec = SH;
00359   } else {
00360     return false;  // not analyzable.
00361   }
00362   
00363   SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
00364   if (!AddRec || AddRec->getLoop() != L) return false;
00365   
00366   // FIXME: Generalize to non-affine IV's.
00367   if (!AddRec->isAffine()) return false;
00368 
00369   Start = SE->getAddExpr(Start, AddRec->getOperand(0));
00370   
00371   if (!isa<SCEVConstant>(AddRec->getOperand(1)))
00372     DOUT << "[" << L->getHeader()->getName()
00373          << "] Variable stride: " << *AddRec << "\n";
00374 
00375   Stride = AddRec->getOperand(1);
00376   return true;
00377 }
00378 
00379 /// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
00380 /// and now we need to decide whether the user should use the preinc or post-inc
00381 /// value.  If this user should use the post-inc version of the IV, return true.
00382 ///
00383 /// Choosing wrong here can break dominance properties (if we choose to use the
00384 /// post-inc value when we cannot) or it can end up adding extra live-ranges to
00385 /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
00386 /// should use the post-inc value).
00387 static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
00388                                        Loop *L, DominatorTree *DT, Pass *P,
00389                                        SetVector<Instruction*> &DeadInsts){
00390   // If the user is in the loop, use the preinc value.
00391   if (L->contains(User->getParent())) return false;
00392   
00393   BasicBlock *LatchBlock = L->getLoopLatch();
00394   
00395   // Ok, the user is outside of the loop.  If it is dominated by the latch
00396   // block, use the post-inc value.
00397   if (DT->dominates(LatchBlock, User->getParent()))
00398     return true;
00399 
00400   // There is one case we have to be careful of: PHI nodes.  These little guys
00401   // can live in blocks that do not dominate the latch block, but (since their
00402   // uses occur in the predecessor block, not the block the PHI lives in) should
00403   // still use the post-inc value.  Check for this case now.
00404   PHINode *PN = dyn_cast<PHINode>(User);
00405   if (!PN) return false;  // not a phi, not dominated by latch block.
00406   
00407   // Look at all of the uses of IV by the PHI node.  If any use corresponds to
00408   // a block that is not dominated by the latch block, give up and use the
00409   // preincremented value.
00410   unsigned NumUses = 0;
00411   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
00412     if (PN->getIncomingValue(i) == IV) {
00413       ++NumUses;
00414       if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
00415         return false;
00416     }
00417 
00418   // Okay, all uses of IV by PN are in predecessor blocks that really are
00419   // dominated by the latch block.  Split the critical edges and use the
00420   // post-incremented value.
00421   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
00422     if (PN->getIncomingValue(i) == IV) {
00423       SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P, false);
00424       // Splitting the critical edge can reduce the number of entries in this
00425       // PHI.
00426       e = PN->getNumIncomingValues();
00427       if (--NumUses == 0) break;
00428     }
00429 
00430   // PHI node might have become a constant value after SplitCriticalEdge.
00431   DeadInsts.insert(User);
00432   
00433   return true;
00434 }
00435 
00436   
00437 
00438 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
00439 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
00440 /// return true.  Otherwise, return false.
00441 bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L,
00442                                       SmallPtrSet<Instruction*,16> &Processed) {
00443   if (!I->getType()->isInteger() && !isa<PointerType>(I->getType()))
00444     return false;   // Void and FP expressions cannot be reduced.
00445   if (!Processed.insert(I))
00446     return true;    // Instruction already handled.
00447   
00448   // Get the symbolic expression for this instruction.
00449   SCEVHandle ISE = GetExpressionSCEV(I);
00450   if (isa<SCEVCouldNotCompute>(ISE)) return false;
00451   
00452   // Get the start and stride for this expression.
00453   SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
00454   SCEVHandle Stride = Start;
00455   if (!getSCEVStartAndStride(ISE, L, Start, Stride, SE))
00456     return false;  // Non-reducible symbolic expression, bail out.
00457 
00458   std::vector<Instruction *> IUsers;
00459   // Collect all I uses now because IVUseShouldUsePostIncValue may 
00460   // invalidate use_iterator.
00461   for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
00462     IUsers.push_back(cast<Instruction>(*UI));
00463 
00464   for (unsigned iused_index = 0, iused_size = IUsers.size(); 
00465        iused_index != iused_size; ++iused_index) {
00466 
00467     Instruction *User = IUsers[iused_index];
00468 
00469     // Do not infinitely recurse on PHI nodes.
00470     if (isa<PHINode>(User) && Processed.count(User))
00471       continue;
00472 
00473     // If this is an instruction defined in a nested loop, or outside this loop,
00474     // don't recurse into it.
00475     bool AddUserToIVUsers = false;
00476     if (LI->getLoopFor(User->getParent()) != L) {
00477       DOUT << "FOUND USER in other loop: " << *User
00478            << "   OF SCEV: " << *ISE << "\n";
00479       AddUserToIVUsers = true;
00480     } else if (!AddUsersIfInteresting(User, L, Processed)) {
00481       DOUT << "FOUND USER: " << *User
00482            << "   OF SCEV: " << *ISE << "\n";
00483       AddUserToIVUsers = true;
00484     }
00485 
00486     if (AddUserToIVUsers) {
00487       IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride];
00488       if (StrideUses.Users.empty())     // First occurance of this stride?
00489         StrideOrder.push_back(Stride);
00490       
00491       // Okay, we found a user that we cannot reduce.  Analyze the instruction
00492       // and decide what to do with it.  If we are a use inside of the loop, use
00493       // the value before incrementation, otherwise use it after incrementation.
00494       if (IVUseShouldUsePostIncValue(User, I, L, DT, this, DeadInsts)) {
00495         // The value used will be incremented by the stride more than we are
00496         // expecting, so subtract this off.
00497         SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
00498         StrideUses.addUser(NewStart, User, I);
00499         StrideUses.Users.back().isUseOfPostIncrementedValue = true;
00500         DOUT << "   USING POSTINC SCEV, START=" << *NewStart<< "\n";
00501       } else {        
00502         StrideUses.addUser(Start, User, I);
00503       }
00504     }
00505   }
00506   return true;
00507 }
00508 
00509 namespace {
00510   /// BasedUser - For a particular base value, keep information about how we've
00511   /// partitioned the expression so far.
00512   struct BasedUser {
00513     /// SE - The current ScalarEvolution object.
00514     ScalarEvolution *SE;
00515 
00516     /// Base - The Base value for the PHI node that needs to be inserted for
00517     /// this use.  As the use is processed, information gets moved from this
00518     /// field to the Imm field (below).  BasedUser values are sorted by this
00519     /// field.
00520     SCEVHandle Base;
00521     
00522     /// Inst - The instruction using the induction variable.
00523     Instruction *Inst;
00524 
00525     /// OperandValToReplace - The operand value of Inst to replace with the
00526     /// EmittedBase.
00527     Value *OperandValToReplace;
00528 
00529     /// Imm - The immediate value that should be added to the base immediately
00530     /// before Inst, because it will be folded into the imm field of the
00531     /// instruction.
00532     SCEVHandle Imm;
00533 
00534     /// EmittedBase - The actual value* to use for the base value of this
00535     /// operation.  This is null if we should just use zero so far.
00536     Value *EmittedBase;
00537 
00538     // isUseOfPostIncrementedValue - True if this should use the
00539     // post-incremented version of this IV, not the preincremented version.
00540     // This can only be set in special cases, such as the terminating setcc
00541     // instruction for a loop and uses outside the loop that are dominated by
00542     // the loop.
00543     bool isUseOfPostIncrementedValue;
00544     
00545     BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
00546       : SE(se), Base(IVSU.Offset), Inst(IVSU.User), 
00547         OperandValToReplace(IVSU.OperandValToReplace), 
00548         Imm(SE->getIntegerSCEV(0, Base->getType())), EmittedBase(0),
00549         isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {}
00550 
00551     // Once we rewrite the code to insert the new IVs we want, update the
00552     // operands of Inst to use the new expression 'NewBase', with 'Imm' added
00553     // to it.
00554     void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
00555                                         Instruction *InsertPt,
00556                                        SCEVExpander &Rewriter, Loop *L, Pass *P,
00557                                        SetVector<Instruction*> &DeadInsts);
00558     
00559     Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 
00560                                        SCEVExpander &Rewriter,
00561                                        Instruction *IP, Loop *L);
00562     void dump() const;
00563   };
00564 }
00565 
00566 void BasedUser::dump() const {
00567   cerr << " Base=" << *Base;
00568   cerr << " Imm=" << *Imm;
00569   if (EmittedBase)
00570     cerr << "  EB=" << *EmittedBase;
00571 
00572   cerr << "   Inst: " << *Inst;
00573 }
00574 
00575 Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 
00576                                               SCEVExpander &Rewriter,
00577                                               Instruction *IP, Loop *L) {
00578   // Figure out where we *really* want to insert this code.  In particular, if
00579   // the user is inside of a loop that is nested inside of L, we really don't
00580   // want to insert this expression before the user, we'd rather pull it out as
00581   // many loops as possible.
00582   LoopInfo &LI = Rewriter.getLoopInfo();
00583   Instruction *BaseInsertPt = IP;
00584   
00585   // Figure out the most-nested loop that IP is in.
00586   Loop *InsertLoop = LI.getLoopFor(IP->getParent());
00587   
00588   // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
00589   // the preheader of the outer-most loop where NewBase is not loop invariant.
00590   while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) {
00591     BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
00592     InsertLoop = InsertLoop->getParentLoop();
00593   }
00594   
00595   // If there is no immediate value, skip the next part.
00596   if (Imm->isZero())
00597     return Rewriter.expandCodeFor(NewBase, BaseInsertPt);
00598 
00599   Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt);
00600 
00601   // If we are inserting the base and imm values in the same block, make sure to
00602   // adjust the IP position if insertion reused a result.
00603   if (IP == BaseInsertPt)
00604     IP = Rewriter.getInsertionPoint();
00605   
00606   // Always emit the immediate (if non-zero) into the same block as the user.
00607   SCEVHandle NewValSCEV = SE->getAddExpr(SE->getUnknown(Base), Imm);
00608   return Rewriter.expandCodeFor(NewValSCEV, IP);
00609   
00610 }
00611 
00612 
00613 // Once we rewrite the code to insert the new IVs we want, update the
00614 // operands of Inst to use the new expression 'NewBase', with 'Imm' added
00615 // to it. NewBasePt is the last instruction which contributes to the
00616 // value of NewBase in the case that it's a diffferent instruction from
00617 // the PHI that NewBase is computed from, or null otherwise.
00618 //
00619 void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
00620                                                Instruction *NewBasePt,
00621                                       SCEVExpander &Rewriter, Loop *L, Pass *P,
00622                                       SetVector<Instruction*> &DeadInsts) {
00623   if (!isa<PHINode>(Inst)) {
00624     // By default, insert code at the user instruction.
00625     BasicBlock::iterator InsertPt = Inst;
00626     
00627     // However, if the Operand is itself an instruction, the (potentially
00628     // complex) inserted code may be shared by many users.  Because of this, we
00629     // want to emit code for the computation of the operand right before its old
00630     // computation.  This is usually safe, because we obviously used to use the
00631     // computation when it was computed in its current block.  However, in some
00632     // cases (e.g. use of a post-incremented induction variable) the NewBase
00633     // value will be pinned to live somewhere after the original computation.
00634     // In this case, we have to back off.
00635     if (!isUseOfPostIncrementedValue) {
00636       if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
00637         InsertPt = NewBasePt;
00638         ++InsertPt;
00639       } else if (Instruction *OpInst
00640                  = dyn_cast<Instruction>(OperandValToReplace)) {
00641         InsertPt = OpInst;
00642         while (isa<PHINode>(InsertPt)) ++InsertPt;
00643       }
00644     }
00645     Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L);
00646     // Adjust the type back to match the Inst. Note that we can't use InsertPt
00647     // here because the SCEVExpander may have inserted the instructions after
00648     // that point, in its efforts to avoid inserting redundant expressions.
00649     if (isa<PointerType>(OperandValToReplace->getType())) {
00650       NewVal = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr,
00651                                             NewVal,
00652                                             OperandValToReplace->getType());
00653     }
00654     // Replace the use of the operand Value with the new Phi we just created.
00655     Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
00656     DOUT << "    CHANGED: IMM =" << *Imm;
00657     DOUT << "  \tNEWBASE =" << *NewBase;
00658     DOUT << "  \tInst = " << *Inst;
00659     return;
00660   }
00661   
00662   // PHI nodes are more complex.  We have to insert one copy of the NewBase+Imm
00663   // expression into each operand block that uses it.  Note that PHI nodes can
00664   // have multiple entries for the same predecessor.  We use a map to make sure
00665   // that a PHI node only has a single Value* for each predecessor (which also
00666   // prevents us from inserting duplicate code in some blocks).
00667   DenseMap<BasicBlock*, Value*> InsertedCode;
00668   PHINode *PN = cast<PHINode>(Inst);
00669   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
00670     if (PN->getIncomingValue(i) == OperandValToReplace) {
00671       // If this is a critical edge, split the edge so that we do not insert the
00672       // code on all predecessor/successor paths.  We do this unless this is the
00673       // canonical backedge for this loop, as this can make some inserted code
00674       // be in an illegal position.
00675       BasicBlock *PHIPred = PN->getIncomingBlock(i);
00676       if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
00677           (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
00678         
00679         // First step, split the critical edge.
00680         SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
00681             
00682         // Next step: move the basic block.  In particular, if the PHI node
00683         // is outside of the loop, and PredTI is in the loop, we want to
00684         // move the block to be immediately before the PHI block, not
00685         // immediately after PredTI.
00686         if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
00687           BasicBlock *NewBB = PN->getIncomingBlock(i);
00688           NewBB->moveBefore(PN->getParent());
00689         }
00690         
00691         // Splitting the edge can reduce the number of PHI entries we have.
00692         e = PN->getNumIncomingValues();
00693       }
00694 
00695       Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
00696       if (!Code) {
00697         // Insert the code into the end of the predecessor block.
00698         Instruction *InsertPt = PN->getIncomingBlock(i)->getTerminator();
00699         Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L);
00700 
00701         // Adjust the type back to match the PHI. Note that we can't use
00702         // InsertPt here because the SCEVExpander may have inserted its
00703         // instructions after that point, in its efforts to avoid inserting
00704         // redundant expressions.
00705         if (isa<PointerType>(PN->getType())) {
00706           Code = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr,
00707                                               Code,
00708                                               PN->getType());
00709         }
00710       }
00711       
00712       // Replace the use of the operand Value with the new Phi we just created.
00713       PN->setIncomingValue(i, Code);
00714       Rewriter.clear();
00715     }
00716   }
00717 
00718   // PHI node might have become a constant value after SplitCriticalEdge.
00719   DeadInsts.insert(Inst);
00720 
00721   DOUT << "    CHANGED: IMM =" << *Imm << "  Inst = " << *Inst;
00722 }
00723 
00724 
00725 /// isTargetConstant - Return true if the following can be referenced by the
00726 /// immediate field of a target instruction.
00727 static bool isTargetConstant(const SCEVHandle &V, const Type *UseTy,
00728                              const TargetLowering *TLI) {
00729   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
00730     int64_t VC = SC->getValue()->getSExtValue();
00731     if (TLI) {
00732       TargetLowering::AddrMode AM;
00733       AM.BaseOffs = VC;
00734       return TLI->isLegalAddressingMode(AM, UseTy);
00735     } else {
00736       // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
00737       return (VC > -(1 << 16) && VC < (1 << 16)-1);
00738     }
00739   }
00740 
00741   if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
00742     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue()))
00743       if (TLI && CE->getOpcode() == Instruction::PtrToInt) {
00744         Constant *Op0 = CE->getOperand(0);
00745         if (GlobalValue *GV = dyn_cast<GlobalValue>(Op0)) {
00746           TargetLowering::AddrMode AM;
00747           AM.BaseGV = GV;
00748           return TLI->isLegalAddressingMode(AM, UseTy);
00749         }
00750       }
00751   return false;
00752 }
00753 
00754 /// MoveLoopVariantsToImediateField - Move any subexpressions from Val that are
00755 /// loop varying to the Imm operand.
00756 static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm,
00757                                             Loop *L, ScalarEvolution *SE) {
00758   if (Val->isLoopInvariant(L)) return;  // Nothing to do.
00759   
00760   if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
00761     std::vector<SCEVHandle> NewOps;
00762     NewOps.reserve(SAE->getNumOperands());
00763     
00764     for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
00765       if (!SAE->getOperand(i)->isLoopInvariant(L)) {
00766         // If this is a loop-variant expression, it must stay in the immediate
00767         // field of the expression.
00768         Imm = SE->getAddExpr(Imm, SAE->getOperand(i));
00769       } else {
00770         NewOps.push_back(SAE->getOperand(i));
00771       }
00772 
00773     if (NewOps.empty())
00774       Val = SE->getIntegerSCEV(0, Val->getType());
00775     else
00776       Val = SE->getAddExpr(NewOps);
00777   } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
00778     // Try to pull immediates out of the start value of nested addrec's.
00779     SCEVHandle Start = SARE->getStart();
00780     MoveLoopVariantsToImediateField(Start, Imm, L, SE);
00781     
00782     std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
00783     Ops[0] = Start;
00784     Val = SE->getAddRecExpr(Ops, SARE->getLoop());
00785   } else {
00786     // Otherwise, all of Val is variant, move the whole thing over.
00787     Imm = SE->getAddExpr(Imm, Val);
00788     Val = SE->getIntegerSCEV(0, Val->getType());
00789   }
00790 }
00791 
00792 
00793 /// MoveImmediateValues - Look at Val, and pull out any additions of constants
00794 /// that can fit into the immediate field of instructions in the target.
00795 /// Accumulate these immediate values into the Imm value.
00796 static void MoveImmediateValues(const TargetLowering *TLI,
00797                                 Instruction *User,
00798                                 SCEVHandle &Val, SCEVHandle &Imm,
00799                                 bool isAddress, Loop *L,
00800                                 ScalarEvolution *SE) {
00801   const Type *UseTy = User->getType();
00802   if (StoreInst *SI = dyn_cast<StoreInst>(User))
00803     UseTy = SI->getOperand(0)->getType();
00804 
00805   if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
00806     std::vector<SCEVHandle> NewOps;
00807     NewOps.reserve(SAE->getNumOperands());
00808     
00809     for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
00810       SCEVHandle NewOp = SAE->getOperand(i);
00811       MoveImmediateValues(TLI, User, NewOp, Imm, isAddress, L, SE);
00812       
00813       if (!NewOp->isLoopInvariant(L)) {
00814         // If this is a loop-variant expression, it must stay in the immediate
00815         // field of the expression.
00816         Imm = SE->getAddExpr(Imm, NewOp);
00817       } else {
00818         NewOps.push_back(NewOp);
00819       }
00820     }
00821 
00822     if (NewOps.empty())
00823       Val = SE->getIntegerSCEV(0, Val->getType());
00824     else
00825       Val = SE->getAddExpr(NewOps);
00826     return;
00827   } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
00828     // Try to pull immediates out of the start value of nested addrec's.
00829     SCEVHandle Start = SARE->getStart();
00830     MoveImmediateValues(TLI, User, Start, Imm, isAddress, L, SE);
00831     
00832     if (Start != SARE->getStart()) {
00833       std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
00834       Ops[0] = Start;
00835       Val = SE->getAddRecExpr(Ops, SARE->getLoop());
00836     }
00837     return;
00838   } else if (SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
00839     // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
00840     if (isAddress && isTargetConstant(SME->getOperand(0), UseTy, TLI) &&
00841         SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
00842 
00843       SCEVHandle SubImm = SE->getIntegerSCEV(0, Val->getType());
00844       SCEVHandle NewOp = SME->getOperand(1);
00845       MoveImmediateValues(TLI, User, NewOp, SubImm, isAddress, L, SE);
00846       
00847       // If we extracted something out of the subexpressions, see if we can 
00848       // simplify this!
00849       if (NewOp != SME->getOperand(1)) {
00850         // Scale SubImm up by "8".  If the result is a target constant, we are
00851         // good.
00852         SubImm = SE->getMulExpr(SubImm, SME->getOperand(0));
00853         if (isTargetConstant(SubImm, UseTy, TLI)) {
00854           // Accumulate the immediate.
00855           Imm = SE->getAddExpr(Imm, SubImm);
00856           
00857           // Update what is left of 'Val'.
00858           Val = SE->getMulExpr(SME->getOperand(0), NewOp);
00859           return;
00860         }
00861       }
00862     }
00863   }
00864 
00865   // Loop-variant expressions must stay in the immediate field of the
00866   // expression.
00867   if ((isAddress && isTargetConstant(Val, UseTy, TLI)) ||
00868       !Val->isLoopInvariant(L)) {
00869     Imm = SE->getAddExpr(Imm, Val);
00870     Val = SE->getIntegerSCEV(0, Val->getType());
00871     return;
00872   }
00873 
00874   // Otherwise, no immediates to move.
00875 }
00876 
00877 
00878 /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
00879 /// added together.  This is used to reassociate common addition subexprs
00880 /// together for maximal sharing when rewriting bases.
00881 static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
00882                              SCEVHandle Expr,
00883                              ScalarEvolution *SE) {
00884   if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
00885     for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
00886       SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
00887   } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
00888     SCEVHandle Zero = SE->getIntegerSCEV(0, Expr->getType());
00889     if (SARE->getOperand(0) == Zero) {
00890       SubExprs.push_back(Expr);
00891     } else {
00892       // Compute the addrec with zero as its base.
00893       std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
00894       Ops[0] = Zero;   // Start with zero base.
00895       SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
00896       
00897 
00898       SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
00899     }
00900   } else if (!Expr->isZero()) {
00901     // Do not add zero.
00902     SubExprs.push_back(Expr);
00903   }
00904 }
00905 
00906 
00907 /// RemoveCommonExpressionsFromUseBases - Look through all of the uses in Bases,
00908 /// removing any common subexpressions from it.  Anything truly common is
00909 /// removed, accumulated, and returned.  This looks for things like (a+b+c) and
00910 /// (a+c+d) -> (a+c).  The common expression is *removed* from the Bases.
00911 static SCEVHandle 
00912 RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
00913                                     ScalarEvolution *SE) {
00914   unsigned NumUses = Uses.size();
00915 
00916   // Only one use?  Use its base, regardless of what it is!
00917   SCEVHandle Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
00918   SCEVHandle Result = Zero;
00919   if (NumUses == 1) {
00920     std::swap(Result, Uses[0].Base);
00921     return Result;
00922   }
00923 
00924   // To find common subexpressions, count how many of Uses use each expression.
00925   // If any subexpressions are used Uses.size() times, they are common.
00926   std::map<SCEVHandle, unsigned> SubExpressionUseCounts;
00927   
00928   // UniqueSubExprs - Keep track of all of the subexpressions we see in the
00929   // order we see them.
00930   std::vector<SCEVHandle> UniqueSubExprs;
00931 
00932   std::vector<SCEVHandle> SubExprs;
00933   for (unsigned i = 0; i != NumUses; ++i) {
00934     // If the base is zero (which is common), return zero now, there are no
00935     // CSEs we can find.
00936     if (Uses[i].Base == Zero) return Zero;
00937 
00938     // Split the expression into subexprs.
00939     SeparateSubExprs(SubExprs, Uses[i].Base, SE);
00940     // Add one to SubExpressionUseCounts for each subexpr present.
00941     for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
00942       if (++SubExpressionUseCounts[SubExprs[j]] == 1)
00943         UniqueSubExprs.push_back(SubExprs[j]);
00944     SubExprs.clear();
00945   }
00946 
00947   // Now that we know how many times each is used, build Result.  Iterate over
00948   // UniqueSubexprs so that we have a stable ordering.
00949   for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
00950     std::map<SCEVHandle, unsigned>::iterator I = 
00951        SubExpressionUseCounts.find(UniqueSubExprs[i]);
00952     assert(I != SubExpressionUseCounts.end() && "Entry not found?");
00953     if (I->second == NumUses) {  // Found CSE!
00954       Result = SE->getAddExpr(Result, I->first);
00955     } else {
00956       // Remove non-cse's from SubExpressionUseCounts.
00957       SubExpressionUseCounts.erase(I);
00958     }
00959   }
00960   
00961   // If we found no CSE's, return now.
00962   if (Result == Zero) return Result;
00963   
00964   // Otherwise, remove all of the CSE's we found from each of the base values.
00965   for (unsigned i = 0; i != NumUses; ++i) {
00966     // Split the expression into subexprs.
00967     SeparateSubExprs(SubExprs, Uses[i].Base, SE);
00968 
00969     // Remove any common subexpressions.
00970     for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
00971       if (SubExpressionUseCounts.count(SubExprs[j])) {
00972         SubExprs.erase(SubExprs.begin()+j);
00973         --j; --e;
00974       }
00975     
00976     // Finally, the non-shared expressions together.
00977     if (SubExprs.empty())
00978       Uses[i].Base = Zero;
00979     else
00980       Uses[i].Base = SE->getAddExpr(SubExprs);
00981     SubExprs.clear();
00982   }
00983  
00984   return Result;
00985 }
00986 
00987 /// ValidStride - Check whether the given Scale is valid for all loads and 
00988 /// stores in UsersToProcess.
00989 ///
00990 bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
00991                                int64_t Scale, 
00992                                const std::vector<BasedUser>& UsersToProcess) {
00993   if (!TLI)
00994     return true;
00995 
00996   for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
00997     // If this is a load or other access, pass the type of the access in.
00998     const Type *AccessTy = Type::VoidTy;
00999     if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst))
01000       AccessTy = SI->getOperand(0)->getType();
01001     else if (LoadInst *LI = dyn_cast<LoadInst>(UsersToProcess[i].Inst))
01002       AccessTy = LI->getType();
01003     else if (isa<PHINode>(UsersToProcess[i].Inst))
01004       continue;
01005     
01006     TargetLowering::AddrMode AM;
01007     if (SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
01008       AM.BaseOffs = SC->getValue()->getSExtValue();
01009     AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
01010     AM.Scale = Scale;
01011 
01012     // If load[imm+r*scale] is illegal, bail out.
01013     if (!TLI->isLegalAddressingMode(AM, AccessTy))
01014       return false;
01015   }
01016   return true;
01017 }
01018 
01019 /// RequiresTypeConversion - Returns true if converting Ty to NewTy is not
01020 /// a nop.
01021 bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
01022                                                 const Type *Ty2) {
01023   if (Ty1 == Ty2)
01024     return false;
01025   if (TLI && TLI->isTruncateFree(Ty1, Ty2))
01026     return false;
01027   return (!Ty1->canLosslesslyBitCastTo(Ty2) &&
01028           !(isa<PointerType>(Ty2) &&
01029             Ty1->canLosslesslyBitCastTo(UIntPtrTy)) &&
01030           !(isa<PointerType>(Ty1) &&
01031             Ty2->canLosslesslyBitCastTo(UIntPtrTy)));
01032 }
01033 
01034 /// CheckForIVReuse - Returns the multiple if the stride is the multiple
01035 /// of a previous stride and it is a legal value for the target addressing
01036 /// mode scale component and optional base reg. This allows the users of
01037 /// this stride to be rewritten as prev iv * factor. It returns 0 if no
01038 /// reuse is possible.
01039 unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
01040                                 bool AllUsesAreAddresses,
01041                                 const SCEVHandle &Stride, 
01042                                 IVExpr &IV, const Type *Ty,
01043                                 const std::vector<BasedUser>& UsersToProcess) {
01044   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
01045     int64_t SInt = SC->getValue()->getSExtValue();
01046     for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
01047          ++NewStride) {
01048       std::map<SCEVHandle, IVsOfOneStride>::iterator SI = 
01049                 IVsByStride.find(StrideOrder[NewStride]);
01050       if (SI == IVsByStride.end()) 
01051         continue;
01052       int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
01053       if (SI->first != Stride &&
01054           (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
01055         continue;
01056       int64_t Scale = SInt / SSInt;
01057       // Check that this stride is valid for all the types used for loads and
01058       // stores; if it can be used for some and not others, we might as well use
01059       // the original stride everywhere, since we have to create the IV for it
01060       // anyway. If the scale is 1, then we don't need to worry about folding
01061       // multiplications.
01062       if (Scale == 1 ||
01063           (AllUsesAreAddresses &&
01064            ValidStride(HasBaseReg, Scale, UsersToProcess)))
01065         for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
01066                IE = SI->second.IVs.end(); II != IE; ++II)
01067           // FIXME: Only handle base == 0 for now.
01068           // Only reuse previous IV if it would not require a type conversion.
01069           if (II->Base->isZero() &&
01070               !RequiresTypeConversion(II->Base->getType(), Ty)) {
01071             IV = *II;
01072             return Scale;
01073           }
01074     }
01075   }
01076   return 0;
01077 }
01078 
01079 /// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
01080 /// returns true if Val's isUseOfPostIncrementedValue is true.
01081 static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
01082   return Val.isUseOfPostIncrementedValue;
01083 }
01084 
01085 /// isNonConstantNegative - Return true if the specified scev is negated, but
01086 /// not a constant.
01087 static bool isNonConstantNegative(const SCEVHandle &Expr) {
01088   SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
01089   if (!Mul) return false;
01090   
01091   // If there is a constant factor, it will be first.
01092   SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
01093   if (!SC) return false;
01094   
01095   // Return true if the value is negative, this matches things like (-42 * V).
01096   return SC->getValue()->getValue().isNegative();
01097 }
01098 
01099 /// isAddress - Returns true if the specified instruction is using the
01100 /// specified value as an address.
01101 static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
01102   bool isAddress = isa<LoadInst>(Inst);
01103   if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
01104     if (SI->getOperand(1) == OperandVal)
01105       isAddress = true;
01106   } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
01107     // Addressing modes can also be folded into prefetches and a variety
01108     // of intrinsics.
01109     switch (II->getIntrinsicID()) {
01110       default: break;
01111       case Intrinsic::prefetch:
01112       case Intrinsic::x86_sse2_loadu_dq:
01113       case Intrinsic::x86_sse2_loadu_pd:
01114       case Intrinsic::x86_sse_loadu_ps:
01115       case Intrinsic::x86_sse_storeu_ps:
01116       case Intrinsic::x86_sse2_storeu_pd:
01117       case Intrinsic::x86_sse2_storeu_dq:
01118       case Intrinsic::x86_sse2_storel_dq:
01119         if (II->getOperand(1) == OperandVal)
01120           isAddress = true;
01121         break;
01122     }
01123   }
01124   return isAddress;
01125 }
01126 
01127 // CollectIVUsers - Transform our list of users and offsets to a bit more
01128 // complex table. In this new vector, each 'BasedUser' contains 'Base', the base
01129 // of the strided accesses, as well as the old information from Uses. We
01130 // progressively move information from the Base field to the Imm field, until
01131 // we eventually have the full access expression to rewrite the use.
01132 SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
01133                                               IVUsersOfOneStride &Uses,
01134                                               Loop *L,
01135                                               bool &AllUsesAreAddresses,
01136                                        std::vector<BasedUser> &UsersToProcess) {
01137   UsersToProcess.reserve(Uses.Users.size());
01138   for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) {
01139     UsersToProcess.push_back(BasedUser(Uses.Users[i], SE));
01140     
01141     // Move any loop invariant operands from the offset field to the immediate
01142     // field of the use, so that we don't try to use something before it is
01143     // computed.
01144     MoveLoopVariantsToImediateField(UsersToProcess.back().Base,
01145                                     UsersToProcess.back().Imm, L, SE);
01146     assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
01147            "Base value is not loop invariant!");
01148   }
01149 
01150   // We now have a whole bunch of uses of like-strided induction variables, but
01151   // they might all have different bases.  We want to emit one PHI node for this
01152   // stride which we fold as many common expressions (between the IVs) into as
01153   // possible.  Start by identifying the common expressions in the base values 
01154   // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
01155   // "A+B"), emit it to the preheader, then remove the expression from the
01156   // UsersToProcess base values.
01157   SCEVHandle CommonExprs =
01158     RemoveCommonExpressionsFromUseBases(UsersToProcess, SE);
01159 
01160   // Next, figure out what we can represent in the immediate fields of
01161   // instructions.  If we can represent anything there, move it to the imm
01162   // fields of the BasedUsers.  We do this so that it increases the commonality
01163   // of the remaining uses.
01164   unsigned NumPHI = 0;
01165   for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
01166     // If the user is not in the current loop, this means it is using the exit
01167     // value of the IV.  Do not put anything in the base, make sure it's all in
01168     // the immediate field to allow as much factoring as possible.
01169     if (!L->contains(UsersToProcess[i].Inst->getParent())) {
01170       UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
01171                                              UsersToProcess[i].Base);
01172       UsersToProcess[i].Base = 
01173         SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
01174     } else {
01175       
01176       // Addressing modes can be folded into loads and stores.  Be careful that
01177       // the store is through the expression, not of the expression though.
01178       bool isPHI = false;
01179       bool isAddress = isAddressUse(UsersToProcess[i].Inst,
01180                                     UsersToProcess[i].OperandValToReplace);
01181       if (isa<PHINode>(UsersToProcess[i].Inst)) {
01182         isPHI = true;
01183         ++NumPHI;
01184       }
01185 
01186       // If this use isn't an address, then not all uses are addresses.
01187       if (!isAddress && !isPHI)
01188         AllUsesAreAddresses = false;
01189       
01190       MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
01191                           UsersToProcess[i].Imm, isAddress, L, SE);
01192     }
01193   }
01194 
01195   // If one of the use if a PHI node and all other uses are addresses, still
01196   // allow iv reuse. Essentially we are trading one constant multiplication
01197   // for one fewer iv.
01198   if (NumPHI > 1)
01199     AllUsesAreAddresses = false;
01200 
01201   return CommonExprs;
01202 }
01203 
01204 /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
01205 /// stride of IV.  All of the users may have different starting values, and this
01206 /// may not be the only stride (we know it is if isOnlyStride is true).
01207 void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
01208                                                       IVUsersOfOneStride &Uses,
01209                                                       Loop *L,
01210                                                       bool isOnlyStride) {
01211   // If all the users are moved to another stride, then there is nothing to do.
01212   if (Uses.Users.empty())
01213     return;
01214 
01215   // Keep track if every use in UsersToProcess is an address. If they all are,
01216   // we may be able to rewrite the entire collection of them in terms of a
01217   // smaller-stride IV.
01218   bool AllUsesAreAddresses = true;
01219 
01220   // Transform our list of users and offsets to a bit more complex table.  In
01221   // this new vector, each 'BasedUser' contains 'Base' the base of the
01222   // strided accessas well as the old information from Uses.  We progressively
01223   // move information from the Base field to the Imm field, until we eventually
01224   // have the full access expression to rewrite the use.
01225   std::vector<BasedUser> UsersToProcess;
01226   SCEVHandle CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
01227                                           UsersToProcess);
01228 
01229   // If we managed to find some expressions in common, we'll need to carry
01230   // their value in a register and add it in for each use. This will take up
01231   // a register operand, which potentially restricts what stride values are
01232   // valid.
01233   bool HaveCommonExprs = !CommonExprs->isZero();
01234   
01235   // If all uses are addresses, check if it is possible to reuse an IV with a
01236   // stride that is a factor of this stride. And that the multiple is a number
01237   // that can be encoded in the scale field of the target addressing mode. And
01238   // that we will have a valid instruction after this substition, including the
01239   // immediate field, if any.
01240   PHINode *NewPHI = NULL;
01241   Value   *IncV   = NULL;
01242   IVExpr   ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
01243                    SE->getIntegerSCEV(0, Type::Int32Ty),
01244                    0, 0);
01245   unsigned RewriteFactor = 0;
01246   RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
01247                                   Stride, ReuseIV, CommonExprs->getType(),
01248                                   UsersToProcess);
01249   if (RewriteFactor != 0) {
01250     DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
01251          << " and BASE " << *ReuseIV.Base << " :\n";
01252     NewPHI = ReuseIV.PHI;
01253     IncV   = ReuseIV.IncV;
01254   }
01255 
01256   const Type *ReplacedTy = CommonExprs->getType();
01257   
01258   // Now that we know what we need to do, insert the PHI node itself.
01259   //
01260   DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE "
01261        << *Stride << " and BASE " << *CommonExprs << ": ";
01262 
01263   SCEVExpander Rewriter(*SE, *LI);
01264   SCEVExpander PreheaderRewriter(*SE, *LI);
01265   
01266   BasicBlock  *Preheader = L->getLoopPreheader();
01267   Instruction *PreInsertPt = Preheader->getTerminator();
01268   Instruction *PhiInsertBefore = L->getHeader()->begin();
01269   
01270   BasicBlock *LatchBlock = L->getLoopLatch();
01271 
01272 
01273   // Emit the initial base value into the loop preheader.
01274   Value *CommonBaseV
01275     = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt);
01276 
01277   if (RewriteFactor == 0) {
01278     // Create a new Phi for this base, and stick it in the loop header.
01279     NewPHI = PHINode::Create(ReplacedTy, "iv.", PhiInsertBefore);
01280     ++NumInserted;
01281   
01282     // Add common base to the new Phi node.
01283     NewPHI->addIncoming(CommonBaseV, Preheader);
01284 
01285     // If the stride is negative, insert a sub instead of an add for the
01286     // increment.
01287     bool isNegative = isNonConstantNegative(Stride);
01288     SCEVHandle IncAmount = Stride;
01289     if (isNegative)
01290       IncAmount = SE->getNegativeSCEV(Stride);
01291     
01292