LLVM API Documentation

ScalarReplAggregates.cpp

Go to the documentation of this file.
00001 //===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This transformation implements the well known scalar replacement of
00011 // aggregates transformation.  This xform breaks up alloca instructions of
00012 // aggregate type (structure or array) into individual alloca instructions for
00013 // each member (if possible).  Then, if possible, it transforms the individual
00014 // alloca instructions into nice clean scalar SSA form.
00015 //
00016 // This combines a simple SRoA algorithm with the Mem2Reg algorithm because
00017 // often interact, especially for C++ programs.  As such, iterating between
00018 // SRoA, then Mem2Reg until we run out of things to promote works well.
00019 //
00020 //===----------------------------------------------------------------------===//
00021 
00022 #define DEBUG_TYPE "scalarrepl"
00023 #include "llvm/Transforms/Scalar.h"
00024 #include "llvm/Constants.h"
00025 #include "llvm/DerivedTypes.h"
00026 #include "llvm/Function.h"
00027 #include "llvm/GlobalVariable.h"
00028 #include "llvm/Instructions.h"
00029 #include "llvm/IntrinsicInst.h"
00030 #include "llvm/Pass.h"
00031 #include "llvm/Analysis/Dominators.h"
00032 #include "llvm/Target/TargetData.h"
00033 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
00034 #include "llvm/Support/Debug.h"
00035 #include "llvm/Support/GetElementPtrTypeIterator.h"
00036 #include "llvm/Support/MathExtras.h"
00037 #include "llvm/Support/Compiler.h"
00038 #include "llvm/ADT/SmallVector.h"
00039 #include "llvm/ADT/Statistic.h"
00040 #include "llvm/ADT/StringExtras.h"
00041 using namespace llvm;
00042 
00043 STATISTIC(NumReplaced,  "Number of allocas broken up");
00044 STATISTIC(NumPromoted,  "Number of allocas promoted");
00045 STATISTIC(NumConverted, "Number of aggregates converted to scalar");
00046 STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
00047 
00048 namespace {
00049   struct VISIBILITY_HIDDEN SROA : public FunctionPass {
00050     static char ID; // Pass identification, replacement for typeid
00051     explicit SROA(signed T = -1) : FunctionPass(&ID) {
00052       if (T == -1)
00053         SRThreshold = 128;
00054       else
00055         SRThreshold = T;
00056     }
00057 
00058     bool runOnFunction(Function &F);
00059 
00060     bool performScalarRepl(Function &F);
00061     bool performPromotion(Function &F);
00062 
00063     // getAnalysisUsage - This pass does not require any passes, but we know it
00064     // will not alter the CFG, so say so.
00065     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
00066       AU.addRequired<DominatorTree>();
00067       AU.addRequired<DominanceFrontier>();
00068       AU.addRequired<TargetData>();
00069       AU.setPreservesCFG();
00070     }
00071 
00072   private:
00073     TargetData *TD;
00074     
00075     /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
00076     /// information about the uses.  All these fields are initialized to false
00077     /// and set to true when something is learned.
00078     struct AllocaInfo {
00079       /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
00080       bool isUnsafe : 1;
00081       
00082       /// needsCanon - This is set to true if there is some use of the alloca
00083       /// that requires canonicalization.
00084       bool needsCanon : 1;
00085       
00086       /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
00087       bool isMemCpySrc : 1;
00088 
00089       /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
00090       bool isMemCpyDst : 1;
00091 
00092       AllocaInfo()
00093         : isUnsafe(false), needsCanon(false), 
00094           isMemCpySrc(false), isMemCpyDst(false) {}
00095     };
00096     
00097     unsigned SRThreshold;
00098 
00099     void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
00100 
00101     int isSafeAllocaToScalarRepl(AllocationInst *AI);
00102 
00103     void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
00104                                AllocaInfo &Info);
00105     void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
00106                          AllocaInfo &Info);
00107     void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
00108                                         unsigned OpNo, AllocaInfo &Info);
00109     void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI,
00110                                         AllocaInfo &Info);
00111     
00112     void DoScalarReplacement(AllocationInst *AI, 
00113                              std::vector<AllocationInst*> &WorkList);
00114     void CanonicalizeAllocaUsers(AllocationInst *AI);
00115     AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base);
00116     
00117     void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
00118                                     SmallVector<AllocaInst*, 32> &NewElts);
00119     
00120     void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
00121                                       AllocationInst *AI,
00122                                       SmallVector<AllocaInst*, 32> &NewElts);
00123     void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
00124                                        SmallVector<AllocaInst*, 32> &NewElts);
00125     
00126     const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial);
00127     void ConvertToScalar(AllocationInst *AI, const Type *Ty);
00128     void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset);
00129     Value *ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI, 
00130                                      unsigned Offset);
00131     Value *ConvertUsesOfStoreToScalar(StoreInst *SI, AllocaInst *NewAI, 
00132                                       unsigned Offset);
00133     static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
00134   };
00135 }
00136 
00137 char SROA::ID = 0;
00138 static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
00139 
00140 // Public interface to the ScalarReplAggregates pass
00141 FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { 
00142   return new SROA(Threshold);
00143 }
00144 
00145 
00146 bool SROA::runOnFunction(Function &F) {
00147   TD = &getAnalysis<TargetData>();
00148   
00149   bool Changed = performPromotion(F);
00150   while (1) {
00151     bool LocalChange = performScalarRepl(F);
00152     if (!LocalChange) break;   // No need to repromote if no scalarrepl
00153     Changed = true;
00154     LocalChange = performPromotion(F);
00155     if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
00156   }
00157 
00158   return Changed;
00159 }
00160 
00161 
00162 bool SROA::performPromotion(Function &F) {
00163   std::vector<AllocaInst*> Allocas;
00164   DominatorTree         &DT = getAnalysis<DominatorTree>();
00165   DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
00166 
00167   BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
00168 
00169   bool Changed = false;
00170 
00171   while (1) {
00172     Allocas.clear();
00173 
00174     // Find allocas that are safe to promote, by looking at all instructions in
00175     // the entry node
00176     for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
00177       if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
00178         if (isAllocaPromotable(AI))
00179           Allocas.push_back(AI);
00180 
00181     if (Allocas.empty()) break;
00182 
00183     PromoteMemToReg(Allocas, DT, DF);
00184     NumPromoted += Allocas.size();
00185     Changed = true;
00186   }
00187 
00188   return Changed;
00189 }
00190 
00191 /// getNumSAElements - Return the number of elements in the specific struct or
00192 /// array.
00193 static uint64_t getNumSAElements(const Type *T) {
00194   if (const StructType *ST = dyn_cast<StructType>(T))
00195     return ST->getNumElements();
00196   return cast<ArrayType>(T)->getNumElements();
00197 }
00198 
00199 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
00200 // which runs on all of the malloc/alloca instructions in the function, removing
00201 // them if they are only used by getelementptr instructions.
00202 //
00203 bool SROA::performScalarRepl(Function &F) {
00204   std::vector<AllocationInst*> WorkList;
00205 
00206   // Scan the entry basic block, adding any alloca's and mallocs to the worklist
00207   BasicBlock &BB = F.getEntryBlock();
00208   for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
00209     if (AllocationInst *A = dyn_cast<AllocationInst>(I))
00210       WorkList.push_back(A);
00211 
00212   // Process the worklist
00213   bool Changed = false;
00214   while (!WorkList.empty()) {
00215     AllocationInst *AI = WorkList.back();
00216     WorkList.pop_back();
00217     
00218     // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
00219     // with unused elements.
00220     if (AI->use_empty()) {
00221       AI->eraseFromParent();
00222       continue;
00223     }
00224     
00225     // If we can turn this aggregate value (potentially with casts) into a
00226     // simple scalar value that can be mem2reg'd into a register value.
00227     bool IsNotTrivial = false;
00228     if (const Type *ActualType = CanConvertToScalar(AI, IsNotTrivial))
00229       if (IsNotTrivial && ActualType != Type::VoidTy) {
00230         ConvertToScalar(AI, ActualType);
00231         Changed = true;
00232         continue;
00233       }
00234 
00235     // Check to see if we can perform the core SROA transformation.  We cannot
00236     // transform the allocation instruction if it is an array allocation
00237     // (allocations OF arrays are ok though), and an allocation of a scalar
00238     // value cannot be decomposed at all.
00239     if (!AI->isArrayAllocation() &&
00240         (isa<StructType>(AI->getAllocatedType()) ||
00241          isa<ArrayType>(AI->getAllocatedType())) &&
00242         AI->getAllocatedType()->isSized() &&
00243         // Do not promote any struct whose size is larger than "128" bytes.
00244         TD->getABITypeSize(AI->getAllocatedType()) < SRThreshold &&
00245         // Do not promote any struct into more than "32" separate vars.
00246         getNumSAElements(AI->getAllocatedType()) < SRThreshold/4) {
00247       // Check that all of the users of the allocation are capable of being
00248       // transformed.
00249       switch (isSafeAllocaToScalarRepl(AI)) {
00250       default: assert(0 && "Unexpected value!");
00251       case 0:  // Not safe to scalar replace.
00252         break;
00253       case 1:  // Safe, but requires cleanup/canonicalizations first
00254         CanonicalizeAllocaUsers(AI);
00255         // FALL THROUGH.
00256       case 3:  // Safe to scalar replace.
00257         DoScalarReplacement(AI, WorkList);
00258         Changed = true;
00259         continue;
00260       }
00261     }
00262     
00263     // Check to see if this allocation is only modified by a memcpy/memmove from
00264     // a constant global.  If this is the case, we can change all users to use
00265     // the constant global instead.  This is commonly produced by the CFE by
00266     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
00267     // is only subsequently read.
00268     if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
00269       DOUT << "Found alloca equal to global: " << *AI;
00270       DOUT << "  memcpy = " << *TheCopy;
00271       Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
00272       AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
00273       TheCopy->eraseFromParent();  // Don't mutate the global.
00274       AI->eraseFromParent();
00275       ++NumGlobals;
00276       Changed = true;
00277       continue;
00278     }
00279         
00280     // Otherwise, couldn't process this.
00281   }
00282 
00283   return Changed;
00284 }
00285 
00286 /// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
00287 /// predicate, do SROA now.
00288 void SROA::DoScalarReplacement(AllocationInst *AI, 
00289                                std::vector<AllocationInst*> &WorkList) {
00290   DOUT << "Found inst to SROA: " << *AI;
00291   SmallVector<AllocaInst*, 32> ElementAllocas;
00292   if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
00293     ElementAllocas.reserve(ST->getNumContainedTypes());
00294     for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
00295       AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 
00296                                       AI->getAlignment(),
00297                                       AI->getName() + "." + utostr(i), AI);
00298       ElementAllocas.push_back(NA);
00299       WorkList.push_back(NA);  // Add to worklist for recursive processing
00300     }
00301   } else {
00302     const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
00303     ElementAllocas.reserve(AT->getNumElements());
00304     const Type *ElTy = AT->getElementType();
00305     for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
00306       AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
00307                                       AI->getName() + "." + utostr(i), AI);
00308       ElementAllocas.push_back(NA);
00309       WorkList.push_back(NA);  // Add to worklist for recursive processing
00310     }
00311   }
00312 
00313   // Now that we have created the alloca instructions that we want to use,
00314   // expand the getelementptr instructions to use them.
00315   //
00316   while (!AI->use_empty()) {
00317     Instruction *User = cast<Instruction>(AI->use_back());
00318     if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
00319       RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
00320       BCInst->eraseFromParent();
00321       continue;
00322     }
00323     
00324     // Replace:
00325     //   %res = load { i32, i32 }* %alloc
00326     // with:
00327     //   %load.0 = load i32* %alloc.0
00328     //   %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 
00329     //   %load.1 = load i32* %alloc.1
00330     //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 
00331     // (Also works for arrays instead of structs)
00332     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
00333       Value *Insert = UndefValue::get(LI->getType());
00334       for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
00335         Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
00336         Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
00337       }
00338       LI->replaceAllUsesWith(Insert);
00339       LI->eraseFromParent();
00340       continue;
00341     }
00342 
00343     // Replace:
00344     //   store { i32, i32 } %val, { i32, i32 }* %alloc
00345     // with:
00346     //   %val.0 = extractvalue { i32, i32 } %val, 0 
00347     //   store i32 %val.0, i32* %alloc.0
00348     //   %val.1 = extractvalue { i32, i32 } %val, 1 
00349     //   store i32 %val.1, i32* %alloc.1
00350     // (Also works for arrays instead of structs)
00351     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
00352       Value *Val = SI->getOperand(0);
00353       for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
00354         Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
00355         new StoreInst(Extract, ElementAllocas[i], SI);
00356       }
00357       SI->eraseFromParent();
00358       continue;
00359     }
00360     
00361     GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
00362     // We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
00363     unsigned Idx =
00364        (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
00365 
00366     assert(Idx < ElementAllocas.size() && "Index out of range?");
00367     AllocaInst *AllocaToUse = ElementAllocas[Idx];
00368 
00369     Value *RepValue;
00370     if (GEPI->getNumOperands() == 3) {
00371       // Do not insert a new getelementptr instruction with zero indices, only
00372       // to have it optimized out later.
00373       RepValue = AllocaToUse;
00374     } else {
00375       // We are indexing deeply into the structure, so we still need a
00376       // getelement ptr instruction to finish the indexing.  This may be
00377       // expanded itself once the worklist is rerun.
00378       //
00379       SmallVector<Value*, 8> NewArgs;
00380       NewArgs.push_back(Constant::getNullValue(Type::Int32Ty));
00381       NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
00382       RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
00383                                            NewArgs.end(), "", GEPI);
00384       RepValue->takeName(GEPI);
00385     }
00386     
00387     // If this GEP is to the start of the aggregate, check for memcpys.
00388     if (Idx == 0 && GEPI->hasAllZeroIndices())
00389       RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);
00390 
00391     // Move all of the users over to the new GEP.
00392     GEPI->replaceAllUsesWith(RepValue);
00393     // Delete the old GEP
00394     GEPI->eraseFromParent();
00395   }
00396 
00397   // Finally, delete the Alloca instruction
00398   AI->eraseFromParent();
00399   NumReplaced++;
00400 }
00401 
00402 
00403 /// isSafeElementUse - Check to see if this use is an allowed use for a
00404 /// getelementptr instruction of an array aggregate allocation.  isFirstElt
00405 /// indicates whether Ptr is known to the start of the aggregate.
00406 ///
00407 void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
00408                             AllocaInfo &Info) {
00409   for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
00410        I != E; ++I) {
00411     Instruction *User = cast<Instruction>(*I);
00412     switch (User->getOpcode()) {
00413     case Instruction::Load:  break;
00414     case Instruction::Store:
00415       // Store is ok if storing INTO the pointer, not storing the pointer
00416       if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
00417       break;
00418     case Instruction::GetElementPtr: {
00419       GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
00420       bool AreAllZeroIndices = isFirstElt;
00421       if (GEP->getNumOperands() > 1) {
00422         if (!isa<ConstantInt>(GEP->getOperand(1)) ||
00423             !cast<ConstantInt>(GEP->getOperand(1))->isZero())
00424           // Using pointer arithmetic to navigate the array.
00425           return MarkUnsafe(Info);
00426        
00427         if (AreAllZeroIndices)
00428           AreAllZeroIndices = GEP->hasAllZeroIndices();
00429       }
00430       isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
00431       if (Info.isUnsafe) return;
00432       break;
00433     }
00434     case Instruction::BitCast:
00435       if (isFirstElt) {
00436         isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
00437         if (Info.isUnsafe) return;
00438         break;
00439       }
00440       DOUT << "  Transformation preventing inst: " << *User;
00441       return MarkUnsafe(Info);
00442     case Instruction::Call:
00443       if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
00444         if (isFirstElt) {
00445           isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
00446           if (Info.isUnsafe) return;
00447           break;
00448         }
00449       }
00450       DOUT << "  Transformation preventing inst: " << *User;
00451       return MarkUnsafe(Info);
00452     default:
00453       DOUT << "  Transformation preventing inst: " << *User;
00454       return MarkUnsafe(Info);
00455     }
00456   }
00457   return;  // All users look ok :)
00458 }
00459 
00460 /// AllUsersAreLoads - Return true if all users of this value are loads.
00461 static bool AllUsersAreLoads(Value *Ptr) {
00462   for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
00463        I != E; ++I)
00464     if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
00465       return false;
00466   return true;
00467 }
00468 
00469 /// isSafeUseOfAllocation - Check to see if this user is an allowed use for an
00470 /// aggregate allocation.
00471 ///
00472 void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
00473                                  AllocaInfo &Info) {
00474   if (BitCastInst *C = dyn_cast<BitCastInst>(User))
00475     return isSafeUseOfBitCastedAllocation(C, AI, Info);
00476 
00477   if (isa<LoadInst>(User))
00478     return; // Loads (returning a first class aggregrate) are always rewritable
00479 
00480   if (isa<StoreInst>(User) && User->getOperand(0) != AI)
00481     return; // Store is ok if storing INTO the pointer, not storing the pointer
00482  
00483   GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
00484   if (GEPI == 0)
00485     return MarkUnsafe(Info);
00486 
00487   gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);
00488 
00489   // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
00490   if (I == E ||
00491       I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
00492     return MarkUnsafe(Info);
00493   }
00494 
00495   ++I;
00496   if (I == E) return MarkUnsafe(Info);  // ran out of GEP indices??
00497 
00498   bool IsAllZeroIndices = true;
00499   
00500   // If the first index is a non-constant index into an array, see if we can
00501   // handle it as a special case.
00502   if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
00503     if (!isa<ConstantInt>(I.getOperand())) {
00504       IsAllZeroIndices = 0;
00505       uint64_t NumElements = AT->getNumElements();
00506       
00507       // If this is an array index and the index is not constant, we cannot
00508       // promote... that is unless the array has exactly one or two elements in
00509       // it, in which case we CAN promote it, but we have to canonicalize this
00510       // out if this is the only problem.
00511       if ((NumElements == 1 || NumElements == 2) &&
00512           AllUsersAreLoads(GEPI)) {
00513         Info.needsCanon = true;
00514         return;  // Canonicalization required!
00515       }
00516       return MarkUnsafe(Info);
00517     }
00518   }
00519  
00520   // Walk through the GEP type indices, checking the types that this indexes
00521   // into.
00522   for (; I != E; ++I) {
00523     // Ignore struct elements, no extra checking needed for these.
00524     if (isa<StructType>(*I))
00525       continue;
00526     
00527     ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
00528     if (!IdxVal) return MarkUnsafe(Info);
00529 
00530     // Are all indices still zero?
00531     IsAllZeroIndices &= IdxVal->isZero();
00532     
00533     if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
00534       // This GEP indexes an array.  Verify that this is an in-range constant
00535       // integer. Specifically, consider A[0][i]. We cannot know that the user
00536       // isn't doing invalid things like allowing i to index an out-of-range
00537       // subscript that accesses A[1].  Because of this, we have to reject SROA
00538       // of any accesses into structs where any of the components are variables. 
00539       if (IdxVal->getZExtValue() >= AT->getNumElements())
00540         return MarkUnsafe(Info);
00541     } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
00542       if (IdxVal->getZExtValue() >= VT->getNumElements())
00543         return MarkUnsafe(Info);
00544     }
00545   }
00546   
00547   // If there are any non-simple uses of this getelementptr, make sure to reject
00548   // them.
00549   return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
00550 }
00551 
00552 /// isSafeMemIntrinsicOnAllocation - Return true if the specified memory
00553 /// intrinsic can be promoted by SROA.  At this point, we know that the operand
00554 /// of the memintrinsic is a pointer to the beginning of the allocation.
00555 void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
00556                                           unsigned OpNo, AllocaInfo &Info) {
00557   // If not constant length, give up.
00558   ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
00559   if (!Length) return MarkUnsafe(Info);
00560   
00561   // If not the whole aggregate, give up.
00562   if (Length->getZExtValue() !=
00563       TD->getABITypeSize(AI->getType()->getElementType()))
00564     return MarkUnsafe(Info);
00565   
00566   // We only know about memcpy/memset/memmove.
00567   if (!isa<MemCpyInst>(MI) && !isa<MemSetInst>(MI) && !isa<MemMoveInst>(MI))
00568     return MarkUnsafe(Info);
00569   
00570   // Otherwise, we can transform it.  Determine whether this is a memcpy/set
00571   // into or out of the aggregate.
00572   if (OpNo == 1)
00573     Info.isMemCpyDst = true;
00574   else {
00575     assert(OpNo == 2);
00576     Info.isMemCpySrc = true;
00577   }
00578 }
00579 
00580 /// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast
00581 /// are 
00582 void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
00583                                           AllocaInfo &Info) {
00584   for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
00585        UI != E; ++UI) {
00586     if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
00587       isSafeUseOfBitCastedAllocation(BCU, AI, Info);
00588     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
00589       isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
00590     } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
00591       // If storing the entire alloca in one chunk through a bitcasted pointer
00592       // to integer, we can transform it.  This happens (for example) when you
00593       // cast a {i32,i32}* to i64* and store through it.  This is similar to the
00594       // memcpy case and occurs in various "byval" cases and emulated memcpys.
00595       if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
00596           TD->getABITypeSize(SI->getOperand(0)->getType()) == 
00597           TD->getABITypeSize(AI->getType()->getElementType())) {
00598         Info.isMemCpyDst = true;
00599         continue;
00600       }
00601       return MarkUnsafe(Info);
00602     } else {
00603       return MarkUnsafe(Info);
00604     }
00605     if (Info.isUnsafe) return;
00606   }
00607 }
00608 
00609 /// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
00610 /// to its first element.  Transform users of the cast to use the new values
00611 /// instead.
00612 void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
00613                                       SmallVector<AllocaInst*, 32> &NewElts) {
00614   Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
00615   while (UI != UE) {
00616     Instruction *User = cast<Instruction>(*UI++);
00617     if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
00618       RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
00619       if (BCU->use_empty()) BCU->eraseFromParent();
00620       continue;
00621     }
00622 
00623     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
00624       // This must be memcpy/memmove/memset of the entire aggregate.
00625       // Split into one per element.
00626       RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
00627       continue;
00628     }
00629       
00630     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
00631       // This must be a store of the entire alloca from an integer.
00632       RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
00633       continue;
00634     }
00635     
00636     // Otherwise it must be some other user of a gep of the first pointer.  Just
00637     // leave these alone.
00638     continue;
00639   }      
00640 }
00641 
00642 /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
00643 /// Rewrite it to copy or set the elements of the scalarized memory.
00644 void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
00645                                         AllocationInst *AI,
00646                                         SmallVector<AllocaInst*, 32> &NewElts) {
00647   
00648   // If this is a memcpy/memmove, construct the other pointer as the
00649   // appropriate type.
00650   Value *OtherPtr = 0;
00651   if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(MI)) {
00652     if (BCInst == MCI->getRawDest())
00653       OtherPtr = MCI->getRawSource();
00654     else {
00655       assert(BCInst == MCI->getRawSource());
00656       OtherPtr = MCI->getRawDest();
00657     }
00658   } else if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
00659     if (BCInst == MMI->getRawDest())
00660       OtherPtr = MMI->getRawSource();
00661     else {
00662       assert(BCInst == MMI->getRawSource());
00663       OtherPtr = MMI->getRawDest();
00664     }
00665   }
00666   
00667   // If there is an other pointer, we want to convert it to the same pointer
00668   // type as AI has, so we can GEP through it safely.
00669   if (OtherPtr) {
00670     // It is likely that OtherPtr is a bitcast, if so, remove it.
00671     if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
00672       OtherPtr = BC->getOperand(0);
00673     // All zero GEPs are effectively bitcasts.
00674     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr))
00675       if (GEP->hasAllZeroIndices())
00676         OtherPtr = GEP->getOperand(0);
00677     
00678     if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
00679       if (BCE->getOpcode() == Instruction::BitCast)
00680         OtherPtr = BCE->getOperand(0);
00681     
00682     // If the pointer is not the right type, insert a bitcast to the right
00683     // type.
00684     if (OtherPtr->getType() != AI->getType())
00685       OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
00686                                  MI);
00687   }
00688   
00689   // Process each element of the aggregate.
00690   Value *TheFn = MI->getOperand(0);
00691   const Type *BytePtrTy = MI->getRawDest()->getType();
00692   bool SROADest = MI->getRawDest() == BCInst;
00693   
00694   Constant *Zero = Constant::getNullValue(Type::Int32Ty);
00695 
00696   for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
00697     // If this is a memcpy/memmove, emit a GEP of the other element address.
00698     Value *OtherElt = 0;
00699     if (OtherPtr) {
00700       Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) };
00701       OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
00702                                            OtherPtr->getNameStr()+"."+utostr(i),
00703                                            MI);
00704     }
00705     
00706     Value *EltPtr = NewElts[i];
00707     const Type *EltTy =cast<PointerType>(EltPtr->getType())->getElementType();
00708     
00709     // If we got down to a scalar, insert a load or store as appropriate.
00710     if (EltTy->isSingleValueType()) {
00711       if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
00712         Value *Elt = new LoadInst(SROADest ? OtherElt : EltPtr, "tmp",
00713                                   MI);
00714         new StoreInst(Elt, SROADest ? EltPtr : OtherElt, MI);
00715         continue;
00716       }
00717       assert(isa<MemSetInst>(MI));
00718       
00719       // If the stored element is zero (common case), just store a null
00720       // constant.
00721       Constant *StoreVal;
00722       if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
00723         if (CI->isZero()) {
00724           StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
00725         } else {
00726           // If EltTy is a vector type, get the element type.
00727           const Type *ValTy = EltTy;
00728           if (const VectorType *VTy = dyn_cast<VectorType>(ValTy))
00729             ValTy = VTy->getElementType();
00730           
00731           // Construct an integer with the right value.
00732           unsigned EltSize = TD->getTypeSizeInBits(ValTy);
00733           APInt OneVal(EltSize, CI->getZExtValue());
00734           APInt TotalVal(OneVal);
00735           // Set each byte.
00736           for (unsigned i = 0; 8*i < EltSize; ++i) {
00737             TotalVal = TotalVal.shl(8);
00738             TotalVal |= OneVal;
00739           }
00740           
00741           // Convert the integer value to the appropriate type.
00742           StoreVal = ConstantInt::get(TotalVal);
00743           if (isa<PointerType>(ValTy))
00744             StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
00745           else if (ValTy->isFloatingPoint())
00746             StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
00747           assert(StoreVal->getType() == ValTy && "Type mismatch!");
00748           
00749           // If the requested value was a vector constant, create it.
00750           if (EltTy != ValTy) {
00751             unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
00752             SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
00753             StoreVal = ConstantVector::get(&Elts[0], NumElts);
00754           }
00755         }
00756         new StoreInst(StoreVal, EltPtr, MI);
00757         continue;
00758       }
00759       // Otherwise, if we're storing a byte variable, use a memset call for
00760       // this element.
00761     }
00762     
00763     // Cast the element pointer to BytePtrTy.
00764     if (EltPtr->getType() != BytePtrTy)
00765       EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI);
00766     
00767     // Cast the other pointer (if we have one) to BytePtrTy. 
00768     if (OtherElt && OtherElt->getType() != BytePtrTy)
00769       OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(),
00770                                  MI);
00771     
00772     unsigned EltSize = TD->getABITypeSize(EltTy);
00773     
00774     // Finally, insert the meminst for this element.
00775     if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
00776       Value *Ops[] = {
00777         SROADest ? EltPtr : OtherElt,  // Dest ptr
00778         SROADest ? OtherElt : EltPtr,  // Src ptr
00779         ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
00780         Zero  // Align
00781       };
00782       CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
00783     } else {
00784       assert(isa<MemSetInst>(MI));
00785       Value *Ops[] = {
00786         EltPtr, MI->getOperand(2),  // Dest, Value,
00787         ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
00788         Zero  // Align
00789       };
00790       CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
00791     }
00792   }
00793   MI->eraseFromParent();
00794 }
00795 
00796 /// RewriteStoreUserOfWholeAlloca - We found an store of an integer that
00797 /// overwrites the entire allocation.  Extract out the pieces of the stored
00798 /// integer and store them individually.
00799 void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
00800                                          AllocationInst *AI,
00801                                          SmallVector<AllocaInst*, 32> &NewElts){
00802   // Extract each element out of the integer according to its structure offset
00803   // and store the element value to the individual alloca.
00804   Value *SrcVal = SI->getOperand(0);
00805   const Type *AllocaEltTy = AI->getType()->getElementType();
00806   uint64_t AllocaSizeBits = TD->getABITypeSizeInBits(AllocaEltTy);
00807   
00808   // If this isn't a store of an integer to the whole alloca, it may be a store
00809   // to the first element.  Just ignore the store in this case and normal SROA
00810   // will handle it.
00811   if (!isa<IntegerType>(SrcVal->getType()) ||
00812       TD->getABITypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
00813     return;
00814 
00815   DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI;
00816 
00817   // There are two forms here: AI could be an array or struct.  Both cases
00818   // have different ways to compute the element offset.
00819   if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
00820     const StructLayout *Layout = TD->getStructLayout(EltSTy);
00821     
00822     for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
00823       // Get the number of bits to shift SrcVal to get the value.
00824       const Type *FieldTy = EltSTy->getElementType(i);
00825       uint64_t Shift = Layout->getElementOffsetInBits(i);
00826       
00827       if (TD->isBigEndian())
00828         Shift = AllocaSizeBits-Shift-TD->getABITypeSizeInBits(FieldTy);
00829       
00830       Value *EltVal = SrcVal;
00831       if (Shift) {
00832         Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
00833         EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
00834                                             "sroa.store.elt", SI);
00835       }
00836       
00837       // Truncate down to an integer of the right size.
00838       uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
00839       if (FieldSizeBits != AllocaSizeBits)
00840         EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI);
00841       Value *DestField = NewElts[i];
00842       if (EltVal->getType() == FieldTy) {
00843         // Storing to an integer field of this size, just do it.
00844       } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
00845         // Bitcast to the right element type (for fp/vector values).
00846         EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
00847       } else {
00848         // Otherwise, bitcast the dest pointer (for aggregates).
00849         DestField = new BitCastInst(DestField,
00850                                     PointerType::getUnqual(EltVal->getType()),
00851                                     "", SI);
00852       }
00853       new StoreInst(EltVal, DestField, SI);
00854     }
00855     
00856   } else {
00857     const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
00858     const Type *ArrayEltTy = ATy->getElementType();
00859     uint64_t ElementOffset = TD->getABITypeSizeInBits(ArrayEltTy);
00860     uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
00861 
00862     uint64_t Shift;
00863     
00864     if (TD->isBigEndian())
00865       Shift = AllocaSizeBits-ElementOffset;
00866     else 
00867       Shift = 0;
00868     
00869     for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
00870       
00871       Value *EltVal = SrcVal;
00872       if (Shift) {
00873         Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
00874         EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
00875                                             "sroa.store.elt", SI);
00876       }
00877       
00878       // Truncate down to an integer of the right size.
00879       if (ElementSizeBits != AllocaSizeBits)
00880         EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI);
00881       Value *DestField = NewElts[i];
00882       if (EltVal->getType() == ArrayEltTy) {
00883         // Storing to an integer field of this size, just do it.
00884       } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
00885         // Bitcast to the right element type (for fp/vector values).
00886         EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
00887       } else {
00888         // Otherwise, bitcast the dest pointer (for aggregates).
00889         DestField = new BitCastInst(DestField,
00890                                     PointerType::getUnqual(EltVal->getType()),
00891                                     "", SI);
00892       }
00893       new StoreInst(EltVal, DestField, SI);
00894       
00895       if (TD->isBigEndian())
00896         Shift -= ElementOffset;
00897       else 
00898         Shift += ElementOffset;
00899     }
00900   }
00901   
00902   SI->eraseFromParent();
00903 }
00904 
00905 
00906 /// HasPadding - Return true if the specified type has any structure or
00907 /// alignment padding, false otherwise.
00908 static bool HasPadding(const Type *Ty, const TargetData &TD) {
00909   if (const StructType *STy = dyn_cast<StructType>(Ty)) {
00910     const StructLayout *SL = TD.getStructLayout(STy);
00911     unsigned PrevFieldBitOffset = 0;
00912     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
00913       unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
00914 
00915       // Padding in sub-elements?
00916       if (HasPadding(STy->getElementType(i), TD))
00917         return true;
00918 
00919       // Check to see if there is any padding between this element and the
00920       // previous one.
00921       if (i) {
00922         unsigned PrevFieldEnd =
00923         PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
00924         if (PrevFieldEnd < FieldBitOffset)
00925           return true;
00926       }
00927 
00928       PrevFieldBitOffset = FieldBitOffset;
00929     }
00930 
00931     //  Check for tail padding.
00932     if (unsigned EltCount = STy->getNumElements()) {
00933       unsigned PrevFieldEnd = PrevFieldBitOffset +
00934                    TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
00935       if (PrevFieldEnd < SL->getSizeInBits())
00936         return true;
00937     }
00938 
00939   } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
00940     return HasPadding(ATy->getElementType(), TD);
00941   } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
00942     return HasPadding(VTy->getElementType(), TD);
00943   }
00944   return TD.getTypeSizeInBits(Ty) != TD.getABITypeSizeInBits(Ty);
00945 }
00946 
00947 /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
00948 /// an aggregate can be broken down into elements.  Return 0 if not, 3 if safe,
00949 /// or 1 if safe after canonicalization has been performed.
00950 ///
00951