LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

Andersens.cpp

Go to the documentation of this file.
00001 //===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines an implementation of Andersen's interprocedural alias
00011 // analysis
00012 //
00013 // In pointer analysis terms, this is a subset-based, flow-insensitive,
00014 // field-sensitive, and context-insensitive algorithm pointer algorithm.
00015 //
00016 // This algorithm is implemented as three stages:
00017 //   1. Object identification.
00018 //   2. Inclusion constraint identification.
00019 //   3. Offline constraint graph optimization
00020 //   4. Inclusion constraint solving.
00021 //
00022 // The object identification stage identifies all of the memory objects in the
00023 // program, which includes globals, heap allocated objects, and stack allocated
00024 // objects.
00025 //
00026 // The inclusion constraint identification stage finds all inclusion constraints
00027 // in the program by scanning the program, looking for pointer assignments and
00028 // other statements that effect the points-to graph.  For a statement like "A =
00029 // B", this statement is processed to indicate that A can point to anything that
00030 // B can point to.  Constraints can handle copies, loads, and stores, and
00031 // address taking.
00032 //
00033 // The offline constraint graph optimization portion includes offline variable
00034 // substitution algorithms intended to compute pointer and location
00035 // equivalences.  Pointer equivalences are those pointers that will have the
00036 // same points-to sets, and location equivalences are those variables that
00037 // always appear together in points-to sets.  It also includes an offline
00038 // cycle detection algorithm that allows cycles to be collapsed sooner 
00039 // during solving.
00040 //
00041 // The inclusion constraint solving phase iteratively propagates the inclusion
00042 // constraints until a fixed point is reached.  This is an O(N^3) algorithm.
00043 //
00044 // Function constraints are handled as if they were structs with X fields.
00045 // Thus, an access to argument X of function Y is an access to node index
00046 // getNode(Y) + X.  This representation allows handling of indirect calls
00047 // without any issues.  To wit, an indirect call Y(a,b) is equivalent to
00048 // *(Y + 1) = a, *(Y + 2) = b.
00049 // The return node for a function is always located at getNode(F) +
00050 // CallReturnPos. The arguments start at getNode(F) + CallArgPos.
00051 //
00052 // Future Improvements:
00053 //   Use of BDD's.
00054 //===----------------------------------------------------------------------===//
00055 
00056 #define DEBUG_TYPE "anders-aa"
00057 #include "llvm/Constants.h"
00058 #include "llvm/DerivedTypes.h"
00059 #include "llvm/Instructions.h"
00060 #include "llvm/Module.h"
00061 #include "llvm/Pass.h"
00062 #include "llvm/Support/Compiler.h"
00063 #include "llvm/Support/InstIterator.h"
00064 #include "llvm/Support/InstVisitor.h"
00065 #include "llvm/Analysis/AliasAnalysis.h"
00066 #include "llvm/Analysis/Passes.h"
00067 #include "llvm/Support/Debug.h"
00068 #include "llvm/ADT/Statistic.h"
00069 #include "llvm/ADT/SparseBitVector.h"
00070 #include "llvm/ADT/DenseSet.h"
00071 #include <algorithm>
00072 #include <set>
00073 #include <list>
00074 #include <map>
00075 #include <stack>
00076 #include <vector>
00077 #include <queue>
00078 
00079 // Determining the actual set of nodes the universal set can consist of is very
00080 // expensive because it means propagating around very large sets.  We rely on
00081 // other analysis being able to determine which nodes can never be pointed to in
00082 // order to disambiguate further than "points-to anything".
00083 #define FULL_UNIVERSAL 0
00084 
00085 using namespace llvm;
00086 STATISTIC(NumIters      , "Number of iterations to reach convergence");
00087 STATISTIC(NumConstraints, "Number of constraints");
00088 STATISTIC(NumNodes      , "Number of nodes");
00089 STATISTIC(NumUnified    , "Number of variables unified");
00090 STATISTIC(NumErased     , "Number of redundant constraints erased");
00091 
00092 static const unsigned SelfRep = (unsigned)-1;
00093 static const unsigned Unvisited = (unsigned)-1;
00094 // Position of the function return node relative to the function node.
00095 static const unsigned CallReturnPos = 1;
00096 // Position of the function call node relative to the function node.
00097 static const unsigned CallFirstArgPos = 2;
00098 
00099 namespace {
00100   struct BitmapKeyInfo {
00101     static inline SparseBitVector<> *getEmptyKey() {
00102       return reinterpret_cast<SparseBitVector<> *>(-1);
00103     }
00104     static inline SparseBitVector<> *getTombstoneKey() {
00105       return reinterpret_cast<SparseBitVector<> *>(-2);
00106     }
00107     static unsigned getHashValue(const SparseBitVector<> *bitmap) {
00108       return bitmap->getHashValue();
00109     }
00110     static bool isEqual(const SparseBitVector<> *LHS,
00111                         const SparseBitVector<> *RHS) {
00112       if (LHS == RHS)
00113         return true;
00114       else if (LHS == getEmptyKey() || RHS == getEmptyKey()
00115                || LHS == getTombstoneKey() || RHS == getTombstoneKey())
00116         return false;
00117 
00118       return *LHS == *RHS;
00119     }
00120 
00121     static bool isPod() { return true; }
00122   };
00123 
00124   class VISIBILITY_HIDDEN Andersens : public ModulePass, public AliasAnalysis,
00125                                       private InstVisitor<Andersens> {
00126     struct Node;
00127 
00128     /// Constraint - Objects of this structure are used to represent the various
00129     /// constraints identified by the algorithm.  The constraints are 'copy',
00130     /// for statements like "A = B", 'load' for statements like "A = *B",
00131     /// 'store' for statements like "*A = B", and AddressOf for statements like
00132     /// A = alloca;  The Offset is applied as *(A + K) = B for stores,
00133     /// A = *(B + K) for loads, and A = B + K for copies.  It is
00134     /// illegal on addressof constraints (because it is statically
00135     /// resolvable to A = &C where C = B + K)
00136 
00137     struct Constraint {
00138       enum ConstraintType { Copy, Load, Store, AddressOf } Type;
00139       unsigned Dest;
00140       unsigned Src;
00141       unsigned Offset;
00142 
00143       Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0)
00144         : Type(Ty), Dest(D), Src(S), Offset(O) {
00145         assert((Offset == 0 || Ty != AddressOf) &&
00146                "Offset is illegal on addressof constraints");
00147       }
00148 
00149       bool operator==(const Constraint &RHS) const {
00150         return RHS.Type == Type
00151           && RHS.Dest == Dest
00152           && RHS.Src == Src
00153           && RHS.Offset == Offset;
00154       }
00155 
00156       bool operator!=(const Constraint &RHS) const {
00157         return !(*this == RHS);
00158       }
00159 
00160       bool operator<(const Constraint &RHS) const {
00161         if (RHS.Type != Type)
00162           return RHS.Type < Type;
00163         else if (RHS.Dest != Dest)
00164           return RHS.Dest < Dest;
00165         else if (RHS.Src != Src)
00166           return RHS.Src < Src;
00167         return RHS.Offset < Offset;
00168       }
00169     };
00170 
00171     // Information DenseSet requires implemented in order to be able to do
00172     // it's thing
00173     struct PairKeyInfo {
00174       static inline std::pair<unsigned, unsigned> getEmptyKey() {
00175         return std::make_pair(~0U, ~0U);
00176       }
00177       static inline std::pair<unsigned, unsigned> getTombstoneKey() {
00178         return std::make_pair(~0U - 1, ~0U - 1);
00179       }
00180       static unsigned getHashValue(const std::pair<unsigned, unsigned> &P) {
00181         return P.first ^ P.second;
00182       }
00183       static unsigned isEqual(const std::pair<unsigned, unsigned> &LHS,
00184                               const std::pair<unsigned, unsigned> &RHS) {
00185         return LHS == RHS;
00186       }
00187     };
00188     
00189     struct ConstraintKeyInfo {
00190       static inline Constraint getEmptyKey() {
00191         return Constraint(Constraint::Copy, ~0U, ~0U, ~0U);
00192       }
00193       static inline Constraint getTombstoneKey() {
00194         return Constraint(Constraint::Copy, ~0U - 1, ~0U - 1, ~0U - 1);
00195       }
00196       static unsigned getHashValue(const Constraint &C) {
00197         return C.Src ^ C.Dest ^ C.Type ^ C.Offset;
00198       }
00199       static bool isEqual(const Constraint &LHS,
00200                           const Constraint &RHS) {
00201         return LHS.Type == RHS.Type && LHS.Dest == RHS.Dest
00202           && LHS.Src == RHS.Src && LHS.Offset == RHS.Offset;
00203       }
00204     };
00205 
00206     // Node class - This class is used to represent a node in the constraint
00207     // graph.  Due to various optimizations, it is not always the case that
00208     // there is a mapping from a Node to a Value.  In particular, we add
00209     // artificial Node's that represent the set of pointed-to variables shared
00210     // for each location equivalent Node.
00211     struct Node {
00212     private:
00213       static unsigned Counter;
00214 
00215     public:
00216       Value *Val;
00217       SparseBitVector<> *Edges;
00218       SparseBitVector<> *PointsTo;
00219       SparseBitVector<> *OldPointsTo;
00220       std::list<Constraint> Constraints;
00221 
00222       // Pointer and location equivalence labels
00223       unsigned PointerEquivLabel;
00224       unsigned LocationEquivLabel;
00225       // Predecessor edges, both real and implicit
00226       SparseBitVector<> *PredEdges;
00227       SparseBitVector<> *ImplicitPredEdges;
00228       // Set of nodes that point to us, only use for location equivalence.
00229       SparseBitVector<> *PointedToBy;
00230       // Number of incoming edges, used during variable substitution to early
00231       // free the points-to sets
00232       unsigned NumInEdges;
00233       // True if our points-to set is in the Set2PEClass map
00234       bool StoredInHash;
00235       // True if our node has no indirect constraints (complex or otherwise)
00236       bool Direct;
00237       // True if the node is address taken, *or* it is part of a group of nodes
00238       // that must be kept together.  This is set to true for functions and
00239       // their arg nodes, which must be kept at the same position relative to
00240       // their base function node.
00241       bool AddressTaken;
00242 
00243       // Nodes in cycles (or in equivalence classes) are united together using a
00244       // standard union-find representation with path compression.  NodeRep
00245       // gives the index into GraphNodes for the representative Node.
00246       unsigned NodeRep;
00247 
00248       // Modification timestamp.  Assigned from Counter.
00249       // Used for work list prioritization.
00250       unsigned Timestamp;
00251 
00252       explicit Node(bool direct = true) :
00253         Val(0), Edges(0), PointsTo(0), OldPointsTo(0), 
00254         PointerEquivLabel(0), LocationEquivLabel(0), PredEdges(0),
00255         ImplicitPredEdges(0), PointedToBy(0), NumInEdges(0),
00256         StoredInHash(false), Direct(direct), AddressTaken(false),
00257         NodeRep(SelfRep), Timestamp(0) { }
00258 
00259       Node *setValue(Value *V) {
00260         assert(Val == 0 && "Value already set for this node!");
00261         Val = V;
00262         return this;
00263       }
00264 
00265       /// getValue - Return the LLVM value corresponding to this node.
00266       ///
00267       Value *getValue() const { return Val; }
00268 
00269       /// addPointerTo - Add a pointer to the list of pointees of this node,
00270       /// returning true if this caused a new pointer to be added, or false if
00271       /// we already knew about the points-to relation.
00272       bool addPointerTo(unsigned Node) {
00273         return PointsTo->test_and_set(Node);
00274       }
00275 
00276       /// intersects - Return true if the points-to set of this node intersects
00277       /// with the points-to set of the specified node.
00278       bool intersects(Node *N) const;
00279 
00280       /// intersectsIgnoring - Return true if the points-to set of this node
00281       /// intersects with the points-to set of the specified node on any nodes
00282       /// except for the specified node to ignore.
00283       bool intersectsIgnoring(Node *N, unsigned) const;
00284 
00285       // Timestamp a node (used for work list prioritization)
00286       void Stamp() {
00287         Timestamp = Counter++;
00288       }
00289 
00290       bool isRep() const {
00291         return( (int) NodeRep < 0 );
00292       }
00293     };
00294 
00295     struct WorkListElement {
00296       Node* node;
00297       unsigned Timestamp;
00298       WorkListElement(Node* n, unsigned t) : node(n), Timestamp(t) {}
00299 
00300       // Note that we reverse the sense of the comparison because we
00301       // actually want to give low timestamps the priority over high,
00302       // whereas priority is typically interpreted as a greater value is
00303       // given high priority.
00304       bool operator<(const WorkListElement& that) const {
00305         return( this->Timestamp > that.Timestamp );
00306       }
00307     };
00308 
00309     // Priority-queue based work list specialized for Nodes.
00310     class WorkList {
00311       std::priority_queue<WorkListElement> Q;
00312 
00313     public:
00314       void insert(Node* n) {
00315         Q.push( WorkListElement(n, n->Timestamp) );
00316       }
00317 
00318       // We automatically discard non-representative nodes and nodes
00319       // that were in the work list twice (we keep a copy of the
00320       // timestamp in the work list so we can detect this situation by
00321       // comparing against the node's current timestamp).
00322       Node* pop() {
00323         while( !Q.empty() ) {
00324           WorkListElement x = Q.top(); Q.pop();
00325           Node* INode = x.node;
00326 
00327           if( INode->isRep() &&
00328               INode->Timestamp == x.Timestamp ) {
00329             return(x.node);
00330           }
00331         }
00332         return(0);
00333       }
00334 
00335       bool empty() {
00336         return Q.empty();
00337       }
00338     };
00339 
00340     /// GraphNodes - This vector is populated as part of the object
00341     /// identification stage of the analysis, which populates this vector with a
00342     /// node for each memory object and fills in the ValueNodes map.
00343     std::vector<Node> GraphNodes;
00344 
00345     /// ValueNodes - This map indicates the Node that a particular Value* is
00346     /// represented by.  This contains entries for all pointers.
00347     DenseMap<Value*, unsigned> ValueNodes;
00348 
00349     /// ObjectNodes - This map contains entries for each memory object in the
00350     /// program: globals, alloca's and mallocs.
00351     DenseMap<Value*, unsigned> ObjectNodes;
00352 
00353     /// ReturnNodes - This map contains an entry for each function in the
00354     /// program that returns a value.
00355     DenseMap<Function*, unsigned> ReturnNodes;
00356 
00357     /// VarargNodes - This map contains the entry used to represent all pointers
00358     /// passed through the varargs portion of a function call for a particular
00359     /// function.  An entry is not present in this map for functions that do not
00360     /// take variable arguments.
00361     DenseMap<Function*, unsigned> VarargNodes;
00362 
00363 
00364     /// Constraints - This vector contains a list of all of the constraints
00365     /// identified by the program.
00366     std::vector<Constraint> Constraints;
00367 
00368     // Map from graph node to maximum K value that is allowed (for functions,
00369     // this is equivalent to the number of arguments + CallFirstArgPos)
00370     std::map<unsigned, unsigned> MaxK;
00371 
00372     /// This enum defines the GraphNodes indices that correspond to important
00373     /// fixed sets.
00374     enum {
00375       UniversalSet = 0,
00376       NullPtr      = 1,
00377       NullObject   = 2,
00378       NumberSpecialNodes
00379     };
00380     // Stack for Tarjan's
00381     std::stack<unsigned> SCCStack;
00382     // Map from Graph Node to DFS number
00383     std::vector<unsigned> Node2DFS;
00384     // Map from Graph Node to Deleted from graph.
00385     std::vector<bool> Node2Deleted;
00386     // Same as Node Maps, but implemented as std::map because it is faster to
00387     // clear 
00388     std::map<unsigned, unsigned> Tarjan2DFS;
00389     std::map<unsigned, bool> Tarjan2Deleted;
00390     // Current DFS number
00391     unsigned DFSNumber;
00392 
00393     // Work lists.
00394     WorkList w1, w2;
00395     WorkList *CurrWL, *NextWL; // "current" and "next" work lists
00396 
00397     // Offline variable substitution related things
00398 
00399     // Temporary rep storage, used because we can't collapse SCC's in the
00400     // predecessor graph by uniting the variables permanently, we can only do so
00401     // for the successor graph.
00402     std::vector<unsigned> VSSCCRep;
00403     // Mapping from node to whether we have visited it during SCC finding yet.
00404     std::vector<bool> Node2Visited;
00405     // During variable substitution, we create unknowns to represent the unknown
00406     // value that is a dereference of a variable.  These nodes are known as
00407     // "ref" nodes (since they represent the value of dereferences).
00408     unsigned FirstRefNode;
00409     // During HVN, we create represent address taken nodes as if they were
00410     // unknown (since HVN, unlike HU, does not evaluate unions).
00411     unsigned FirstAdrNode;
00412     // Current pointer equivalence class number
00413     unsigned PEClass;
00414     // Mapping from points-to sets to equivalence classes
00415     typedef DenseMap<SparseBitVector<> *, unsigned, BitmapKeyInfo> BitVectorMap;
00416     BitVectorMap Set2PEClass;
00417     // Mapping from pointer equivalences to the representative node.  -1 if we
00418     // have no representative node for this pointer equivalence class yet.
00419     std::vector<int> PEClass2Node;
00420     // Mapping from pointer equivalences to representative node.  This includes
00421     // pointer equivalent but not location equivalent variables. -1 if we have
00422     // no representative node for this pointer equivalence class yet.
00423     std::vector<int> PENLEClass2Node;
00424     // Union/Find for HCD
00425     std::vector<unsigned> HCDSCCRep;
00426     // HCD's offline-detected cycles; "Statically DeTected"
00427     // -1 if not part of such a cycle, otherwise a representative node.
00428     std::vector<int> SDT;
00429     // Whether to use SDT (UniteNodes can use it during solving, but not before)
00430     bool SDTActive;
00431 
00432   public:
00433     static char ID;
00434     Andersens() : ModulePass((intptr_t)&ID) {}
00435 
00436     bool runOnModule(Module &M) {
00437       InitializeAliasAnalysis(this);
00438       IdentifyObjects(M);
00439       CollectConstraints(M);
00440 #undef DEBUG_TYPE
00441 #define DEBUG_TYPE "anders-aa-constraints"
00442       DEBUG(PrintConstraints());
00443 #undef DEBUG_TYPE
00444 #define DEBUG_TYPE "anders-aa"
00445       SolveConstraints();
00446       DEBUG(PrintPointsToGraph());
00447 
00448       // Free the constraints list, as we don't need it to respond to alias
00449       // requests.
00450       std::vector<Constraint>().swap(Constraints);
00451       //These are needed for Print() (-analyze in opt)
00452       //ObjectNodes.clear();
00453       //ReturnNodes.clear();
00454       //VarargNodes.clear();
00455       return false;
00456     }
00457 
00458     void releaseMemory() {
00459       // FIXME: Until we have transitively required passes working correctly,
00460       // this cannot be enabled!  Otherwise, using -count-aa with the pass
00461       // causes memory to be freed too early. :(
00462 #if 0
00463       // The memory objects and ValueNodes data structures at the only ones that
00464       // are still live after construction.
00465       std::vector<Node>().swap(GraphNodes);
00466       ValueNodes.clear();
00467 #endif
00468     }
00469 
00470     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
00471       AliasAnalysis::getAnalysisUsage(AU);
00472       AU.setPreservesAll();                         // Does not transform code
00473     }
00474 
00475     //------------------------------------------------
00476     // Implement the AliasAnalysis API
00477     //
00478     AliasResult alias(const Value *V1, unsigned V1Size,
00479                       const Value *V2, unsigned V2Size);
00480     virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
00481     virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
00482     void getMustAliases(Value *P, std::vector<Value*> &RetVals);
00483     bool pointsToConstantMemory(const Value *P);
00484 
00485     virtual void deleteValue(Value *V) {
00486       ValueNodes.erase(V);
00487       getAnalysis<AliasAnalysis>().deleteValue(V);
00488     }
00489 
00490     virtual void copyValue(Value *From, Value *To) {
00491       ValueNodes[To] = ValueNodes[From];
00492       getAnalysis<AliasAnalysis>().copyValue(From, To);
00493     }
00494 
00495   private:
00496     /// getNode - Return the node corresponding to the specified pointer scalar.
00497     ///
00498     unsigned getNode(Value *V) {
00499       if (Constant *C = dyn_cast<Constant>(V))
00500         if (!isa<GlobalValue>(C))
00501           return getNodeForConstantPointer(C);
00502 
00503       DenseMap<Value*, unsigned>::iterator I = ValueNodes.find(V);
00504       if (I == ValueNodes.end()) {
00505 #ifndef NDEBUG
00506         V->dump();
00507 #endif
00508         assert(0 && "Value does not have a node in the points-to graph!");
00509       }
00510       return I->second;
00511     }
00512 
00513     /// getObject - Return the node corresponding to the memory object for the
00514     /// specified global or allocation instruction.
00515     unsigned getObject(Value *V) const {
00516       DenseMap<Value*, unsigned>::iterator I = ObjectNodes.find(V);
00517       assert(I != ObjectNodes.end() &&
00518              "Value does not have an object in the points-to graph!");
00519       return I->second;
00520     }
00521 
00522     /// getReturnNode - Return the node representing the return value for the
00523     /// specified function.
00524     unsigned getReturnNode(Function *F) const {
00525       DenseMap<Function*, unsigned>::iterator I = ReturnNodes.find(F);
00526       assert(I != ReturnNodes.end() && "Function does not return a value!");
00527       return I->second;
00528     }
00529 
00530     /// getVarargNode - Return the node representing the variable arguments
00531     /// formal for the specified function.
00532     unsigned getVarargNode(Function *F) const {
00533       DenseMap<Function*, unsigned>::iterator I = VarargNodes.find(F);
00534       assert(I != VarargNodes.end() && "Function does not take var args!");
00535       return I->second;
00536     }
00537 
00538     /// getNodeValue - Get the node for the specified LLVM value and set the
00539     /// value for it to be the specified value.
00540     unsigned getNodeValue(Value &V) {
00541       unsigned Index = getNode(&V);
00542       GraphNodes[Index].setValue(&V);
00543       return Index;
00544     }
00545 
00546     unsigned UniteNodes(unsigned First, unsigned Second,
00547                         bool UnionByRank = true);
00548     unsigned FindNode(unsigned Node);
00549     unsigned FindNode(unsigned Node) const;
00550 
00551     void IdentifyObjects(Module &M);
00552     void CollectConstraints(Module &M);
00553     bool AnalyzeUsesOfFunction(Value *);
00554     void CreateConstraintGraph();
00555     void OptimizeConstraints();
00556     unsigned FindEquivalentNode(unsigned, unsigned);
00557     void ClumpAddressTaken();
00558     void RewriteConstraints();
00559     void HU();
00560     void HVN();
00561     void HCD();
00562     void Search(unsigned Node);
00563     void UnitePointerEquivalences();
00564     void SolveConstraints();
00565     bool QueryNode(unsigned Node);
00566     void Condense(unsigned Node);
00567     void HUValNum(unsigned Node);
00568     void HVNValNum(unsigned Node);
00569     unsigned getNodeForConstantPointer(Constant *C);
00570     unsigned getNodeForConstantPointerTarget(Constant *C);
00571     void AddGlobalInitializerConstraints(unsigned, Constant *C);
00572 
00573     void AddConstraintsForNonInternalLinkage(Function *F);
00574     void AddConstraintsForCall(CallSite CS, Function *F);
00575     bool AddConstraintsForExternalCall(CallSite CS, Function *F);
00576 
00577 
00578     void PrintNode(const Node *N) const;
00579     void PrintConstraints() const ;
00580     void PrintConstraint(const Constraint &) const;
00581     void PrintLabels() const;
00582     void PrintPointsToGraph() const;
00583 
00584     //===------------------------------------------------------------------===//
00585     // Instruction visitation methods for adding constraints
00586     //
00587     friend class InstVisitor<Andersens>;
00588     void visitReturnInst(ReturnInst &RI);
00589     void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); }
00590     void visitCallInst(CallInst &CI) { visitCallSite(CallSite(&CI)); }
00591     void visitCallSite(CallSite CS);
00592     void visitAllocationInst(AllocationInst &AI);
00593     void visitLoadInst(LoadInst &LI);
00594     void visitStoreInst(StoreInst &SI);
00595     void visitGetElementPtrInst(GetElementPtrInst &GEP);
00596     void visitPHINode(PHINode &PN);
00597     void visitCastInst(CastInst &CI);
00598     void visitICmpInst(ICmpInst &ICI) {} // NOOP!
00599     void visitFCmpInst(FCmpInst &ICI) {} // NOOP!
00600     void visitSelectInst(SelectInst &SI);
00601     void visitVAArg(VAArgInst &I);
00602     void visitInstruction(Instruction &I);
00603 
00604     //===------------------------------------------------------------------===//
00605     // Implement Analyize interface
00606     //
00607     void print(std::ostream &O, const Module* M) const {
00608       PrintPointsToGraph();
00609     }
00610   };
00611 }
00612 
00613 char Andersens::ID = 0;
00614 static RegisterPass<Andersens>
00615 X("anders-aa", "Andersen's Interprocedural Alias Analysis", false, true);
00616 static RegisterAnalysisGroup<AliasAnalysis> Y(X);
00617 
00618 // Initialize Timestamp Counter (static).
00619 unsigned Andersens::Node::Counter = 0;
00620 
00621 ModulePass *llvm::createAndersensPass() { return new Andersens(); }
00622 
00623 //===----------------------------------------------------------------------===//
00624 //                  AliasAnalysis Interface Implementation
00625 //===----------------------------------------------------------------------===//
00626 
00627 AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size,
00628                                             const Value *V2, unsigned V2Size) {
00629   Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))];
00630   Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))];
00631 
00632   // Check to see if the two pointers are known to not alias.  They don't alias
00633   // if their points-to sets do not intersect.
00634   if (!N1->intersectsIgnoring(N2, NullObject))
00635     return NoAlias;
00636 
00637   return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
00638 }
00639 
00640 AliasAnalysis::ModRefResult
00641 Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
00642   // The only thing useful that we can contribute for mod/ref information is
00643   // when calling external function calls: if we know that memory never escapes
00644   // from the program, it cannot be modified by an external call.
00645   //
00646   // NOTE: This is not really safe, at least not when the entire program is not
00647   // available.  The deal is that the external function could call back into the
00648   // program and modify stuff.  We ignore this technical niggle for now.  This
00649   // is, after all, a "research quality" implementation of Andersen's analysis.
00650   if (Function *F = CS.getCalledFunction())
00651     if (F->isDeclaration()) {
00652       Node *N1 = &GraphNodes[FindNode(getNode(P))];
00653 
00654       if (N1->PointsTo->empty())
00655         return NoModRef;
00656 #if FULL_UNIVERSAL
00657       if (!UniversalSet->PointsTo->test(FindNode(getNode(P))))
00658         return NoModRef;  // Universal set does not contain P
00659 #else
00660       if (!N1->PointsTo->test(UniversalSet))
00661         return NoModRef;  // P doesn't point to the universal set.
00662 #endif
00663     }
00664 
00665   return AliasAnalysis::getModRefInfo(CS, P, Size);
00666 }
00667 
00668 AliasAnalysis::ModRefResult
00669 Andersens::getModRefInfo(CallSite CS1, CallSite CS2) {
00670   return AliasAnalysis::getModRefInfo(CS1,CS2);
00671 }
00672 
00673 /// getMustAlias - We can provide must alias information if we know that a
00674 /// pointer can only point to a specific function or the null pointer.
00675 /// Unfortunately we cannot determine must-alias information for global
00676 /// variables or any other memory memory objects because we do not track whether
00677 /// a pointer points to the beginning of an object or a field of it.
00678 void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) {
00679   Node *N = &GraphNodes[FindNode(getNode(P))];
00680   if (N->PointsTo->count() == 1) {
00681     Node *Pointee = &GraphNodes[N->PointsTo->find_first()];
00682     // If a function is the only object in the points-to set, then it must be
00683     // the destination.  Note that we can't handle global variables here,
00684     // because we don't know if the pointer is actually pointing to a field of
00685     // the global or to the beginning of it.
00686     if (Value *V = Pointee->getValue()) {
00687       if (Function *F = dyn_cast<Function>(V))
00688         RetVals.push_back(F);
00689     } else {
00690       // If the object in the points-to set is the null object, then the null
00691       // pointer is a must alias.
00692       if (Pointee == &GraphNodes[NullObject])
00693         RetVals.push_back(Constant::getNullValue(P->getType()));
00694     }
00695   }
00696   AliasAnalysis::getMustAliases(P, RetVals);
00697 }
00698 
00699 /// pointsToConstantMemory - If we can determine that this pointer only points
00700 /// to constant memory, return true.  In practice, this means that if the
00701 /// pointer can only point to constant globals, functions, or the null pointer,
00702 /// return true.
00703 ///
00704 bool Andersens::pointsToConstantMemory(const Value *P) {
00705   Node *N = &GraphNodes[FindNode(getNode(const_cast<Value*>(P)))];
00706   unsigned i;
00707 
00708   for (SparseBitVector<>::iterator bi = N->PointsTo->begin();
00709        bi != N->PointsTo->end();
00710        ++bi) {
00711     i = *bi;
00712     Node *Pointee = &GraphNodes[i];
00713     if (Value *V = Pointee->getValue()) {
00714       if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) &&
00715                                    !cast<GlobalVariable>(V)->isConstant()))
00716         return AliasAnalysis::pointsToConstantMemory(P);
00717     } else {
00718       if (i != NullObject)
00719         return AliasAnalysis::pointsToConstantMemory(P);
00720     }
00721   }
00722 
00723   return true;
00724 }
00725 
00726 //===----------------------------------------------------------------------===//
00727 //                       Object Identification Phase
00728 //===----------------------------------------------------------------------===//
00729 
00730 /// IdentifyObjects - This stage scans the program, adding an entry to the
00731 /// GraphNodes list for each memory object in the program (global stack or
00732 /// heap), and populates the ValueNodes and ObjectNodes maps for these objects.
00733 ///
00734 void Andersens::IdentifyObjects(Module &M) {
00735   unsigned NumObjects = 0;
00736 
00737   // Object #0 is always the universal set: the object that we don't know
00738   // anything about.
00739   assert(NumObjects == UniversalSet && "Something changed!");
00740   ++NumObjects;
00741 
00742   // Object #1 always represents the null pointer.
00743   assert(NumObjects == NullPtr && "Something changed!");
00744   ++NumObjects;
00745 
00746   // Object #2 always represents the null object (the object pointed to by null)
00747   assert(NumObjects == NullObject && "Something changed!");
00748   ++NumObjects;
00749 
00750   // Add all the globals first.
00751   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
00752        I != E; ++I) {
00753     ObjectNodes[I] = NumObjects++;
00754     ValueNodes[I] = NumObjects++;
00755   }
00756 
00757   // Add nodes for all of the functions and the instructions inside of them.
00758   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
00759     // The function itself is a memory object.
00760     unsigned First = NumObjects;
00761     ValueNodes[F] = NumObjects++;
00762     if (isa<PointerType>(F->getFunctionType()->getReturnType()))
00763       ReturnNodes[F] = NumObjects++;
00764     if (F->getFunctionType()->isVarArg())
00765       VarargNodes[F] = NumObjects++;
00766 
00767 
00768     // Add nodes for all of the incoming pointer arguments.
00769     for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
00770          I != E; ++I)
00771       {
00772         if (isa<PointerType>(I->getType()))
00773           ValueNodes[I] = NumObjects++;
00774       }
00775     MaxK[First] = NumObjects - First;
00776 
00777     // Scan the function body, creating a memory object for each heap/stack
00778     // allocation in the body of the function and a node to represent all
00779     // pointer values defined by instructions and used as operands.
00780     for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
00781       // If this is an heap or stack allocation, create a node for the memory
00782       // object.
00783       if (isa<PointerType>(II->getType())) {
00784         ValueNodes[&*II] = NumObjects++;
00785         if (AllocationInst *AI = dyn_cast<AllocationInst>(&*II))
00786           ObjectNodes[AI] = NumObjects++;
00787       }
00788 
00789       // Calls to inline asm need to be added as well because the callee isn't
00790       // referenced anywhere else.
00791       if (CallInst *CI = dyn_cast<CallInst>(&*II)) {
00792         Value *Callee = CI->getCalledValue();
00793         if (isa<InlineAsm>(Callee))
00794           ValueNodes[Callee] = NumObjects++;
00795       }
00796     }
00797   }
00798 
00799   // Now that we know how many objects to create, make them all now!
00800   GraphNodes.resize(NumObjects);
00801   NumNodes += NumObjects;
00802 }
00803 
00804 //===----------------------------------------------------------------------===//
00805 //                     Constraint Identification Phase
00806 //===----------------------------------------------------------------------===//
00807 
00808 /// getNodeForConstantPointer - Return the node corresponding to the constant
00809 /// pointer itself.
00810 unsigned Andersens::getNodeForConstantPointer(Constant *C) {
00811   assert(isa<PointerType>(C->getType()) && "Not a constant pointer!");
00812 
00813   if (isa<ConstantPointerNull>(C) || isa<UndefValue>(C))
00814     return NullPtr;
00815   else if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
00816     return getNode(GV);
00817   else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
00818     switch (CE->getOpcode()) {
00819     case Instruction::GetElementPtr:
00820       return getNodeForConstantPointer(CE->getOperand(0));
00821     case Instruction::IntToPtr:
00822       return UniversalSet;
00823     case Instruction::BitCast:
00824       return getNodeForConstantPointer(CE->getOperand(0));
00825     default:
00826       cerr << "Constant Expr not yet handled: " << *CE << "\n";
00827       assert(0);
00828     }
00829   } else {
00830     assert(0 && "Unknown constant pointer!");
00831   }
00832   return 0;
00833 }
00834 
00835 /// getNodeForConstantPointerTarget - Return the node POINTED TO by the
00836 /// specified constant pointer.
00837 unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) {
00838   assert(isa<PointerType>(C->getType()) && "Not a constant pointer!");
00839 
00840   if (isa<ConstantPointerNull>(C))
00841     return NullObject;
00842   else if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
00843     return getObject(GV);
00844   else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
00845     switch (CE->getOpcode()) {
00846     case Instruction::GetElementPtr:
00847       return getNodeForConstantPointerTarget(CE->getOperand(0));
00848     case Instruction::IntToPtr:
00849       return UniversalSet;
00850     case Instruction::BitCast:
00851       return getNodeForConstantPointerTarget(CE->getOperand(0));
00852     default:
00853       cerr << "Constant Expr not yet handled: " << *CE << "\n";
00854       assert(0);
00855     }
00856   } else {
00857     assert(0 && "Unknown constant pointer!");
00858   }
00859   return 0;
00860 }
00861 
00862 /// AddGlobalInitializerConstraints - Add inclusion constraints for the memory
00863 /// object N, which contains values indicated by C.
00864 void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex,
00865                                                 Constant *C) {
00866   if (C->getType()->isSingleValueType()) {
00867     if (isa<PointerType>(C->getType()))
00868       Constraints.push_back(Constraint(Constraint::Copy, NodeIndex,
00869                                        getNodeForConstantPointer(C)));
00870   } else if (C->isNullValue()) {
00871     Constraints.push_back(Constraint(Constraint::Copy, NodeIndex,
00872                                      NullObject));
00873     return;
00874   } else if (!isa<UndefValue>(C)) {
00875     // If this is an array or struct, include constraints for each element.
00876     assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C));
00877     for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
00878       AddGlobalInitializerConstraints(NodeIndex,
00879                                       cast<Constant>(C->getOperand(i)));
00880   }
00881 }
00882 
00883 /// AddConstraintsForNonInternalLinkage - If this function does not have
00884 /// internal linkage, realize that we can't trust anything passed into or
00885 /// returned by this function.
00886 void Andersens::AddConstraintsForNonInternalLinkage(Function *F) {
00887   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
00888     if (isa<PointerType>(I->getType()))
00889       // If this is an argument of an externally accessible function, the
00890       // incoming pointer might point to anything.
00891       Constraints.push_back(Constraint(Constraint::Copy, getNode(I),
00892                                        UniversalSet));
00893 }
00894 
00895 /// AddConstraintsForCall - If this is a call to a "known" function, add the
00896 /// constraints and return true.  If this is a call to an unknown function,
00897 /// return false.
00898 bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) {
00899   assert(F->isDeclaration() && "Not an external function!");
00900 
00901   // These functions don't induce any points-to constraints.
00902   if (F->getName() == "atoi" || F->getName() == "atof" ||
00903       F->getName() == "atol" || F->getName() == "atoll" ||
00904       F->getName() == "remove" || F->getName() == "unlink" ||
00905       F->getName() == "rename" || F->getName() == "memcmp" ||
00906       F->getName() == "llvm.memset.i32" ||
00907       F->getName() == "llvm.memset.i64" ||
00908       F->getName() == "strcmp" || F->getName() == "strncmp" ||
00909       F->getName() == "execl" || F->getName() == "execlp" ||
00910       F->getName() == "execle" || F->getName() == "execv" ||
00911       F->getName() == "execvp" || F->getName() == "chmod" ||
00912       F->getName() == "puts" || F->getName() == "write" ||
00913       F->getName() == "open" || F->getName() == "create" ||
00914       F->getName() == "truncate" || F->getName() == "chdir" ||
00915       F->getName() == "mkdir" || F->getName() == "rmdir" ||
00916       F->getName() == "read" || F->getName() == "pipe" ||
00917       F->getName() == "wait" || F->getName() == "time" ||
00918       F->getName() == "stat" || F->getName() == "fstat" ||
00919       F->getName() == "lstat" || F->getName() == "strtod" ||
00920       F->getName() == "strtof" || F->getName() == "strtold" ||
00921       F->getName() == "fopen" || F->getName() == "fdopen" ||
00922       F->getName() == "freopen" ||
00923       F->getName() == "fflush" || F->getName() == "feof" ||
00924       F->getName() == "fileno" || F->getName() == "clearerr" ||
00925       F->getName() == "rewind" || F->getName() == "ftell" ||
00926       F->getName() == "ferror" || F->getName() == "fgetc" ||
00927       F->getName() == "fgetc" || F->getName() == "_IO_getc" ||
00928       F->getName() == "fwrite" || F->getName() == "fread" ||
00929       F->getName() == "fgets" || F->getName() == "ungetc" ||
00930       F->getName() == "fputc" ||
00931       F->getName() == "fputs" || F->getName() == "putc" ||
00932       F->getName() == "ftell" || F->getName() == "rewind" ||
00933       F->getName() == "_IO_putc" || F->getName() == "fseek" ||
00934       F->getName() == "fgetpos" || F->getName() == "fsetpos" ||
00935       F->getName() == "printf" || F->getName() == "fprintf" ||
00936       F->getName() == "sprintf" || F->getName() == "vprintf" ||
00937       F->getName() == "vfprintf" || F->getName() == "vsprintf" ||
00938       F->getName() == "scanf" || F->getName() == "fscanf" ||
00939       F->getName() == "sscanf" || F->getName() == "__assert_fail" ||
00940       F->getName() == "modf")
00941     return true;
00942 
00943 
00944   // These functions do induce points-to edges.
00945   if (F->getName() == "llvm.memcpy.i32" || F->getName() == "llvm.memcpy.i64" ||
00946       F->getName() == "llvm.memmove.i32" ||F->getName() == "llvm.memmove.i64" ||
00947       F->getName() == "memmove") {
00948 
00949     // *Dest = *Src, which requires an artificial graph node to represent the
00950     // constraint.  It is broken up into *Dest = temp, temp = *Src
00951     unsigned FirstArg = getNode(CS.getArgument(0));
00952     unsigned SecondArg = getNode(CS.getArgument(1));
00953     unsigned TempArg = GraphNodes.size();
00954     GraphNodes.push_back(Node());
00955     Constraints.push_back(Constraint(Constraint::Store,
00956                                      FirstArg, TempArg));
00957     Constraints.push_back(Constraint(Constraint::Load,
00958                                      TempArg, SecondArg));
00959     // In addition, Dest = Src
00960     Constraints.push_back(Constraint(Constraint::Copy,
00961                                      FirstArg, SecondArg));
00962     return true;
00963   }
00964 
00965   // Result = Arg0
00966   if (F->getName() == "realloc" || F->getName() == "strchr" ||
00967       F->getName() == "strrchr" || F->getName() == "strstr" ||
00968       F->getName() == "strtok") {
00969     Constraints.push_back(Constraint(Constraint::Copy,
00970                                      getNode(CS.getInstruction()),
00971                                      getNode(CS.getArgument(0))));
00972     return true;
00973   }
00974 
00975   return false;
00976 }
00977 
00978 
00979 
00980 /// AnalyzeUsesOfFunction - Look at all of the users of the specified function.
00981 /// If this is used by anything complex (i.e., the address escapes), return
00982 /// true.
00983 bool Andersens::AnalyzeUsesOfFunction(Value *V) {
00984 
00985   if (!isa<PointerType>(V->getType())) return true;
00986 
00987   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
00988     if (dyn_cast<LoadInst>(*UI)) {
00989       return false;
00990     } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
00991       if (V == SI->getOperand(1)) {
00992         return false;
00993       } else if (SI->getOperand(1)) {
00994         return true;  // Storing the pointer
00995       }
00996     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
00997       if (AnalyzeUsesOfFunction(GEP)) return true;
00998     } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
00999       // Make sure that this is just the function being called, not that it is
01000       // passing into the function.
01001       for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
01002         if (CI->getOperand(i) == V) return true;
01003     } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
01004       // Make sure that this is just the function being called, not that it is
01005       // passing into the function.
01006       for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i)
01007         if (II->getOperand(i) == V) return true;
01008     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
01009       if (CE->getOpcode() == Instruction::GetElementPtr ||
01010           CE->getOpcode() == Instruction::BitCast) {
01011         if (AnalyzeUsesOfFunction(CE))
01012           return true;
01013       } else {
01014         return true;
01015       }
01016     } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
01017       if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
01018         return true;  // Allow comparison against null.
01019     } else if (dyn_cast<FreeInst>(*UI)) {
01020       return false;
01021     } else {
01022       return true;
01023     }
01024   return false;
01025 }
01026 
01027 /// CollectConstraints - This stage scans the program, adding a constraint to
01028 /// the Constraints list for each instruction in the program that induces a
01029 /// constraint, and setting up the initial points-to graph.
01030 ///
01031 void Andersens::CollectConstraints(Module &M) {
01032   // First, the universal set points to itself.
01033   Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet,
01034                                    UniversalSet));
01035   Constraints.push_back(Constraint(Constraint::Store, UniversalSet,
01036                                    UniversalSet));
01037 
01038   // Next, the null pointer points to the null object.
01039   Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject));
01040 
01041   // Next, add any constraints on global variables and their initializers.
01042   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
01043        I != E; ++I) {
01044     // Associate the address of the global object as pointing to the memory for
01045     // the global: &G = <G memory>
01046     unsigned ObjectIndex = getObject(I);
01047     Node *Object = &GraphNodes[ObjectIndex];
01048     Object->setValue(I);
01049     Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I),
01050                                      ObjectIndex));
01051 
01052     if (I->hasInitializer()) {
01053       AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer());
01054     } else {
01055       // If it doesn't have an initializer (i.e. it's defined in another
01056       // translation unit), it points to the universal set.
01057       Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex,
01058                                        UniversalSet));
01059     }
01060   }
01061 
01062   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
01063     // Set up the return value node.
01064     if (isa<PointerType>(F->getFunctionType()->getReturnType()))
01065       GraphNodes[getReturnNode(F)].setValue(F);
01066     if (F->getFunctionType()->isVarArg())
01067       GraphNodes[getVarargNode(F)].setValue(F);
01068 
01069     // Set up incoming argument nodes.
01070     for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
01071          I != E; ++I)
01072       if (isa<PointerType>(I->getType()))
01073         getNodeValue(*I);
01074 
01075     // At some point we should just add constraints for the escaping functions
01076     // at solve time, but this slows down solving. For now, we simply mark
01077     // address taken functions as escaping and treat them as external.
01078     if (!F->hasInternalLinkage() || AnalyzeUsesOfFunction(F))
01079       AddConstraintsForNonInternalLinkage(F);
01080 
01081     if (!F->isDeclaration()) {
01082       // Scan the function body, creating a memory object for each heap/stack
01083       // allocation in the body of the function and a node to represent all
01084       // pointer values defined by instructions and used as operands.
01085       visit(F);
01086     } else {
01087       // External functions that return pointers return the universal set.
01088       if (isa<PointerType>(F->getFunctionType()->getReturnType()))
01089         Constraints.push_back(Constraint(Constraint::Copy,
01090                                          getReturnNode(F),
01091                                          UniversalSet));
01092 
01093       // Any pointers that are passed into the function have the universal set
01094       // stored into them.
01095       for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
01096            I != E; ++I)
01097         if (isa<PointerType>(I->getType())) {
01098           // Pointers passed into external functions could have anything stored
01099           // through them.
01100           Constraints.push_back(Constraint(Constraint::Store, getNode(I),
01101                                            UniversalSet));
01102           // Memory objects passed into external function calls can have the
01103           // universal set point to them.
01104 #if FULL_UNIVERSAL
01105           Constraints.push_back(Constraint(Constraint::Copy,
01106                                            UniversalSet,
01107                                            getNode(I)));
01108 #else
01109           Constraints.push_back(Constraint(Constraint::Copy,
01110                                            getNode(I),
01111                                            UniversalSet));
01112 #endif
01113         }
01114 
01115       // If this is an external varargs function, it can also store pointers
01116       // into any pointers passed through the varargs section.
01117       if (F->getFunctionType()->isVarArg())
01118         Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F),
01119                                          UniversalSet));
01120     }
01121   }
01122   NumConstraints += Constraints.size();
01123 }
01124 
01125 
01126 void Andersens::visitInstruction(Instruction &I) {
01127 #ifdef NDEBUG
01128   return;          // This function is just a big assert.
01129 #endif
01130   if (isa<BinaryOperator>(I))
01131     return;
01132   // Most instructions don't have any effect on pointer values.
01133   switch (I.getOpcode()) {
01134   case Instruction::Br:
01135   case Instruction::Switch:
01136   case Instruction::Unwind:
01137   case Instruction::Unreachable:
01138   case Instruction::Free:
01139   case Instruction::ICmp:
01140   case Instruction::FCmp:
01141     return;
01142   default:
01143     // Is this something we aren't handling yet?
01144     cerr << "Unknown instruction: " << I;
01145     abort();
01146   }
01147 }
01148 
01149 void Andersens::visitAllocationInst(AllocationInst &AI) {
01150   unsigned ObjectIndex = getObject(&AI);
01151   GraphNodes[ObjectIndex].setValue(&AI);
01152   Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI),
01153                                    ObjectIndex));
01154 }
01155 
01156 void Andersens::visitReturnInst(ReturnInst &RI) {
01157   if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType()))
01158     // return V   -->   <Copy/retval{F}/v>
01159     Constraints.push_back(Constraint(Constraint::Copy,
01160                                      getReturnNode(RI.getParent()->getParent()),
01161                                      getNode(RI.getOperand(0))));
01162 }
01163 
01164 void Andersens::visitLoadInst(LoadInst &LI) {
01165   if (isa<PointerType>(LI.getType()))
01166     // P1 = load P2  -->  <Load/P1/P2>
01167     Constraints.push_back(Constraint(Constraint::Load, getNodeValue(LI),
01168                                      getNode(LI.getOperand(0))));
01169 }
01170 
01171 void Andersens::visitStoreInst(StoreInst &SI) {
01172   if (isa<PointerType>(SI.getOperand(0)->getType()))
01173     // store P1, P2  -->  <Store/P2/P1>
01174     Constraints.push_back(Constraint(Constraint::Store,
01175                                      getNode(SI.getOperand(1)),
01176                                      getNode(SI.getOperand(0))));
01177 }
01178 
01179 void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) {
01180   // P1 = getelementptr P2, ... --> <Copy/P1/P2>
01181   Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(GEP),
01182                                    getNode(GEP.getOperand(0))));
01183 }
01184 
01185 void Andersens::visitPHINode(PHINode &PN) {
01186   if (isa<PointerType>(PN.getType())) {
01187     unsigned PNN = getNodeValue(PN);
01188     for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
01189       // P1 = phi P2, P3  -->  <Copy/P1/P2>, <Copy/P1/P3>, ...
01190       Constraints.push_back(Constraint(Constraint::Copy, PNN,
01191                                        getNode(PN.getIncomingValue(i))));
01192   }
01193 }
01194 
01195 void Andersens::visitCastInst(CastInst &CI) {
01196   Value *Op = CI.getOperand(0);
01197   if (isa<PointerType>(CI.getType())) {
01198     if (isa<PointerType>(Op->getType())) {
01199       // P1 = cast P2  --> <Copy/P1/P2>
01200       Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI),
01201                                        getNode(CI.getOperand(0))));
01202     } else {
01203       // P1 = cast int --> <Copy/P1/Univ>
01204 #if 0
01205       Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI),
01206                                        UniversalSet));
01207 #else
01208       getNodeValue(CI);
01209 #endif
01210     }
01211   } else if (isa<PointerType>(Op->getType())) {
01212     // int = cast P1 --> <Copy/Univ/P1>
01213 #if 0
01214     Constraints.push_back(Constraint(Constraint::Copy,
01215                                      UniversalSet,
01216                                      getNode(CI.getOperand(0))));
01217 #else
01218     getNode(CI.getOperand(0));
01219 #endif
01220   }
01221 }
01222 
01223 void Andersens::visitSelectInst(SelectInst &SI) {
01224   if (isa<PointerType>(SI.getType())) {
01225     unsigned SIN = getNodeValue(SI);
01226     // P1 = select C, P2, P3   ---> <Copy/P1/P2>, <Copy/P1/P3>
01227     Constraints.push_back(Constraint(Constraint::Copy, SIN,
01228                                      getNode(SI.getOperand(1))));
01229     Constraints.push_back(Constraint(Constraint::Copy, SIN,
01230                                      getNode(SI.getOperand(2))));
01231   }
01232 }
01233 
01234 void Andersens::visitVAArg(VAArgInst &I) {
01235   assert(0 && "vaarg not handled yet!");
01236 }
01237 
01238 /// AddConstraintsForCall - Add constraints for a call with actual arguments
01239 /// specified by CS to the function specified by F.  Note that the types of
01240 /// arguments might not match up in the case where this is an indirect call and
01241 /// the function pointer has been casted.  If this is the case, do something
01242 /// reasonable.
01243 void Andersens::AddConstraintsForCall(CallSite CS, Function *F) {
01244   Value *CallValue = CS.getCalledValue();
01245   bool IsDeref = F == NULL;
01246 
01247   // If this is a call to an external function, try to handle it directly to get
01248   // some taste of context sensitivity.
01249   if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F))
01250     return;
01251 
01252   if (isa<PointerType>(CS.getType())) {
01253     unsigned CSN = getNode(CS.getInstruction());
01254     if (!F || isa<PointerType>(F->getFunctionType()->getReturnType())) {
01255       if (IsDeref)
01256         Constraints.push_back(Constraint(Constraint::Load, CSN,
01257                                          getNode(CallValue), CallReturnPos));
01258       else
01259         Constraints.push_back(Constraint(Constraint::Copy, CSN,
01260                                          getNode(CallValue) + CallReturnPos));
01261     } else {
01262       // If the function returns a non-pointer value, handle this just like we
01263       // treat a nonpointer cast to pointer.
01264       Constraints.push_back(Constraint(Constraint::Copy, CSN,
01265                                        UniversalSet));
01266     }
01267   } else if (F && isa<PointerType>(F->getFunctionType()->getReturnType())) {
01268 #if FULL_UNIVERSAL
01269     Constraints.push_back(Constraint(Constraint::Copy,
01270                                      UniversalSet,
01271                                      getNode(CallValue) + CallReturnPos));
01272 #else
01273     Constraints.push_back(Constraint(Constraint::Copy,
01274                                       getNode(CallValue) + CallReturnPos,
01275                                       UniversalSet));
01276 #endif
01277                           
01278     
01279   }
01280 
01281   CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end();
01282   bool external = !F ||  F->isDeclaration();
01283   if (F) {
01284     // Direct Call
01285     Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
01286     for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) 
01287       {
01288 #if !FULL_UNIVERSAL
01289         if (external && isa<PointerType>((*ArgI)->getType())) 
01290           {
01291             // Add constraint that ArgI can now point to anything due to
01292             // escaping, as can everything it points to. The second portion of
01293             // this should be taken care of by universal = *universal
01294             Constraints.push_back(Constraint(Constraint::Copy,
01295                                              getNode(*ArgI),
01296                                              UniversalSet));
01297           }
01298 #endif
01299         if (isa<PointerType>(AI->getType())) {
01300           if (isa<PointerType>((*ArgI)->getType())) {
01301             // Copy the actual argument into the formal argument.
01302             Constraints.push_back(Constraint(Constraint::Copy, getNode(AI),
01303                                              getNode(*ArgI)));
01304           } else {
01305             Constraints.push_back(Constraint(Constraint::Copy, getNode(AI),
01306                                              UniversalSet));
01307           }
01308         } else if (isa<PointerType>((*ArgI)->getType())) {
01309 #if FULL_UNIVERSAL
01310           Constraints.push_back(Constraint(Constraint::Copy,
01311                                            UniversalSet,
01312                                            getNode(*ArgI)));
01313 #else
01314           Constraints.push_back(Constraint(Constraint::Copy,
01315                                            getNode(*ArgI),
01316                                            UniversalSet));
01317 #endif
01318         }
01319       }
01320   } else {
01321     //Indirect Call
01322     unsigned ArgPos = CallFirstArgPos;
01323     for (; ArgI != ArgE; ++ArgI) {
01324       if (isa<PointerType>((*ArgI)->getType())) {
01325         // Copy the actual argument into the formal argument.
01326         Constraints.push_back(Constraint(Constraint::Store,
01327                                          getNode(CallValue),
01328                                          getNode(*ArgI), ArgPos++));
01329       } else {
01330         Constraints.push_back(Constraint(Constraint::Store,
01331                                          getNode (CallValue),
01332                                          UniversalSet, ArgPos++));
01333       }
01334     }
01335   }
01336   // Copy all pointers passed through the varargs section to the varargs node.
01337   if (F && F->getFunctionType()->isVarArg())
01338     for (; ArgI != ArgE; ++ArgI)
01339       if (isa<PointerType>((*ArgI)->getType()))
01340         Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F),
01341                                          getNode(*ArgI)));
01342   // If more arguments are passed in than we track, just drop them on the floor.
01343 }
01344 
01345 void Andersens::visitCallSite(CallSite CS) {
01346   if (isa<PointerType>(CS.getType()))
01347     getNodeValue(*CS.getInstruction());
01348 
01349   if (Function *F = CS.getCalledFunction()) {
01350     AddConstraintsForCall(CS, F);
01351   } else {
01352     AddConstraintsForCall(CS, NULL);
01353   }
01354 }
01355 
01356 //===----------------------------------------------------------------------===//
01357 //                         Constraint Solving Phase
01358 //===----------------------------------------------------------------------===//
01359 
01360 /// intersects - Return true if the points-to set of this node intersects
01361 /// with the points-to set of the specified node.
01362 bool Andersens::Node::intersects(Node *N) const {
01363   return PointsTo->intersects(N->PointsTo);
01364 }
01365 
01366 /// intersectsIgnoring - Return true if the points-to set of this node
01367 /// intersects with the points-to set of the specified node on any nodes
01368 /// except for the specified node to ignore.
01369 bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const {
01370   // TODO: If we are only going to call this with the same value for Ignoring,
01371   // we should move the special values out of the points-to bitmap.
01372   bool WeHadIt = PointsTo->test(Ignoring);
01373   bool NHadIt = N->PointsTo->test(Ignoring);
01374   bool Result = false;
01375   if (WeHadIt)
01376     PointsTo->reset(Ignoring);
01377   if (NHadIt)
01378     N->PointsTo->reset(Ignoring);
01379   Result = PointsTo->intersects(N->PointsTo);
01380   if (WeHadIt)
01381     PointsTo->set(Ignoring);
01382   if (NHadIt)
01383     N->PointsTo->set(Ignoring);
01384   return Result;
01385 }
01386 
01387 void dumpToDOUT(SparseBitVector<> *bitmap) {
01388 #ifndef NDEBUG
01389   dump(*bitmap, DOUT);
01390 #endif
01391 }
01392 
01393 
01394 /// Clump together address taken variables so that the points-to sets use up
01395 /// less space and can be operated on faster.
01396 
01397 void Andersens::ClumpAddressTaken() {
01398 #undef DEBUG_TYPE
01399 #define DEBUG_TYPE "anders-aa-renumber"
01400   std::vector<unsigned> Translate;
01401   std::vector<Node> NewGraphNodes;
01402 
01403   Translate.resize(GraphNodes.size());
01404   unsigned NewPos = 0;
01405 
01406   for (unsigned i = 0; i < Constraints.size(); ++i) {
01407     Constraint &C = Constraints[i];
01408     if (C.Type == Constraint::AddressOf) {
01409       GraphNodes[C.Src].AddressTaken = true;
01410     }
01411   }
01412   for (unsigned i = 0; i < NumberSpecialNodes; ++i) {
01413     unsigned Pos = NewPos++;
01414     Translate[i] = Pos;
01415     NewGraphNodes.push_back(GraphNodes[i]);
01416     DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
01417   }
01418 
01419   // I believe this ends up being faster than making two vectors and splicing
01420   // them.
01421   for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
01422     if (GraphNodes[i].AddressTaken) {
01423       unsigned Pos = NewPos++;
01424       Translate[i] = Pos;
01425       NewGraphNodes.push_back(GraphNodes[i]);
01426       DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
01427     }
01428   }
01429 
01430   for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
01431     if (!GraphNodes[i].AddressTaken) {
01432       unsigned Pos = N