LLVM API Documentation
00001 //===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines an implementation of Andersen's interprocedural alias 00011 // analysis 00012 // 00013 // In pointer analysis terms, this is a subset-based, flow-insensitive, 00014 // field-sensitive, and context-insensitive algorithm pointer algorithm. 00015 // 00016 // This algorithm is implemented as three stages: 00017 // 1. Object identification. 00018 // 2. Inclusion constraint identification. 00019 // 3. Offline constraint graph optimization 00020 // 4. Inclusion constraint solving. 00021 // 00022 // The object identification stage identifies all of the memory objects in the 00023 // program, which includes globals, heap allocated objects, and stack allocated 00024 // objects. 00025 // 00026 // The inclusion constraint identification stage finds all inclusion constraints 00027 // in the program by scanning the program, looking for pointer assignments and 00028 // other statements that effect the points-to graph. For a statement like "A = 00029 // B", this statement is processed to indicate that A can point to anything that 00030 // B can point to. Constraints can handle copies, loads, and stores, and 00031 // address taking. 00032 // 00033 // The offline constraint graph optimization portion includes offline variable 00034 // substitution algorithms intended to compute pointer and location 00035 // equivalences. Pointer equivalences are those pointers that will have the 00036 // same points-to sets, and location equivalences are those variables that 00037 // always appear together in points-to sets. It also includes an offline 00038 // cycle detection algorithm that allows cycles to be collapsed sooner 00039 // during solving. 00040 // 00041 // The inclusion constraint solving phase iteratively propagates the inclusion 00042 // constraints until a fixed point is reached. This is an O(N^3) algorithm. 00043 // 00044 // Function constraints are handled as if they were structs with X fields. 00045 // Thus, an access to argument X of function Y is an access to node index 00046 // getNode(Y) + X. This representation allows handling of indirect calls 00047 // without any issues. To wit, an indirect call Y(a,b) is equivalent to 00048 // *(Y + 1) = a, *(Y + 2) = b. 00049 // The return node for a function is always located at getNode(F) + 00050 // CallReturnPos. The arguments start at getNode(F) + CallArgPos. 00051 // 00052 // Future Improvements: 00053 // Use of BDD's. 00054 //===----------------------------------------------------------------------===// 00055 00056 #define DEBUG_TYPE "anders-aa" 00057 #include "llvm/Constants.h" 00058 #include "llvm/DerivedTypes.h" 00059 #include "llvm/Instructions.h" 00060 #include "llvm/Module.h" 00061 #include "llvm/Pass.h" 00062 #include "llvm/Support/Compiler.h" 00063 #include "llvm/Support/InstIterator.h" 00064 #include "llvm/Support/InstVisitor.h" 00065 #include "llvm/Analysis/AliasAnalysis.h" 00066 #include "llvm/Analysis/Passes.h" 00067 #include "llvm/Support/Debug.h" 00068 #include "llvm/ADT/Statistic.h" 00069 #include "llvm/ADT/SparseBitVector.h" 00070 #include "llvm/ADT/DenseSet.h" 00071 #include <algorithm> 00072 #include <set> 00073 #include <list> 00074 #include <map> 00075 #include <stack> 00076 #include <vector> 00077 #include <queue> 00078 00079 // Determining the actual set of nodes the universal set can consist of is very 00080 // expensive because it means propagating around very large sets. We rely on 00081 // other analysis being able to determine which nodes can never be pointed to in 00082 // order to disambiguate further than "points-to anything". 00083 #define FULL_UNIVERSAL 0 00084 00085 using namespace llvm; 00086 STATISTIC(NumIters , "Number of iterations to reach convergence"); 00087 STATISTIC(NumConstraints, "Number of constraints"); 00088 STATISTIC(NumNodes , "Number of nodes"); 00089 STATISTIC(NumUnified , "Number of variables unified"); 00090 STATISTIC(NumErased , "Number of redundant constraints erased"); 00091 00092 static const unsigned SelfRep = (unsigned)-1; 00093 static const unsigned Unvisited = (unsigned)-1; 00094 // Position of the function return node relative to the function node. 00095 static const unsigned CallReturnPos = 1; 00096 // Position of the function call node relative to the function node. 00097 static const unsigned CallFirstArgPos = 2; 00098 00099 namespace { 00100 struct BitmapKeyInfo { 00101 static inline SparseBitVector<> *getEmptyKey() { 00102 return reinterpret_cast<SparseBitVector<> *>(-1); 00103 } 00104 static inline SparseBitVector<> *getTombstoneKey() { 00105 return reinterpret_cast<SparseBitVector<> *>(-2); 00106 } 00107 static unsigned getHashValue(const SparseBitVector<> *bitmap) { 00108 return bitmap->getHashValue(); 00109 } 00110 static bool isEqual(const SparseBitVector<> *LHS, 00111 const SparseBitVector<> *RHS) { 00112 if (LHS == RHS) 00113 return true; 00114 else if (LHS == getEmptyKey() || RHS == getEmptyKey() 00115 || LHS == getTombstoneKey() || RHS == getTombstoneKey()) 00116 return false; 00117 00118 return *LHS == *RHS; 00119 } 00120 00121 static bool isPod() { return true; } 00122 }; 00123 00124 class VISIBILITY_HIDDEN Andersens : public ModulePass, public AliasAnalysis, 00125 private InstVisitor<Andersens> { 00126 struct Node; 00127 00128 /// Constraint - Objects of this structure are used to represent the various 00129 /// constraints identified by the algorithm. The constraints are 'copy', 00130 /// for statements like "A = B", 'load' for statements like "A = *B", 00131 /// 'store' for statements like "*A = B", and AddressOf for statements like 00132 /// A = alloca; The Offset is applied as *(A + K) = B for stores, 00133 /// A = *(B + K) for loads, and A = B + K for copies. It is 00134 /// illegal on addressof constraints (because it is statically 00135 /// resolvable to A = &C where C = B + K) 00136 00137 struct Constraint { 00138 enum ConstraintType { Copy, Load, Store, AddressOf } Type; 00139 unsigned Dest; 00140 unsigned Src; 00141 unsigned Offset; 00142 00143 Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0) 00144 : Type(Ty), Dest(D), Src(S), Offset(O) { 00145 assert((Offset == 0 || Ty != AddressOf) && 00146 "Offset is illegal on addressof constraints"); 00147 } 00148 00149 bool operator==(const Constraint &RHS) const { 00150 return RHS.Type == Type 00151 && RHS.Dest == Dest 00152 && RHS.Src == Src 00153 && RHS.Offset == Offset; 00154 } 00155 00156 bool operator!=(const Constraint &RHS) const { 00157 return !(*this == RHS); 00158 } 00159 00160 bool operator<(const Constraint &RHS) const { 00161 if (RHS.Type != Type) 00162 return RHS.Type < Type; 00163 else if (RHS.Dest != Dest) 00164 return RHS.Dest < Dest; 00165 else if (RHS.Src != Src) 00166 return RHS.Src < Src; 00167 return RHS.Offset < Offset; 00168 } 00169 }; 00170 00171 // Information DenseSet requires implemented in order to be able to do 00172 // it's thing 00173 struct PairKeyInfo { 00174 static inline std::pair<unsigned, unsigned> getEmptyKey() { 00175 return std::make_pair(~0U, ~0U); 00176 } 00177 static inline std::pair<unsigned, unsigned> getTombstoneKey() { 00178 return std::make_pair(~0U - 1, ~0U - 1); 00179 } 00180 static unsigned getHashValue(const std::pair<unsigned, unsigned> &P) { 00181 return P.first ^ P.second; 00182 } 00183 static unsigned isEqual(const std::pair<unsigned, unsigned> &LHS, 00184 const std::pair<unsigned, unsigned> &RHS) { 00185 return LHS == RHS; 00186 } 00187 }; 00188 00189 struct ConstraintKeyInfo { 00190 static inline Constraint getEmptyKey() { 00191 return Constraint(Constraint::Copy, ~0U, ~0U, ~0U); 00192 } 00193 static inline Constraint getTombstoneKey() { 00194 return Constraint(Constraint::Copy, ~0U - 1, ~0U - 1, ~0U - 1); 00195 } 00196 static unsigned getHashValue(const Constraint &C) { 00197 return C.Src ^ C.Dest ^ C.Type ^ C.Offset; 00198 } 00199 static bool isEqual(const Constraint &LHS, 00200 const Constraint &RHS) { 00201 return LHS.Type == RHS.Type && LHS.Dest == RHS.Dest 00202 && LHS.Src == RHS.Src && LHS.Offset == RHS.Offset; 00203 } 00204 }; 00205 00206 // Node class - This class is used to represent a node in the constraint 00207 // graph. Due to various optimizations, it is not always the case that 00208 // there is a mapping from a Node to a Value. In particular, we add 00209 // artificial Node's that represent the set of pointed-to variables shared 00210 // for each location equivalent Node. 00211 struct Node { 00212 private: 00213 static unsigned Counter; 00214 00215 public: 00216 Value *Val; 00217 SparseBitVector<> *Edges; 00218 SparseBitVector<> *PointsTo; 00219 SparseBitVector<> *OldPointsTo; 00220 std::list<Constraint> Constraints; 00221 00222 // Pointer and location equivalence labels 00223 unsigned PointerEquivLabel; 00224 unsigned LocationEquivLabel; 00225 // Predecessor edges, both real and implicit 00226 SparseBitVector<> *PredEdges; 00227 SparseBitVector<> *ImplicitPredEdges; 00228 // Set of nodes that point to us, only use for location equivalence. 00229 SparseBitVector<> *PointedToBy; 00230 // Number of incoming edges, used during variable substitution to early 00231 // free the points-to sets 00232 unsigned NumInEdges; 00233 // True if our points-to set is in the Set2PEClass map 00234 bool StoredInHash; 00235 // True if our node has no indirect constraints (complex or otherwise) 00236 bool Direct; 00237 // True if the node is address taken, *or* it is part of a group of nodes 00238 // that must be kept together. This is set to true for functions and 00239 // their arg nodes, which must be kept at the same position relative to 00240 // their base function node. 00241 bool AddressTaken; 00242 00243 // Nodes in cycles (or in equivalence classes) are united together using a 00244 // standard union-find representation with path compression. NodeRep 00245 // gives the index into GraphNodes for the representative Node. 00246 unsigned NodeRep; 00247 00248 // Modification timestamp. Assigned from Counter. 00249 // Used for work list prioritization. 00250 unsigned Timestamp; 00251 00252 explicit Node(bool direct = true) : 00253 Val(0), Edges(0), PointsTo(0), OldPointsTo(0), 00254 PointerEquivLabel(0), LocationEquivLabel(0), PredEdges(0), 00255 ImplicitPredEdges(0), PointedToBy(0), NumInEdges(0), 00256 StoredInHash(false), Direct(direct), AddressTaken(false), 00257 NodeRep(SelfRep), Timestamp(0) { } 00258 00259 Node *setValue(Value *V) { 00260 assert(Val == 0 && "Value already set for this node!"); 00261 Val = V; 00262 return this; 00263 } 00264 00265 /// getValue - Return the LLVM value corresponding to this node. 00266 /// 00267 Value *getValue() const { return Val; } 00268 00269 /// addPointerTo - Add a pointer to the list of pointees of this node, 00270 /// returning true if this caused a new pointer to be added, or false if 00271 /// we already knew about the points-to relation. 00272 bool addPointerTo(unsigned Node) { 00273 return PointsTo->test_and_set(Node); 00274 } 00275 00276 /// intersects - Return true if the points-to set of this node intersects 00277 /// with the points-to set of the specified node. 00278 bool intersects(Node *N) const; 00279 00280 /// intersectsIgnoring - Return true if the points-to set of this node 00281 /// intersects with the points-to set of the specified node on any nodes 00282 /// except for the specified node to ignore. 00283 bool intersectsIgnoring(Node *N, unsigned) const; 00284 00285 // Timestamp a node (used for work list prioritization) 00286 void Stamp() { 00287 Timestamp = Counter++; 00288 } 00289 00290 bool isRep() const { 00291 return( (int) NodeRep < 0 ); 00292 } 00293 }; 00294 00295 struct WorkListElement { 00296 Node* node; 00297 unsigned Timestamp; 00298 WorkListElement(Node* n, unsigned t) : node(n), Timestamp(t) {} 00299 00300 // Note that we reverse the sense of the comparison because we 00301 // actually want to give low timestamps the priority over high, 00302 // whereas priority is typically interpreted as a greater value is 00303 // given high priority. 00304 bool operator<(const WorkListElement& that) const { 00305 return( this->Timestamp > that.Timestamp ); 00306 } 00307 }; 00308 00309 // Priority-queue based work list specialized for Nodes. 00310 class WorkList { 00311 std::priority_queue<WorkListElement> Q; 00312 00313 public: 00314 void insert(Node* n) { 00315 Q.push( WorkListElement(n, n->Timestamp) ); 00316 } 00317 00318 // We automatically discard non-representative nodes and nodes 00319 // that were in the work list twice (we keep a copy of the 00320 // timestamp in the work list so we can detect this situation by 00321 // comparing against the node's current timestamp). 00322 Node* pop() { 00323 while( !Q.empty() ) { 00324 WorkListElement x = Q.top(); Q.pop(); 00325 Node* INode = x.node; 00326 00327 if( INode->isRep() && 00328 INode->Timestamp == x.Timestamp ) { 00329 return(x.node); 00330 } 00331 } 00332 return(0); 00333 } 00334 00335 bool empty() { 00336 return Q.empty(); 00337 } 00338 }; 00339 00340 /// GraphNodes - This vector is populated as part of the object 00341 /// identification stage of the analysis, which populates this vector with a 00342 /// node for each memory object and fills in the ValueNodes map. 00343 std::vector<Node> GraphNodes; 00344 00345 /// ValueNodes - This map indicates the Node that a particular Value* is 00346 /// represented by. This contains entries for all pointers. 00347 DenseMap<Value*, unsigned> ValueNodes; 00348 00349 /// ObjectNodes - This map contains entries for each memory object in the 00350 /// program: globals, alloca's and mallocs. 00351 DenseMap<Value*, unsigned> ObjectNodes; 00352 00353 /// ReturnNodes - This map contains an entry for each function in the 00354 /// program that returns a value. 00355 DenseMap<Function*, unsigned> ReturnNodes; 00356 00357 /// VarargNodes - This map contains the entry used to represent all pointers 00358 /// passed through the varargs portion of a function call for a particular 00359 /// function. An entry is not present in this map for functions that do not 00360 /// take variable arguments. 00361 DenseMap<Function*, unsigned> VarargNodes; 00362 00363 00364 /// Constraints - This vector contains a list of all of the constraints 00365 /// identified by the program. 00366 std::vector<Constraint> Constraints; 00367 00368 // Map from graph node to maximum K value that is allowed (for functions, 00369 // this is equivalent to the number of arguments + CallFirstArgPos) 00370 std::map<unsigned, unsigned> MaxK; 00371 00372 /// This enum defines the GraphNodes indices that correspond to important 00373 /// fixed sets. 00374 enum { 00375 UniversalSet = 0, 00376 NullPtr = 1, 00377 NullObject = 2, 00378 NumberSpecialNodes 00379 }; 00380 // Stack for Tarjan's 00381 std::stack<unsigned> SCCStack; 00382 // Map from Graph Node to DFS number 00383 std::vector<unsigned> Node2DFS; 00384 // Map from Graph Node to Deleted from graph. 00385 std::vector<bool> Node2Deleted; 00386 // Same as Node Maps, but implemented as std::map because it is faster to 00387 // clear 00388 std::map<unsigned, unsigned> Tarjan2DFS; 00389 std::map<unsigned, bool> Tarjan2Deleted; 00390 // Current DFS number 00391 unsigned DFSNumber; 00392 00393 // Work lists. 00394 WorkList w1, w2; 00395 WorkList *CurrWL, *NextWL; // "current" and "next" work lists 00396 00397 // Offline variable substitution related things 00398 00399 // Temporary rep storage, used because we can't collapse SCC's in the 00400 // predecessor graph by uniting the variables permanently, we can only do so 00401 // for the successor graph. 00402 std::vector<unsigned> VSSCCRep; 00403 // Mapping from node to whether we have visited it during SCC finding yet. 00404 std::vector<bool> Node2Visited; 00405 // During variable substitution, we create unknowns to represent the unknown 00406 // value that is a dereference of a variable. These nodes are known as 00407 // "ref" nodes (since they represent the value of dereferences). 00408 unsigned FirstRefNode; 00409 // During HVN, we create represent address taken nodes as if they were 00410 // unknown (since HVN, unlike HU, does not evaluate unions). 00411 unsigned FirstAdrNode; 00412 // Current pointer equivalence class number 00413 unsigned PEClass; 00414 // Mapping from points-to sets to equivalence classes 00415 typedef DenseMap<SparseBitVector<> *, unsigned, BitmapKeyInfo> BitVectorMap; 00416 BitVectorMap Set2PEClass; 00417 // Mapping from pointer equivalences to the representative node. -1 if we 00418 // have no representative node for this pointer equivalence class yet. 00419 std::vector<int> PEClass2Node; 00420 // Mapping from pointer equivalences to representative node. This includes 00421 // pointer equivalent but not location equivalent variables. -1 if we have 00422 // no representative node for this pointer equivalence class yet. 00423 std::vector<int> PENLEClass2Node; 00424 // Union/Find for HCD 00425 std::vector<unsigned> HCDSCCRep; 00426 // HCD's offline-detected cycles; "Statically DeTected" 00427 // -1 if not part of such a cycle, otherwise a representative node. 00428 std::vector<int> SDT; 00429 // Whether to use SDT (UniteNodes can use it during solving, but not before) 00430 bool SDTActive; 00431 00432 public: 00433 static char ID; 00434 Andersens() : ModulePass((intptr_t)&ID) {} 00435 00436 bool runOnModule(Module &M) { 00437 InitializeAliasAnalysis(this); 00438 IdentifyObjects(M); 00439 CollectConstraints(M); 00440 #undef DEBUG_TYPE 00441 #define DEBUG_TYPE "anders-aa-constraints" 00442 DEBUG(PrintConstraints()); 00443 #undef DEBUG_TYPE 00444 #define DEBUG_TYPE "anders-aa" 00445 SolveConstraints(); 00446 DEBUG(PrintPointsToGraph()); 00447 00448 // Free the constraints list, as we don't need it to respond to alias 00449 // requests. 00450 std::vector<Constraint>().swap(Constraints); 00451 //These are needed for Print() (-analyze in opt) 00452 //ObjectNodes.clear(); 00453 //ReturnNodes.clear(); 00454 //VarargNodes.clear(); 00455 return false; 00456 } 00457 00458 void releaseMemory() { 00459 // FIXME: Until we have transitively required passes working correctly, 00460 // this cannot be enabled! Otherwise, using -count-aa with the pass 00461 // causes memory to be freed too early. :( 00462 #if 0 00463 // The memory objects and ValueNodes data structures at the only ones that 00464 // are still live after construction. 00465 std::vector<Node>().swap(GraphNodes); 00466 ValueNodes.clear(); 00467 #endif 00468 } 00469 00470 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 00471 AliasAnalysis::getAnalysisUsage(AU); 00472 AU.setPreservesAll(); // Does not transform code 00473 } 00474 00475 //------------------------------------------------ 00476 // Implement the AliasAnalysis API 00477 // 00478 AliasResult alias(const Value *V1, unsigned V1Size, 00479 const Value *V2, unsigned V2Size); 00480 virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); 00481 virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2); 00482 void getMustAliases(Value *P, std::vector<Value*> &RetVals); 00483 bool pointsToConstantMemory(const Value *P); 00484 00485 virtual void deleteValue(Value *V) { 00486 ValueNodes.erase(V); 00487 getAnalysis<AliasAnalysis>().deleteValue(V); 00488 } 00489 00490 virtual void copyValue(Value *From, Value *To) { 00491 ValueNodes[To] = ValueNodes[From]; 00492 getAnalysis<AliasAnalysis>().copyValue(From, To); 00493 } 00494 00495 private: 00496 /// getNode - Return the node corresponding to the specified pointer scalar. 00497 /// 00498 unsigned getNode(Value *V) { 00499 if (Constant *C = dyn_cast<Constant>(V)) 00500 if (!isa<GlobalValue>(C)) 00501 return getNodeForConstantPointer(C); 00502 00503 DenseMap<Value*, unsigned>::iterator I = ValueNodes.find(V); 00504 if (I == ValueNodes.end()) { 00505 #ifndef NDEBUG 00506 V->dump(); 00507 #endif 00508 assert(0 && "Value does not have a node in the points-to graph!"); 00509 } 00510 return I->second; 00511 } 00512 00513 /// getObject - Return the node corresponding to the memory object for the 00514 /// specified global or allocation instruction. 00515 unsigned getObject(Value *V) const { 00516 DenseMap<Value*, unsigned>::iterator I = ObjectNodes.find(V); 00517 assert(I != ObjectNodes.end() && 00518 "Value does not have an object in the points-to graph!"); 00519 return I->second; 00520 } 00521 00522 /// getReturnNode - Return the node representing the return value for the 00523 /// specified function. 00524 unsigned getReturnNode(Function *F) const { 00525 DenseMap<Function*, unsigned>::iterator I = ReturnNodes.find(F); 00526 assert(I != ReturnNodes.end() && "Function does not return a value!"); 00527 return I->second; 00528 } 00529 00530 /// getVarargNode - Return the node representing the variable arguments 00531 /// formal for the specified function. 00532 unsigned getVarargNode(Function *F) const { 00533 DenseMap<Function*, unsigned>::iterator I = VarargNodes.find(F); 00534 assert(I != VarargNodes.end() && "Function does not take var args!"); 00535 return I->second; 00536 } 00537 00538 /// getNodeValue - Get the node for the specified LLVM value and set the 00539 /// value for it to be the specified value. 00540 unsigned getNodeValue(Value &V) { 00541 unsigned Index = getNode(&V); 00542 GraphNodes[Index].setValue(&V); 00543 return Index; 00544 } 00545 00546 unsigned UniteNodes(unsigned First, unsigned Second, 00547 bool UnionByRank = true); 00548 unsigned FindNode(unsigned Node); 00549 unsigned FindNode(unsigned Node) const; 00550 00551 void IdentifyObjects(Module &M); 00552 void CollectConstraints(Module &M); 00553 bool AnalyzeUsesOfFunction(Value *); 00554 void CreateConstraintGraph(); 00555 void OptimizeConstraints(); 00556 unsigned FindEquivalentNode(unsigned, unsigned); 00557 void ClumpAddressTaken(); 00558 void RewriteConstraints(); 00559 void HU(); 00560 void HVN(); 00561 void HCD(); 00562 void Search(unsigned Node); 00563 void UnitePointerEquivalences(); 00564 void SolveConstraints(); 00565 bool QueryNode(unsigned Node); 00566 void Condense(unsigned Node); 00567 void HUValNum(unsigned Node); 00568 void HVNValNum(unsigned Node); 00569 unsigned getNodeForConstantPointer(Constant *C); 00570 unsigned getNodeForConstantPointerTarget(Constant *C); 00571 void AddGlobalInitializerConstraints(unsigned, Constant *C); 00572 00573 void AddConstraintsForNonInternalLinkage(Function *F); 00574 void AddConstraintsForCall(CallSite CS, Function *F); 00575 bool AddConstraintsForExternalCall(CallSite CS, Function *F); 00576 00577 00578 void PrintNode(const Node *N) const; 00579 void PrintConstraints() const ; 00580 void PrintConstraint(const Constraint &) const; 00581 void PrintLabels() const; 00582 void PrintPointsToGraph() const; 00583 00584 //===------------------------------------------------------------------===// 00585 // Instruction visitation methods for adding constraints 00586 // 00587 friend class InstVisitor<Andersens>; 00588 void visitReturnInst(ReturnInst &RI); 00589 void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); } 00590 void visitCallInst(CallInst &CI) { visitCallSite(CallSite(&CI)); } 00591 void visitCallSite(CallSite CS); 00592 void visitAllocationInst(AllocationInst &AI); 00593 void visitLoadInst(LoadInst &LI); 00594 void visitStoreInst(StoreInst &SI); 00595 void visitGetElementPtrInst(GetElementPtrInst &GEP); 00596 void visitPHINode(PHINode &PN); 00597 void visitCastInst(CastInst &CI); 00598 void visitICmpInst(ICmpInst &ICI) {} // NOOP! 00599 void visitFCmpInst(FCmpInst &ICI) {} // NOOP! 00600 void visitSelectInst(SelectInst &SI); 00601 void visitVAArg(VAArgInst &I); 00602 void visitInstruction(Instruction &I); 00603 00604 //===------------------------------------------------------------------===// 00605 // Implement Analyize interface 00606 // 00607 void print(std::ostream &O, const Module* M) const { 00608 PrintPointsToGraph(); 00609 } 00610 }; 00611 } 00612 00613 char Andersens::ID = 0; 00614 static RegisterPass<Andersens> 00615 X("anders-aa", "Andersen's Interprocedural Alias Analysis", false, true); 00616 static RegisterAnalysisGroup<AliasAnalysis> Y(X); 00617 00618 // Initialize Timestamp Counter (static). 00619 unsigned Andersens::Node::Counter = 0; 00620 00621 ModulePass *llvm::createAndersensPass() { return new Andersens(); } 00622 00623 //===----------------------------------------------------------------------===// 00624 // AliasAnalysis Interface Implementation 00625 //===----------------------------------------------------------------------===// 00626 00627 AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size, 00628 const Value *V2, unsigned V2Size) { 00629 Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))]; 00630 Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))]; 00631 00632 // Check to see if the two pointers are known to not alias. They don't alias 00633 // if their points-to sets do not intersect. 00634 if (!N1->intersectsIgnoring(N2, NullObject)) 00635 return NoAlias; 00636 00637 return AliasAnalysis::alias(V1, V1Size, V2, V2Size); 00638 } 00639 00640 AliasAnalysis::ModRefResult 00641 Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) { 00642 // The only thing useful that we can contribute for mod/ref information is 00643 // when calling external function calls: if we know that memory never escapes 00644 // from the program, it cannot be modified by an external call. 00645 // 00646 // NOTE: This is not really safe, at least not when the entire program is not 00647 // available. The deal is that the external function could call back into the 00648 // program and modify stuff. We ignore this technical niggle for now. This 00649 // is, after all, a "research quality" implementation of Andersen's analysis. 00650 if (Function *F = CS.getCalledFunction()) 00651 if (F->isDeclaration()) { 00652 Node *N1 = &GraphNodes[FindNode(getNode(P))]; 00653 00654 if (N1->PointsTo->empty()) 00655 return NoModRef; 00656 #if FULL_UNIVERSAL 00657 if (!UniversalSet->PointsTo->test(FindNode(getNode(P)))) 00658 return NoModRef; // Universal set does not contain P 00659 #else 00660 if (!N1->PointsTo->test(UniversalSet)) 00661 return NoModRef; // P doesn't point to the universal set. 00662 #endif 00663 } 00664 00665 return AliasAnalysis::getModRefInfo(CS, P, Size); 00666 } 00667 00668 AliasAnalysis::ModRefResult 00669 Andersens::getModRefInfo(CallSite CS1, CallSite CS2) { 00670 return AliasAnalysis::getModRefInfo(CS1,CS2); 00671 } 00672 00673 /// getMustAlias - We can provide must alias information if we know that a 00674 /// pointer can only point to a specific function or the null pointer. 00675 /// Unfortunately we cannot determine must-alias information for global 00676 /// variables or any other memory memory objects because we do not track whether 00677 /// a pointer points to the beginning of an object or a field of it. 00678 void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) { 00679 Node *N = &GraphNodes[FindNode(getNode(P))]; 00680 if (N->PointsTo->count() == 1) { 00681 Node *Pointee = &GraphNodes[N->PointsTo->find_first()]; 00682 // If a function is the only object in the points-to set, then it must be 00683 // the destination. Note that we can't handle global variables here, 00684 // because we don't know if the pointer is actually pointing to a field of 00685 // the global or to the beginning of it. 00686 if (Value *V = Pointee->getValue()) { 00687 if (Function *F = dyn_cast<Function>(V)) 00688 RetVals.push_back(F); 00689 } else { 00690 // If the object in the points-to set is the null object, then the null 00691 // pointer is a must alias. 00692 if (Pointee == &GraphNodes[NullObject]) 00693 RetVals.push_back(Constant::getNullValue(P->getType())); 00694 } 00695 } 00696 AliasAnalysis::getMustAliases(P, RetVals); 00697 } 00698 00699 /// pointsToConstantMemory - If we can determine that this pointer only points 00700 /// to constant memory, return true. In practice, this means that if the 00701 /// pointer can only point to constant globals, functions, or the null pointer, 00702 /// return true. 00703 /// 00704 bool Andersens::pointsToConstantMemory(const Value *P) { 00705 Node *N = &GraphNodes[FindNode(getNode(const_cast<Value*>(P)))]; 00706 unsigned i; 00707 00708 for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); 00709 bi != N->PointsTo->end(); 00710 ++bi) { 00711 i = *bi; 00712 Node *Pointee = &GraphNodes[i]; 00713 if (Value *V = Pointee->getValue()) { 00714 if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) && 00715 !cast<GlobalVariable>(V)->isConstant())) 00716 return AliasAnalysis::pointsToConstantMemory(P); 00717 } else { 00718 if (i != NullObject) 00719 return AliasAnalysis::pointsToConstantMemory(P); 00720 } 00721 } 00722 00723 return true; 00724 } 00725 00726 //===----------------------------------------------------------------------===// 00727 // Object Identification Phase 00728 //===----------------------------------------------------------------------===// 00729 00730 /// IdentifyObjects - This stage scans the program, adding an entry to the 00731 /// GraphNodes list for each memory object in the program (global stack or 00732 /// heap), and populates the ValueNodes and ObjectNodes maps for these objects. 00733 /// 00734 void Andersens::IdentifyObjects(Module &M) { 00735 unsigned NumObjects = 0; 00736 00737 // Object #0 is always the universal set: the object that we don't know 00738 // anything about. 00739 assert(NumObjects == UniversalSet && "Something changed!"); 00740 ++NumObjects; 00741 00742 // Object #1 always represents the null pointer. 00743 assert(NumObjects == NullPtr && "Something changed!"); 00744 ++NumObjects; 00745 00746 // Object #2 always represents the null object (the object pointed to by null) 00747 assert(NumObjects == NullObject && "Something changed!"); 00748 ++NumObjects; 00749 00750 // Add all the globals first. 00751 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 00752 I != E; ++I) { 00753 ObjectNodes[I] = NumObjects++; 00754 ValueNodes[I] = NumObjects++; 00755 } 00756 00757 // Add nodes for all of the functions and the instructions inside of them. 00758 for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { 00759 // The function itself is a memory object. 00760 unsigned First = NumObjects; 00761 ValueNodes[F] = NumObjects++; 00762 if (isa<PointerType>(F->getFunctionType()->getReturnType())) 00763 ReturnNodes[F] = NumObjects++; 00764 if (F->getFunctionType()->isVarArg()) 00765 VarargNodes[F] = NumObjects++; 00766 00767 00768 // Add nodes for all of the incoming pointer arguments. 00769 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); 00770 I != E; ++I) 00771 { 00772 if (isa<PointerType>(I->getType())) 00773 ValueNodes[I] = NumObjects++; 00774 } 00775 MaxK[First] = NumObjects - First; 00776 00777 // Scan the function body, creating a memory object for each heap/stack 00778 // allocation in the body of the function and a node to represent all 00779 // pointer values defined by instructions and used as operands. 00780 for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { 00781 // If this is an heap or stack allocation, create a node for the memory 00782 // object. 00783 if (isa<PointerType>(II->getType())) { 00784 ValueNodes[&*II] = NumObjects++; 00785 if (AllocationInst *AI = dyn_cast<AllocationInst>(&*II)) 00786 ObjectNodes[AI] = NumObjects++; 00787 } 00788 00789 // Calls to inline asm need to be added as well because the callee isn't 00790 // referenced anywhere else. 00791 if (CallInst *CI = dyn_cast<CallInst>(&*II)) { 00792 Value *Callee = CI->getCalledValue(); 00793 if (isa<InlineAsm>(Callee)) 00794 ValueNodes[Callee] = NumObjects++; 00795 } 00796 } 00797 } 00798 00799 // Now that we know how many objects to create, make them all now! 00800 GraphNodes.resize(NumObjects); 00801 NumNodes += NumObjects; 00802 } 00803 00804 //===----------------------------------------------------------------------===// 00805 // Constraint Identification Phase 00806 //===----------------------------------------------------------------------===// 00807 00808 /// getNodeForConstantPointer - Return the node corresponding to the constant 00809 /// pointer itself. 00810 unsigned Andersens::getNodeForConstantPointer(Constant *C) { 00811 assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); 00812 00813 if (isa<ConstantPointerNull>(C) || isa<UndefValue>(C)) 00814 return NullPtr; 00815 else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) 00816 return getNode(GV); 00817 else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 00818 switch (CE->getOpcode()) { 00819 case Instruction::GetElementPtr: 00820 return getNodeForConstantPointer(CE->getOperand(0)); 00821 case Instruction::IntToPtr: 00822 return UniversalSet; 00823 case Instruction::BitCast: 00824 return getNodeForConstantPointer(CE->getOperand(0)); 00825 default: 00826 cerr << "Constant Expr not yet handled: " << *CE << "\n"; 00827 assert(0); 00828 } 00829 } else { 00830 assert(0 && "Unknown constant pointer!"); 00831 } 00832 return 0; 00833 } 00834 00835 /// getNodeForConstantPointerTarget - Return the node POINTED TO by the 00836 /// specified constant pointer. 00837 unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) { 00838 assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); 00839 00840 if (isa<ConstantPointerNull>(C)) 00841 return NullObject; 00842 else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) 00843 return getObject(GV); 00844 else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 00845 switch (CE->getOpcode()) { 00846 case Instruction::GetElementPtr: 00847 return getNodeForConstantPointerTarget(CE->getOperand(0)); 00848 case Instruction::IntToPtr: 00849 return UniversalSet; 00850 case Instruction::BitCast: 00851 return getNodeForConstantPointerTarget(CE->getOperand(0)); 00852 default: 00853 cerr << "Constant Expr not yet handled: " << *CE << "\n"; 00854 assert(0); 00855 } 00856 } else { 00857 assert(0 && "Unknown constant pointer!"); 00858 } 00859 return 0; 00860 } 00861 00862 /// AddGlobalInitializerConstraints - Add inclusion constraints for the memory 00863 /// object N, which contains values indicated by C. 00864 void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex, 00865 Constant *C) { 00866 if (C->getType()->isSingleValueType()) { 00867 if (isa<PointerType>(C->getType())) 00868 Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, 00869 getNodeForConstantPointer(C))); 00870 } else if (C->isNullValue()) { 00871 Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, 00872 NullObject)); 00873 return; 00874 } else if (!isa<UndefValue>(C)) { 00875 // If this is an array or struct, include constraints for each element. 00876 assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C)); 00877 for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) 00878 AddGlobalInitializerConstraints(NodeIndex, 00879 cast<Constant>(C->getOperand(i))); 00880 } 00881 } 00882 00883 /// AddConstraintsForNonInternalLinkage - If this function does not have 00884 /// internal linkage, realize that we can't trust anything passed into or 00885 /// returned by this function. 00886 void Andersens::AddConstraintsForNonInternalLinkage(Function *F) { 00887 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) 00888 if (isa<PointerType>(I->getType())) 00889 // If this is an argument of an externally accessible function, the 00890 // incoming pointer might point to anything. 00891 Constraints.push_back(Constraint(Constraint::Copy, getNode(I), 00892 UniversalSet)); 00893 } 00894 00895 /// AddConstraintsForCall - If this is a call to a "known" function, add the 00896 /// constraints and return true. If this is a call to an unknown function, 00897 /// return false. 00898 bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) { 00899 assert(F->isDeclaration() && "Not an external function!"); 00900 00901 // These functions don't induce any points-to constraints. 00902 if (F->getName() == "atoi" || F->getName() == "atof" || 00903 F->getName() == "atol" || F->getName() == "atoll" || 00904 F->getName() == "remove" || F->getName() == "unlink" || 00905 F->getName() == "rename" || F->getName() == "memcmp" || 00906 F->getName() == "llvm.memset.i32" || 00907 F->getName() == "llvm.memset.i64" || 00908 F->getName() == "strcmp" || F->getName() == "strncmp" || 00909 F->getName() == "execl" || F->getName() == "execlp" || 00910 F->getName() == "execle" || F->getName() == "execv" || 00911 F->getName() == "execvp" || F->getName() == "chmod" || 00912 F->getName() == "puts" || F->getName() == "write" || 00913 F->getName() == "open" || F->getName() == "create" || 00914 F->getName() == "truncate" || F->getName() == "chdir" || 00915 F->getName() == "mkdir" || F->getName() == "rmdir" || 00916 F->getName() == "read" || F->getName() == "pipe" || 00917 F->getName() == "wait" || F->getName() == "time" || 00918 F->getName() == "stat" || F->getName() == "fstat" || 00919 F->getName() == "lstat" || F->getName() == "strtod" || 00920 F->getName() == "strtof" || F->getName() == "strtold" || 00921 F->getName() == "fopen" || F->getName() == "fdopen" || 00922 F->getName() == "freopen" || 00923 F->getName() == "fflush" || F->getName() == "feof" || 00924 F->getName() == "fileno" || F->getName() == "clearerr" || 00925 F->getName() == "rewind" || F->getName() == "ftell" || 00926 F->getName() == "ferror" || F->getName() == "fgetc" || 00927 F->getName() == "fgetc" || F->getName() == "_IO_getc" || 00928 F->getName() == "fwrite" || F->getName() == "fread" || 00929 F->getName() == "fgets" || F->getName() == "ungetc" || 00930 F->getName() == "fputc" || 00931 F->getName() == "fputs" || F->getName() == "putc" || 00932 F->getName() == "ftell" || F->getName() == "rewind" || 00933 F->getName() == "_IO_putc" || F->getName() == "fseek" || 00934 F->getName() == "fgetpos" || F->getName() == "fsetpos" || 00935 F->getName() == "printf" || F->getName() == "fprintf" || 00936 F->getName() == "sprintf" || F->getName() == "vprintf" || 00937 F->getName() == "vfprintf" || F->getName() == "vsprintf" || 00938 F->getName() == "scanf" || F->getName() == "fscanf" || 00939 F->getName() == "sscanf" || F->getName() == "__assert_fail" || 00940 F->getName() == "modf") 00941 return true; 00942 00943 00944 // These functions do induce points-to edges. 00945 if (F->getName() == "llvm.memcpy.i32" || F->getName() == "llvm.memcpy.i64" || 00946 F->getName() == "llvm.memmove.i32" ||F->getName() == "llvm.memmove.i64" || 00947 F->getName() == "memmove") { 00948 00949 // *Dest = *Src, which requires an artificial graph node to represent the 00950 // constraint. It is broken up into *Dest = temp, temp = *Src 00951 unsigned FirstArg = getNode(CS.getArgument(0)); 00952 unsigned SecondArg = getNode(CS.getArgument(1)); 00953 unsigned TempArg = GraphNodes.size(); 00954 GraphNodes.push_back(Node()); 00955 Constraints.push_back(Constraint(Constraint::Store, 00956 FirstArg, TempArg)); 00957 Constraints.push_back(Constraint(Constraint::Load, 00958 TempArg, SecondArg)); 00959 // In addition, Dest = Src 00960 Constraints.push_back(Constraint(Constraint::Copy, 00961 FirstArg, SecondArg)); 00962 return true; 00963 } 00964 00965 // Result = Arg0 00966 if (F->getName() == "realloc" || F->getName() == "strchr" || 00967 F->getName() == "strrchr" || F->getName() == "strstr" || 00968 F->getName() == "strtok") { 00969 Constraints.push_back(Constraint(Constraint::Copy, 00970 getNode(CS.getInstruction()), 00971 getNode(CS.getArgument(0)))); 00972 return true; 00973 } 00974 00975 return false; 00976 } 00977 00978 00979 00980 /// AnalyzeUsesOfFunction - Look at all of the users of the specified function. 00981 /// If this is used by anything complex (i.e., the address escapes), return 00982 /// true. 00983 bool Andersens::AnalyzeUsesOfFunction(Value *V) { 00984 00985 if (!isa<PointerType>(V->getType())) return true; 00986 00987 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) 00988 if (dyn_cast<LoadInst>(*UI)) { 00989 return false; 00990 } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { 00991 if (V == SI->getOperand(1)) { 00992 return false; 00993 } else if (SI->getOperand(1)) { 00994 return true; // Storing the pointer 00995 } 00996 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { 00997 if (AnalyzeUsesOfFunction(GEP)) return true; 00998 } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { 00999 // Make sure that this is just the function being called, not that it is 01000 // passing into the function. 01001 for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i) 01002 if (CI->getOperand(i) == V) return true; 01003 } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { 01004 // Make sure that this is just the function being called, not that it is 01005 // passing into the function. 01006 for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i) 01007 if (II->getOperand(i) == V) return true; 01008 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { 01009 if (CE->getOpcode() == Instruction::GetElementPtr || 01010 CE->getOpcode() == Instruction::BitCast) { 01011 if (AnalyzeUsesOfFunction(CE)) 01012 return true; 01013 } else { 01014 return true; 01015 } 01016 } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) { 01017 if (!isa<ConstantPointerNull>(ICI->getOperand(1))) 01018 return true; // Allow comparison against null. 01019 } else if (dyn_cast<FreeInst>(*UI)) { 01020 return false; 01021 } else { 01022 return true; 01023 } 01024 return false; 01025 } 01026 01027 /// CollectConstraints - This stage scans the program, adding a constraint to 01028 /// the Constraints list for each instruction in the program that induces a 01029 /// constraint, and setting up the initial points-to graph. 01030 /// 01031 void Andersens::CollectConstraints(Module &M) { 01032 // First, the universal set points to itself. 01033 Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet, 01034 UniversalSet)); 01035 Constraints.push_back(Constraint(Constraint::Store, UniversalSet, 01036 UniversalSet)); 01037 01038 // Next, the null pointer points to the null object. 01039 Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject)); 01040 01041 // Next, add any constraints on global variables and their initializers. 01042 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 01043 I != E; ++I) { 01044 // Associate the address of the global object as pointing to the memory for 01045 // the global: &G = <G memory> 01046 unsigned ObjectIndex = getObject(I); 01047 Node *Object = &GraphNodes[ObjectIndex]; 01048 Object->setValue(I); 01049 Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I), 01050 ObjectIndex)); 01051 01052 if (I->hasInitializer()) { 01053 AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer()); 01054 } else { 01055 // If it doesn't have an initializer (i.e. it's defined in another 01056 // translation unit), it points to the universal set. 01057 Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex, 01058 UniversalSet)); 01059 } 01060 } 01061 01062 for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { 01063 // Set up the return value node. 01064 if (isa<PointerType>(F->getFunctionType()->getReturnType())) 01065 GraphNodes[getReturnNode(F)].setValue(F); 01066 if (F->getFunctionType()->isVarArg()) 01067 GraphNodes[getVarargNode(F)].setValue(F); 01068 01069 // Set up incoming argument nodes. 01070 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); 01071 I != E; ++I) 01072 if (isa<PointerType>(I->getType())) 01073 getNodeValue(*I); 01074 01075 // At some point we should just add constraints for the escaping functions 01076 // at solve time, but this slows down solving. For now, we simply mark 01077 // address taken functions as escaping and treat them as external. 01078 if (!F->hasInternalLinkage() || AnalyzeUsesOfFunction(F)) 01079 AddConstraintsForNonInternalLinkage(F); 01080 01081 if (!F->isDeclaration()) { 01082 // Scan the function body, creating a memory object for each heap/stack 01083 // allocation in the body of the function and a node to represent all 01084 // pointer values defined by instructions and used as operands. 01085 visit(F); 01086 } else { 01087 // External functions that return pointers return the universal set. 01088 if (isa<PointerType>(F->getFunctionType()->getReturnType())) 01089 Constraints.push_back(Constraint(Constraint::Copy, 01090 getReturnNode(F), 01091 UniversalSet)); 01092 01093 // Any pointers that are passed into the function have the universal set 01094 // stored into them. 01095 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); 01096 I != E; ++I) 01097 if (isa<PointerType>(I->getType())) { 01098 // Pointers passed into external functions could have anything stored 01099 // through them. 01100 Constraints.push_back(Constraint(Constraint::Store, getNode(I), 01101 UniversalSet)); 01102 // Memory objects passed into external function calls can have the 01103 // universal set point to them. 01104 #if FULL_UNIVERSAL 01105 Constraints.push_back(Constraint(Constraint::Copy, 01106 UniversalSet, 01107 getNode(I))); 01108 #else 01109 Constraints.push_back(Constraint(Constraint::Copy, 01110 getNode(I), 01111 UniversalSet)); 01112 #endif 01113 } 01114 01115 // If this is an external varargs function, it can also store pointers 01116 // into any pointers passed through the varargs section. 01117 if (F->getFunctionType()->isVarArg()) 01118 Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F), 01119 UniversalSet)); 01120 } 01121 } 01122 NumConstraints += Constraints.size(); 01123 } 01124 01125 01126 void Andersens::visitInstruction(Instruction &I) { 01127 #ifdef NDEBUG 01128 return; // This function is just a big assert. 01129 #endif 01130 if (isa<BinaryOperator>(I)) 01131 return; 01132 // Most instructions don't have any effect on pointer values. 01133 switch (I.getOpcode()) { 01134 case Instruction::Br: 01135 case Instruction::Switch: 01136 case Instruction::Unwind: 01137 case Instruction::Unreachable: 01138 case Instruction::Free: 01139 case Instruction::ICmp: 01140 case Instruction::FCmp: 01141 return; 01142 default: 01143 // Is this something we aren't handling yet? 01144 cerr << "Unknown instruction: " << I; 01145 abort(); 01146 } 01147 } 01148 01149 void Andersens::visitAllocationInst(AllocationInst &AI) { 01150 unsigned ObjectIndex = getObject(&AI); 01151 GraphNodes[ObjectIndex].setValue(&AI); 01152 Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI), 01153 ObjectIndex)); 01154 } 01155 01156 void Andersens::visitReturnInst(ReturnInst &RI) { 01157 if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType())) 01158 // return V --> <Copy/retval{F}/v> 01159 Constraints.push_back(Constraint(Constraint::Copy, 01160 getReturnNode(RI.getParent()->getParent()), 01161 getNode(RI.getOperand(0)))); 01162 } 01163 01164 void Andersens::visitLoadInst(LoadInst &LI) { 01165 if (isa<PointerType>(LI.getType())) 01166 // P1 = load P2 --> <Load/P1/P2> 01167 Constraints.push_back(Constraint(Constraint::Load, getNodeValue(LI), 01168 getNode(LI.getOperand(0)))); 01169 } 01170 01171 void Andersens::visitStoreInst(StoreInst &SI) { 01172 if (isa<PointerType>(SI.getOperand(0)->getType())) 01173 // store P1, P2 --> <Store/P2/P1> 01174 Constraints.push_back(Constraint(Constraint::Store, 01175 getNode(SI.getOperand(1)), 01176 getNode(SI.getOperand(0)))); 01177 } 01178 01179 void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) { 01180 // P1 = getelementptr P2, ... --> <Copy/P1/P2> 01181 Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(GEP), 01182 getNode(GEP.getOperand(0)))); 01183 } 01184 01185 void Andersens::visitPHINode(PHINode &PN) { 01186 if (isa<PointerType>(PN.getType())) { 01187 unsigned PNN = getNodeValue(PN); 01188 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) 01189 // P1 = phi P2, P3 --> <Copy/P1/P2>, <Copy/P1/P3>, ... 01190 Constraints.push_back(Constraint(Constraint::Copy, PNN, 01191 getNode(PN.getIncomingValue(i)))); 01192 } 01193 } 01194 01195 void Andersens::visitCastInst(CastInst &CI) { 01196 Value *Op = CI.getOperand(0); 01197 if (isa<PointerType>(CI.getType())) { 01198 if (isa<PointerType>(Op->getType())) { 01199 // P1 = cast P2 --> <Copy/P1/P2> 01200 Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), 01201 getNode(CI.getOperand(0)))); 01202 } else { 01203 // P1 = cast int --> <Copy/P1/Univ> 01204 #if 0 01205 Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), 01206 UniversalSet)); 01207 #else 01208 getNodeValue(CI); 01209 #endif 01210 } 01211 } else if (isa<PointerType>(Op->getType())) { 01212 // int = cast P1 --> <Copy/Univ/P1> 01213 #if 0 01214 Constraints.push_back(Constraint(Constraint::Copy, 01215 UniversalSet, 01216 getNode(CI.getOperand(0)))); 01217 #else 01218 getNode(CI.getOperand(0)); 01219 #endif 01220 } 01221 } 01222 01223 void Andersens::visitSelectInst(SelectInst &SI) { 01224 if (isa<PointerType>(SI.getType())) { 01225 unsigned SIN = getNodeValue(SI); 01226 // P1 = select C, P2, P3 ---> <Copy/P1/P2>, <Copy/P1/P3> 01227 Constraints.push_back(Constraint(Constraint::Copy, SIN, 01228 getNode(SI.getOperand(1)))); 01229 Constraints.push_back(Constraint(Constraint::Copy, SIN, 01230 getNode(SI.getOperand(2)))); 01231 } 01232 } 01233 01234 void Andersens::visitVAArg(VAArgInst &I) { 01235 assert(0 && "vaarg not handled yet!"); 01236 } 01237 01238 /// AddConstraintsForCall - Add constraints for a call with actual arguments 01239 /// specified by CS to the function specified by F. Note that the types of 01240 /// arguments might not match up in the case where this is an indirect call and 01241 /// the function pointer has been casted. If this is the case, do something 01242 /// reasonable. 01243 void Andersens::AddConstraintsForCall(CallSite CS, Function *F) { 01244 Value *CallValue = CS.getCalledValue(); 01245 bool IsDeref = F == NULL; 01246 01247 // If this is a call to an external function, try to handle it directly to get 01248 // some taste of context sensitivity. 01249 if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F)) 01250 return; 01251 01252 if (isa<PointerType>(CS.getType())) { 01253 unsigned CSN = getNode(CS.getInstruction()); 01254 if (!F || isa<PointerType>(F->getFunctionType()->getReturnType())) { 01255 if (IsDeref) 01256 Constraints.push_back(Constraint(Constraint::Load, CSN, 01257 getNode(CallValue), CallReturnPos)); 01258 else 01259 Constraints.push_back(Constraint(Constraint::Copy, CSN, 01260 getNode(CallValue) + CallReturnPos)); 01261 } else { 01262 // If the function returns a non-pointer value, handle this just like we 01263 // treat a nonpointer cast to pointer. 01264 Constraints.push_back(Constraint(Constraint::Copy, CSN, 01265 UniversalSet)); 01266 } 01267 } else if (F && isa<PointerType>(F->getFunctionType()->getReturnType())) { 01268 #if FULL_UNIVERSAL 01269 Constraints.push_back(Constraint(Constraint::Copy, 01270 UniversalSet, 01271 getNode(CallValue) + CallReturnPos)); 01272 #else 01273 Constraints.push_back(Constraint(Constraint::Copy, 01274 getNode(CallValue) + CallReturnPos, 01275 UniversalSet)); 01276 #endif 01277 01278 01279 } 01280 01281 CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end(); 01282 bool external = !F || F->isDeclaration(); 01283 if (F) { 01284 // Direct Call 01285 Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); 01286 for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) 01287 { 01288 #if !FULL_UNIVERSAL 01289 if (external && isa<PointerType>((*ArgI)->getType())) 01290 { 01291 // Add constraint that ArgI can now point to anything due to 01292 // escaping, as can everything it points to. The second portion of 01293 // this should be taken care of by universal = *universal 01294 Constraints.push_back(Constraint(Constraint::Copy, 01295 getNode(*ArgI), 01296 UniversalSet)); 01297 } 01298 #endif 01299 if (isa<PointerType>(AI->getType())) { 01300 if (isa<PointerType>((*ArgI)->getType())) { 01301 // Copy the actual argument into the formal argument. 01302 Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), 01303 getNode(*ArgI))); 01304 } else { 01305 Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), 01306 UniversalSet)); 01307 } 01308 } else if (isa<PointerType>((*ArgI)->getType())) { 01309 #if FULL_UNIVERSAL 01310 Constraints.push_back(Constraint(Constraint::Copy, 01311 UniversalSet, 01312 getNode(*ArgI))); 01313 #else 01314 Constraints.push_back(Constraint(Constraint::Copy, 01315 getNode(*ArgI), 01316 UniversalSet)); 01317 #endif 01318 } 01319 } 01320 } else { 01321 //Indirect Call 01322 unsigned ArgPos = CallFirstArgPos; 01323 for (; ArgI != ArgE; ++ArgI) { 01324 if (isa<PointerType>((*ArgI)->getType())) { 01325 // Copy the actual argument into the formal argument. 01326 Constraints.push_back(Constraint(Constraint::Store, 01327 getNode(CallValue), 01328 getNode(*ArgI), ArgPos++)); 01329 } else { 01330 Constraints.push_back(Constraint(Constraint::Store, 01331 getNode (CallValue), 01332 UniversalSet, ArgPos++)); 01333 } 01334 } 01335 } 01336 // Copy all pointers passed through the varargs section to the varargs node. 01337 if (F && F->getFunctionType()->isVarArg()) 01338 for (; ArgI != ArgE; ++ArgI) 01339 if (isa<PointerType>((*ArgI)->getType())) 01340 Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F), 01341 getNode(*ArgI))); 01342 // If more arguments are passed in than we track, just drop them on the floor. 01343 } 01344 01345 void Andersens::visitCallSite(CallSite CS) { 01346 if (isa<PointerType>(CS.getType())) 01347 getNodeValue(*CS.getInstruction()); 01348 01349 if (Function *F = CS.getCalledFunction()) { 01350 AddConstraintsForCall(CS, F); 01351 } else { 01352 AddConstraintsForCall(CS, NULL); 01353 } 01354 } 01355 01356 //===----------------------------------------------------------------------===// 01357 // Constraint Solving Phase 01358 //===----------------------------------------------------------------------===// 01359 01360 /// intersects - Return true if the points-to set of this node intersects 01361 /// with the points-to set of the specified node. 01362 bool Andersens::Node::intersects(Node *N) const { 01363 return PointsTo->intersects(N->PointsTo); 01364 } 01365 01366 /// intersectsIgnoring - Return true if the points-to set of this node 01367 /// intersects with the points-to set of the specified node on any nodes 01368 /// except for the specified node to ignore. 01369 bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const { 01370 // TODO: If we are only going to call this with the same value for Ignoring, 01371 // we should move the special values out of the points-to bitmap. 01372 bool WeHadIt = PointsTo->test(Ignoring); 01373 bool NHadIt = N->PointsTo->test(Ignoring); 01374 bool Result = false; 01375 if (WeHadIt) 01376 PointsTo->reset(Ignoring); 01377 if (NHadIt) 01378 N->PointsTo->reset(Ignoring); 01379 Result = PointsTo->intersects(N->PointsTo); 01380 if (WeHadIt) 01381 PointsTo->set(Ignoring); 01382 if (NHadIt) 01383 N->PointsTo->set(Ignoring); 01384 return Result; 01385 } 01386 01387 void dumpToDOUT(SparseBitVector<> *bitmap) { 01388 #ifndef NDEBUG 01389 dump(*bitmap, DOUT); 01390 #endif 01391 } 01392 01393 01394 /// Clump together address taken variables so that the points-to sets use up 01395 /// less space and can be operated on faster. 01396 01397 void Andersens::ClumpAddressTaken() { 01398 #undef DEBUG_TYPE 01399 #define DEBUG_TYPE "anders-aa-renumber" 01400 std::vector<unsigned> Translate; 01401 std::vector<Node> NewGraphNodes; 01402 01403 Translate.resize(GraphNodes.size()); 01404 unsigned NewPos = 0; 01405 01406 for (unsigned i = 0; i < Constraints.size(); ++i) { 01407 Constraint &C = Constraints[i]; 01408 if (C.Type == Constraint::AddressOf) { 01409 GraphNodes[C.Src].AddressTaken = true; 01410 } 01411 } 01412 for (unsigned i = 0; i < NumberSpecialNodes; ++i) { 01413 unsigned Pos = NewPos++; 01414 Translate[i] = Pos; 01415 NewGraphNodes.push_back(GraphNodes[i]); 01416 DOUT << "Renumbering node " << i << " to node " << Pos << "\n"; 01417 } 01418 01419 // I believe this ends up being faster than making two vectors and splicing 01420 // them. 01421 for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) { 01422 if (GraphNodes[i].AddressTaken) { 01423 unsigned Pos = NewPos++; 01424 Translate[i] = Pos; 01425 NewGraphNodes.push_back(GraphNodes[i]); 01426 DOUT << "Renumbering node " << i << " to node " << Pos << "\n"; 01427 } 01428 } 01429 01430 for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) { 01431 if (!GraphNodes[i].AddressTaken) { 01432 unsigned Pos = N