LLVM API Documentation

StringMap.cpp

Go to the documentation of this file.
00001 //===--- StringMap.cpp - String Hash table map implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the StringMap class.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "llvm/ADT/StringMap.h"
00015 #include <cassert>
00016 using namespace llvm;
00017 
00018 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
00019   ItemSize = itemSize;
00020   
00021   // If a size is specified, initialize the table with that many buckets.
00022   if (InitSize) {
00023     init(InitSize);
00024     return;
00025   }
00026   
00027   // Otherwise, initialize it with zero buckets to avoid the allocation.
00028   TheTable = 0;
00029   NumBuckets = 0;
00030   NumItems = 0;
00031   NumTombstones = 0;
00032 }
00033 
00034 void StringMapImpl::init(unsigned InitSize) {
00035   assert((InitSize & (InitSize-1)) == 0 &&
00036          "Init Size must be a power of 2 or zero!");
00037   NumBuckets = InitSize ? InitSize : 16;
00038   NumItems = 0;
00039   NumTombstones = 0;
00040   
00041   TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
00042   
00043   // Allocate one extra bucket, set it to look filled so the iterators stop at
00044   // end.
00045   TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
00046 }
00047 
00048 
00049 /// HashString - Compute a hash code for the specified string.
00050 ///
00051 static unsigned HashString(const char *Start, const char *End) {
00052   // Bernstein hash function.
00053   unsigned int Result = 0;
00054   // TODO: investigate whether a modified bernstein hash function performs
00055   // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
00056   //   X*33+c -> X*33^c
00057   while (Start != End)
00058     Result = Result * 33 + *Start++;
00059   Result = Result + (Result >> 5);
00060   return Result;
00061 }
00062 
00063 /// LookupBucketFor - Look up the bucket that the specified string should end
00064 /// up in.  If it already exists as a key in the map, the Item pointer for the
00065 /// specified bucket will be non-null.  Otherwise, it will be null.  In either
00066 /// case, the FullHashValue field of the bucket will be set to the hash value
00067 /// of the string.
00068 unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
00069                                         const char *NameEnd) {
00070   unsigned HTSize = NumBuckets;
00071   if (HTSize == 0) {  // Hash table unallocated so far?
00072     init(16);
00073     HTSize = NumBuckets;
00074   }
00075   unsigned FullHashValue = HashString(NameStart, NameEnd);
00076   unsigned BucketNo = FullHashValue & (HTSize-1);
00077   
00078   unsigned ProbeAmt = 1;
00079   int FirstTombstone = -1;
00080   while (1) {
00081     ItemBucket &Bucket = TheTable[BucketNo];
00082     StringMapEntryBase *BucketItem = Bucket.Item;
00083     // If we found an empty bucket, this key isn't in the table yet, return it.
00084     if (BucketItem == 0) {
00085       // If we found a tombstone, we want to reuse the tombstone instead of an
00086       // empty bucket.  This reduces probing.
00087       if (FirstTombstone != -1) {
00088         TheTable[FirstTombstone].FullHashValue = FullHashValue;
00089         return FirstTombstone;
00090       }
00091       
00092       Bucket.FullHashValue = FullHashValue;
00093       return BucketNo;
00094     }
00095     
00096     if (BucketItem == getTombstoneVal()) {
00097       // Skip over tombstones.  However, remember the first one we see.
00098       if (FirstTombstone == -1) FirstTombstone = BucketNo;
00099     } else if (Bucket.FullHashValue == FullHashValue) {
00100       // If the full hash value matches, check deeply for a match.  The common
00101       // case here is that we are only looking at the buckets (for item info
00102       // being non-null and for the full hash value) not at the items.  This
00103       // is important for cache locality.
00104       
00105       // Do the comparison like this because NameStart isn't necessarily
00106       // null-terminated!
00107       char *ItemStr = (char*)BucketItem+ItemSize;
00108       unsigned ItemStrLen = BucketItem->getKeyLength();
00109       if (unsigned(NameEnd-NameStart) == ItemStrLen &&
00110           memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
00111         // We found a match!
00112         return BucketNo;
00113       }
00114     }
00115     
00116     // Okay, we didn't find the item.  Probe to the next bucket.
00117     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
00118     
00119     // Use quadratic probing, it has fewer clumping artifacts than linear
00120     // probing and has good cache behavior in the common case.
00121     ++ProbeAmt;
00122   }
00123 }
00124 
00125 
00126 /// FindKey - Look up the bucket that contains the specified key. If it exists
00127 /// in the map, return the bucket number of the key.  Otherwise return -1.
00128 /// This does not modify the map.
00129 int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
00130   unsigned HTSize = NumBuckets;
00131   if (HTSize == 0) return -1;  // Really empty table?
00132   unsigned FullHashValue = HashString(KeyStart, KeyEnd);
00133   unsigned BucketNo = FullHashValue & (HTSize-1);
00134   
00135   unsigned ProbeAmt = 1;
00136   while (1) {
00137     ItemBucket &Bucket = TheTable[BucketNo];
00138     StringMapEntryBase *BucketItem = Bucket.Item;
00139     // If we found an empty bucket, this key isn't in the table yet, return.
00140     if (BucketItem == 0)
00141       return -1;
00142     
00143     if (BucketItem == getTombstoneVal()) {
00144       // Ignore tombstones.
00145     } else if (Bucket.FullHashValue == FullHashValue) {
00146       // If the full hash value matches, check deeply for a match.  The common
00147       // case here is that we are only looking at the buckets (for item info
00148       // being non-null and for the full hash value) not at the items.  This
00149       // is important for cache locality.
00150       
00151       // Do the comparison like this because NameStart isn't necessarily
00152       // null-terminated!
00153       char *ItemStr = (char*)BucketItem+ItemSize;
00154       unsigned ItemStrLen = BucketItem->getKeyLength();
00155       if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
00156           memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
00157         // We found a match!
00158         return BucketNo;
00159       }
00160     }
00161     
00162     // Okay, we didn't find the item.  Probe to the next bucket.
00163     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
00164     
00165     // Use quadratic probing, it has fewer clumping artifacts than linear
00166     // probing and has good cache behavior in the common case.
00167     ++ProbeAmt;
00168   }
00169 }
00170 
00171 /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
00172 /// delete it.  This aborts if the value isn't in the table.
00173 void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
00174   const char *VStr = (char*)V + ItemSize;
00175   StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
00176   V2 = V2;
00177   assert(V == V2 && "Didn't find key?");
00178 }
00179 
00180 /// RemoveKey - Remove the StringMapEntry for the specified key from the
00181 /// table, returning it.  If the key is not in the table, this returns null.
00182 StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
00183                                              const char *KeyEnd) {
00184   int Bucket = FindKey(KeyStart, KeyEnd);
00185   if (Bucket == -1) return 0;
00186   
00187   StringMapEntryBase *Result = TheTable[Bucket].Item;
00188   TheTable[Bucket].Item = getTombstoneVal();
00189   --NumItems;
00190   ++NumTombstones;
00191   return Result;
00192 }
00193 
00194 
00195 
00196 /// RehashTable - Grow the table, redistributing values into the buckets with
00197 /// the appropriate mod-of-hashtable-size.
00198 void StringMapImpl::RehashTable() {
00199   unsigned NewSize = NumBuckets*2;
00200   // Allocate one extra bucket which will always be non-empty.  This allows the
00201   // iterators to stop at end.
00202   ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
00203   NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
00204   
00205   // Rehash all the items into their new buckets.  Luckily :) we already have
00206   // the hash values available, so we don't have to rehash any strings.
00207   for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
00208     if (IB->Item && IB->Item != getTombstoneVal()) {
00209       // Fast case, bucket available.
00210       unsigned FullHash = IB->FullHashValue;
00211       unsigned NewBucket = FullHash & (NewSize-1);
00212       if (NewTableArray[NewBucket].Item == 0) {
00213         NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
00214         NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
00215         continue;
00216       }
00217       
00218       // Otherwise probe for a spot.
00219       unsigned ProbeSize = 1;
00220       do {
00221         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
00222       } while (NewTableArray[NewBucket].Item);
00223       
00224       // Finally found a slot.  Fill it in.
00225       NewTableArray[NewBucket].Item = IB->Item;
00226       NewTableArray[NewBucket].FullHashValue = FullHash;
00227     }
00228   }
00229   
00230   free(TheTable);
00231   
00232   TheTable = NewTableArray;
00233   NumBuckets = NewSize;
00234 }



This web site is hosted by the Computer Science Department at the University of Illinois at Urbana-Champaign.