LLVM API Documentation
00001 //===--- StringMap.cpp - String Hash table map implementation -------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the StringMap class. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "llvm/ADT/StringMap.h" 00015 #include <cassert> 00016 using namespace llvm; 00017 00018 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) { 00019 ItemSize = itemSize; 00020 00021 // If a size is specified, initialize the table with that many buckets. 00022 if (InitSize) { 00023 init(InitSize); 00024 return; 00025 } 00026 00027 // Otherwise, initialize it with zero buckets to avoid the allocation. 00028 TheTable = 0; 00029 NumBuckets = 0; 00030 NumItems = 0; 00031 NumTombstones = 0; 00032 } 00033 00034 void StringMapImpl::init(unsigned InitSize) { 00035 assert((InitSize & (InitSize-1)) == 0 && 00036 "Init Size must be a power of 2 or zero!"); 00037 NumBuckets = InitSize ? InitSize : 16; 00038 NumItems = 0; 00039 NumTombstones = 0; 00040 00041 TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket)); 00042 00043 // Allocate one extra bucket, set it to look filled so the iterators stop at 00044 // end. 00045 TheTable[NumBuckets].Item = (StringMapEntryBase*)2; 00046 } 00047 00048 00049 /// HashString - Compute a hash code for the specified string. 00050 /// 00051 static unsigned HashString(const char *Start, const char *End) { 00052 // Bernstein hash function. 00053 unsigned int Result = 0; 00054 // TODO: investigate whether a modified bernstein hash function performs 00055 // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx 00056 // X*33+c -> X*33^c 00057 while (Start != End) 00058 Result = Result * 33 + *Start++; 00059 Result = Result + (Result >> 5); 00060 return Result; 00061 } 00062 00063 /// LookupBucketFor - Look up the bucket that the specified string should end 00064 /// up in. If it already exists as a key in the map, the Item pointer for the 00065 /// specified bucket will be non-null. Otherwise, it will be null. In either 00066 /// case, the FullHashValue field of the bucket will be set to the hash value 00067 /// of the string. 00068 unsigned StringMapImpl::LookupBucketFor(const char *NameStart, 00069 const char *NameEnd) { 00070 unsigned HTSize = NumBuckets; 00071 if (HTSize == 0) { // Hash table unallocated so far? 00072 init(16); 00073 HTSize = NumBuckets; 00074 } 00075 unsigned FullHashValue = HashString(NameStart, NameEnd); 00076 unsigned BucketNo = FullHashValue & (HTSize-1); 00077 00078 unsigned ProbeAmt = 1; 00079 int FirstTombstone = -1; 00080 while (1) { 00081 ItemBucket &Bucket = TheTable[BucketNo]; 00082 StringMapEntryBase *BucketItem = Bucket.Item; 00083 // If we found an empty bucket, this key isn't in the table yet, return it. 00084 if (BucketItem == 0) { 00085 // If we found a tombstone, we want to reuse the tombstone instead of an 00086 // empty bucket. This reduces probing. 00087 if (FirstTombstone != -1) { 00088 TheTable[FirstTombstone].FullHashValue = FullHashValue; 00089 return FirstTombstone; 00090 } 00091 00092 Bucket.FullHashValue = FullHashValue; 00093 return BucketNo; 00094 } 00095 00096 if (BucketItem == getTombstoneVal()) { 00097 // Skip over tombstones. However, remember the first one we see. 00098 if (FirstTombstone == -1) FirstTombstone = BucketNo; 00099 } else if (Bucket.FullHashValue == FullHashValue) { 00100 // If the full hash value matches, check deeply for a match. The common 00101 // case here is that we are only looking at the buckets (for item info 00102 // being non-null and for the full hash value) not at the items. This 00103 // is important for cache locality. 00104 00105 // Do the comparison like this because NameStart isn't necessarily 00106 // null-terminated! 00107 char *ItemStr = (char*)BucketItem+ItemSize; 00108 unsigned ItemStrLen = BucketItem->getKeyLength(); 00109 if (unsigned(NameEnd-NameStart) == ItemStrLen && 00110 memcmp(ItemStr, NameStart, ItemStrLen) == 0) { 00111 // We found a match! 00112 return BucketNo; 00113 } 00114 } 00115 00116 // Okay, we didn't find the item. Probe to the next bucket. 00117 BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); 00118 00119 // Use quadratic probing, it has fewer clumping artifacts than linear 00120 // probing and has good cache behavior in the common case. 00121 ++ProbeAmt; 00122 } 00123 } 00124 00125 00126 /// FindKey - Look up the bucket that contains the specified key. If it exists 00127 /// in the map, return the bucket number of the key. Otherwise return -1. 00128 /// This does not modify the map. 00129 int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const { 00130 unsigned HTSize = NumBuckets; 00131 if (HTSize == 0) return -1; // Really empty table? 00132 unsigned FullHashValue = HashString(KeyStart, KeyEnd); 00133 unsigned BucketNo = FullHashValue & (HTSize-1); 00134 00135 unsigned ProbeAmt = 1; 00136 while (1) { 00137 ItemBucket &Bucket = TheTable[BucketNo]; 00138 StringMapEntryBase *BucketItem = Bucket.Item; 00139 // If we found an empty bucket, this key isn't in the table yet, return. 00140 if (BucketItem == 0) 00141 return -1; 00142 00143 if (BucketItem == getTombstoneVal()) { 00144 // Ignore tombstones. 00145 } else if (Bucket.FullHashValue == FullHashValue) { 00146 // If the full hash value matches, check deeply for a match. The common 00147 // case here is that we are only looking at the buckets (for item info 00148 // being non-null and for the full hash value) not at the items. This 00149 // is important for cache locality. 00150 00151 // Do the comparison like this because NameStart isn't necessarily 00152 // null-terminated! 00153 char *ItemStr = (char*)BucketItem+ItemSize; 00154 unsigned ItemStrLen = BucketItem->getKeyLength(); 00155 if (unsigned(KeyEnd-KeyStart) == ItemStrLen && 00156 memcmp(ItemStr, KeyStart, ItemStrLen) == 0) { 00157 // We found a match! 00158 return BucketNo; 00159 } 00160 } 00161 00162 // Okay, we didn't find the item. Probe to the next bucket. 00163 BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); 00164 00165 // Use quadratic probing, it has fewer clumping artifacts than linear 00166 // probing and has good cache behavior in the common case. 00167 ++ProbeAmt; 00168 } 00169 } 00170 00171 /// RemoveKey - Remove the specified StringMapEntry from the table, but do not 00172 /// delete it. This aborts if the value isn't in the table. 00173 void StringMapImpl::RemoveKey(StringMapEntryBase *V) { 00174 const char *VStr = (char*)V + ItemSize; 00175 StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength()); 00176 V2 = V2; 00177 assert(V == V2 && "Didn't find key?"); 00178 } 00179 00180 /// RemoveKey - Remove the StringMapEntry for the specified key from the 00181 /// table, returning it. If the key is not in the table, this returns null. 00182 StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart, 00183 const char *KeyEnd) { 00184 int Bucket = FindKey(KeyStart, KeyEnd); 00185 if (Bucket == -1) return 0; 00186 00187 StringMapEntryBase *Result = TheTable[Bucket].Item; 00188 TheTable[Bucket].Item = getTombstoneVal(); 00189 --NumItems; 00190 ++NumTombstones; 00191 return Result; 00192 } 00193 00194 00195 00196 /// RehashTable - Grow the table, redistributing values into the buckets with 00197 /// the appropriate mod-of-hashtable-size. 00198 void StringMapImpl::RehashTable() { 00199 unsigned NewSize = NumBuckets*2; 00200 // Allocate one extra bucket which will always be non-empty. This allows the 00201 // iterators to stop at end. 00202 ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket)); 00203 NewTableArray[NewSize].Item = (StringMapEntryBase*)2; 00204 00205 // Rehash all the items into their new buckets. Luckily :) we already have 00206 // the hash values available, so we don't have to rehash any strings. 00207 for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { 00208 if (IB->Item && IB->Item != getTombstoneVal()) { 00209 // Fast case, bucket available. 00210 unsigned FullHash = IB->FullHashValue; 00211 unsigned NewBucket = FullHash & (NewSize-1); 00212 if (NewTableArray[NewBucket].Item == 0) { 00213 NewTableArray[FullHash & (NewSize-1)].Item = IB->Item; 00214 NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash; 00215 continue; 00216 } 00217 00218 // Otherwise probe for a spot. 00219 unsigned ProbeSize = 1; 00220 do { 00221 NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); 00222 } while (NewTableArray[NewBucket].Item); 00223 00224 // Finally found a slot. Fill it in. 00225 NewTableArray[NewBucket].Item = IB->Item; 00226 NewTableArray[NewBucket].FullHashValue = FullHash; 00227 } 00228 } 00229 00230 free(TheTable); 00231 00232 TheTable = NewTableArray; 00233 NumBuckets = NewSize; 00234 }
This web site is hosted by the Computer Science Department at the University of Illinois at Urbana-Champaign.