LLVM API Documentation
00001 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This header defines the BitstreamReader class. This class can be used to 00011 // read an arbitrary bitstream, regardless of its contents. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #ifndef BITSTREAM_READER_H 00016 #define BITSTREAM_READER_H 00017 00018 #include "llvm/Bitcode/BitCodes.h" 00019 #include <vector> 00020 00021 namespace llvm { 00022 00023 class Deserializer; 00024 00025 class BitstreamReader { 00026 const unsigned char *NextChar; 00027 const unsigned char *LastChar; 00028 friend class Deserializer; 00029 00030 /// CurWord - This is the current data we have pulled from the stream but have 00031 /// not returned to the client. 00032 uint32_t CurWord; 00033 00034 /// BitsInCurWord - This is the number of bits in CurWord that are valid. This 00035 /// is always from [0...31] inclusive. 00036 unsigned BitsInCurWord; 00037 00038 // CurCodeSize - This is the declared size of code values used for the current 00039 // block, in bits. 00040 unsigned CurCodeSize; 00041 00042 /// CurAbbrevs - Abbrevs installed at in this block. 00043 std::vector<BitCodeAbbrev*> CurAbbrevs; 00044 00045 struct Block { 00046 unsigned PrevCodeSize; 00047 std::vector<BitCodeAbbrev*> PrevAbbrevs; 00048 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 00049 }; 00050 00051 /// BlockScope - This tracks the codesize of parent blocks. 00052 SmallVector<Block, 8> BlockScope; 00053 00054 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. 00055 /// These describe abbreviations that all blocks of the specified ID inherit. 00056 struct BlockInfo { 00057 unsigned BlockID; 00058 std::vector<BitCodeAbbrev*> Abbrevs; 00059 }; 00060 std::vector<BlockInfo> BlockInfoRecords; 00061 00062 /// FirstChar - This remembers the first byte of the stream. 00063 const unsigned char *FirstChar; 00064 public: 00065 BitstreamReader() { 00066 NextChar = FirstChar = LastChar = 0; 00067 CurWord = 0; 00068 BitsInCurWord = 0; 00069 CurCodeSize = 0; 00070 } 00071 00072 BitstreamReader(const unsigned char *Start, const unsigned char *End) { 00073 init(Start, End); 00074 } 00075 00076 void init(const unsigned char *Start, const unsigned char *End) { 00077 NextChar = FirstChar = Start; 00078 LastChar = End; 00079 assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); 00080 CurWord = 0; 00081 BitsInCurWord = 0; 00082 CurCodeSize = 2; 00083 } 00084 00085 ~BitstreamReader() { 00086 // Abbrevs could still exist if the stream was broken. If so, don't leak 00087 // them. 00088 for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); 00089 i != e; ++i) 00090 CurAbbrevs[i]->dropRef(); 00091 00092 for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size()); 00093 S != e; ++S) { 00094 std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs; 00095 for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size()); 00096 i != e; ++i) 00097 Abbrevs[i]->dropRef(); 00098 } 00099 00100 // Free the BlockInfoRecords. 00101 while (!BlockInfoRecords.empty()) { 00102 BlockInfo &Info = BlockInfoRecords.back(); 00103 // Free blockinfo abbrev info. 00104 for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size()); 00105 i != e; ++i) 00106 Info.Abbrevs[i]->dropRef(); 00107 BlockInfoRecords.pop_back(); 00108 } 00109 } 00110 00111 bool AtEndOfStream() const { 00112 return NextChar == LastChar && BitsInCurWord == 0; 00113 } 00114 00115 /// GetCurrentBitNo - Return the bit # of the bit we are reading. 00116 uint64_t GetCurrentBitNo() const { 00117 return (NextChar-FirstChar)*8 + ((32-BitsInCurWord) & 31); 00118 } 00119 00120 /// JumpToBit - Reset the stream to the specified bit number. 00121 void JumpToBit(uint64_t BitNo) { 00122 uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3; 00123 uintptr_t WordBitNo = uintptr_t(BitNo) & 31; 00124 assert(ByteNo < (uintptr_t)(LastChar-FirstChar) && "Invalid location"); 00125 00126 // Move the cursor to the right word. 00127 NextChar = FirstChar+ByteNo; 00128 BitsInCurWord = 0; 00129 CurWord = 0; 00130 00131 // Skip over any bits that are already consumed. 00132 if (WordBitNo) { 00133 NextChar -= 4; 00134 Read(static_cast<unsigned>(WordBitNo)); 00135 } 00136 } 00137 00138 /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #. 00139 unsigned GetAbbrevIDWidth() const { return CurCodeSize; } 00140 00141 uint32_t Read(unsigned NumBits) { 00142 // If the field is fully contained by CurWord, return it quickly. 00143 if (BitsInCurWord >= NumBits) { 00144 uint32_t R = CurWord & ((1U << NumBits)-1); 00145 CurWord >>= NumBits; 00146 BitsInCurWord -= NumBits; 00147 return R; 00148 } 00149 00150 // If we run out of data, stop at the end of the stream. 00151 if (LastChar == NextChar) { 00152 CurWord = 0; 00153 BitsInCurWord = 0; 00154 return 0; 00155 } 00156 00157 unsigned R = CurWord; 00158 00159 // Read the next word from the stream. 00160 CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) | 00161 (NextChar[2] << 16) | (NextChar[3] << 24); 00162 NextChar += 4; 00163 00164 // Extract NumBits-BitsInCurWord from what we just read. 00165 unsigned BitsLeft = NumBits-BitsInCurWord; 00166 00167 // Be careful here, BitsLeft is in the range [1..32] inclusive. 00168 R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord; 00169 00170 // BitsLeft bits have just been used up from CurWord. 00171 if (BitsLeft != 32) 00172 CurWord >>= BitsLeft; 00173 else 00174 CurWord = 0; 00175 BitsInCurWord = 32-BitsLeft; 00176 return R; 00177 } 00178 00179 uint64_t Read64(unsigned NumBits) { 00180 if (NumBits <= 32) return Read(NumBits); 00181 00182 uint64_t V = Read(32); 00183 return V | (uint64_t)Read(NumBits-32) << 32; 00184 } 00185 00186 uint32_t ReadVBR(unsigned NumBits) { 00187 uint32_t Piece = Read(NumBits); 00188 if ((Piece & (1U << (NumBits-1))) == 0) 00189 return Piece; 00190 00191 uint32_t Result = 0; 00192 unsigned NextBit = 0; 00193 while (1) { 00194 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 00195 00196 if ((Piece & (1U << (NumBits-1))) == 0) 00197 return Result; 00198 00199 NextBit += NumBits-1; 00200 Piece = Read(NumBits); 00201 } 00202 } 00203 00204 uint64_t ReadVBR64(unsigned NumBits) { 00205 uint64_t Piece = Read(NumBits); 00206 if ((Piece & (1U << (NumBits-1))) == 0) 00207 return Piece; 00208 00209 uint64_t Result = 0; 00210 unsigned NextBit = 0; 00211 while (1) { 00212 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 00213 00214 if ((Piece & (1U << (NumBits-1))) == 0) 00215 return Result; 00216 00217 NextBit += NumBits-1; 00218 Piece = Read(NumBits); 00219 } 00220 } 00221 00222 void SkipToWord() { 00223 BitsInCurWord = 0; 00224 CurWord = 0; 00225 } 00226 00227 00228 unsigned ReadCode() { 00229 return Read(CurCodeSize); 00230 } 00231 00232 //===--------------------------------------------------------------------===// 00233 // Block Manipulation 00234 //===--------------------------------------------------------------------===// 00235 00236 private: 00237 /// getBlockInfo - If there is block info for the specified ID, return it, 00238 /// otherwise return null. 00239 BlockInfo *getBlockInfo(unsigned BlockID) { 00240 // Common case, the most recent entry matches BlockID. 00241 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 00242 return &BlockInfoRecords.back(); 00243 00244 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 00245 i != e; ++i) 00246 if (BlockInfoRecords[i].BlockID == BlockID) 00247 return &BlockInfoRecords[i]; 00248 return 0; 00249 } 00250 public: 00251 00252 00253 // Block header: 00254 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 00255 00256 /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for 00257 /// the block. 00258 unsigned ReadSubBlockID() { 00259 return ReadVBR(bitc::BlockIDWidth); 00260 } 00261 00262 /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip 00263 /// over the body of this block. If the block record is malformed, return 00264 /// true. 00265 bool SkipBlock() { 00266 // Read and ignore the codelen value. Since we are skipping this block, we 00267 // don't care what code widths are used inside of it. 00268 ReadVBR(bitc::CodeLenWidth); 00269 SkipToWord(); 00270 unsigned NumWords = Read(bitc::BlockSizeWidth); 00271 00272 // Check that the block wasn't partially defined, and that the offset isn't 00273 // bogus. 00274 if (AtEndOfStream() || NextChar+NumWords*4 > LastChar) 00275 return true; 00276 00277 NextChar += NumWords*4; 00278 return false; 00279 } 00280 00281 /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter 00282 /// the block, and return true if the block is valid. 00283 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) { 00284 // Save the current block's state on BlockScope. 00285 BlockScope.push_back(Block(CurCodeSize)); 00286 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 00287 00288 // Add the abbrevs specific to this block to the CurAbbrevs list. 00289 if (BlockInfo *Info = getBlockInfo(BlockID)) { 00290 for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size()); 00291 i != e; ++i) { 00292 CurAbbrevs.push_back(Info->Abbrevs[i]); 00293 CurAbbrevs.back()->addRef(); 00294 } 00295 } 00296 00297 // Get the codesize of this block. 00298 CurCodeSize = ReadVBR(bitc::CodeLenWidth); 00299 SkipToWord(); 00300 unsigned NumWords = Read(bitc::BlockSizeWidth); 00301 if (NumWordsP) *NumWordsP = NumWords; 00302 00303 // Validate that this block is sane. 00304 if (CurCodeSize == 0 || AtEndOfStream() || NextChar+NumWords*4 > LastChar) 00305 return true; 00306 00307 return false; 00308 } 00309 00310 bool ReadBlockEnd() { 00311 if (BlockScope.empty()) return true; 00312 00313 // Block tail: 00314 // [END_BLOCK, <align4bytes>] 00315 SkipToWord(); 00316 00317 PopBlockScope(); 00318 return false; 00319 } 00320 00321 private: 00322 void PopBlockScope() { 00323 CurCodeSize = BlockScope.back().PrevCodeSize; 00324 00325 // Delete abbrevs from popped scope. 00326 for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); 00327 i != e; ++i) 00328 CurAbbrevs[i]->dropRef(); 00329 00330 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 00331 BlockScope.pop_back(); 00332 } 00333 00334 //===--------------------------------------------------------------------===// 00335 // Record Processing 00336 //===--------------------------------------------------------------------===// 00337 00338 private: 00339 void ReadAbbreviatedField(const BitCodeAbbrevOp &Op, 00340 SmallVectorImpl<uint64_t> &Vals) { 00341 if (Op.isLiteral()) { 00342 // If the abbrev specifies the literal value to use, use it. 00343 Vals.push_back(Op.getLiteralValue()); 00344 } else { 00345 // Decode the value as we are commanded. 00346 switch (Op.getEncoding()) { 00347 default: assert(0 && "Unknown encoding!"); 00348 case BitCodeAbbrevOp::Fixed: 00349 Vals.push_back(Read((unsigned)Op.getEncodingData())); 00350 break; 00351 case BitCodeAbbrevOp::VBR: 00352 Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData())); 00353 break; 00354 case BitCodeAbbrevOp::Char6: 00355 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6))); 00356 break; 00357 } 00358 } 00359 } 00360 public: 00361 unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals) { 00362 if (AbbrevID == bitc::UNABBREV_RECORD) { 00363 unsigned Code = ReadVBR(6); 00364 unsigned NumElts = ReadVBR(6); 00365 for (unsigned i = 0; i != NumElts; ++i) 00366 Vals.push_back(ReadVBR64(6)); 00367 return Code; 00368 } 00369 00370 unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV; 00371 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 00372 BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; 00373 00374 for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { 00375 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 00376 if (Op.isLiteral() || Op.getEncoding() != BitCodeAbbrevOp::Array) { 00377 ReadAbbreviatedField(Op, Vals); 00378 } else { 00379 // Array case. Read the number of elements as a vbr6. 00380 unsigned NumElts = ReadVBR(6); 00381 00382 // Get the element encoding. 00383 assert(i+2 == e && "array op not second to last?"); 00384 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 00385 00386 // Read all the elements. 00387 for (; NumElts; --NumElts) 00388 ReadAbbreviatedField(EltEnc, Vals); 00389 } 00390 } 00391 00392 unsigned Code = (unsigned)Vals[0]; 00393 Vals.erase(Vals.begin()); 00394 return Code; 00395 } 00396 00397 //===--------------------------------------------------------------------===// 00398 // Abbrev Processing 00399 //===--------------------------------------------------------------------===// 00400 00401 void ReadAbbrevRecord() { 00402 BitCodeAbbrev *Abbv = new BitCodeAbbrev(); 00403 unsigned NumOpInfo = ReadVBR(5); 00404 for (unsigned i = 0; i != NumOpInfo; ++i) { 00405 bool IsLiteral = Read(1) ? true : false; 00406 if (IsLiteral) { 00407 Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8))); 00408 continue; 00409 } 00410 00411 BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3); 00412 if (BitCodeAbbrevOp::hasEncodingData(E)) 00413 Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5))); 00414 else 00415 Abbv->Add(BitCodeAbbrevOp(E)); 00416 } 00417 CurAbbrevs.push_back(Abbv); 00418 } 00419 00420 //===--------------------------------------------------------------------===// 00421 // BlockInfo Block Reading 00422 //===--------------------------------------------------------------------===// 00423 00424 private: 00425 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 00426 if (BlockInfo *BI = getBlockInfo(BlockID)) 00427 return *BI; 00428 00429 // Otherwise, add a new record. 00430 BlockInfoRecords.push_back(BlockInfo()); 00431 BlockInfoRecords.back().BlockID = BlockID; 00432 return BlockInfoRecords.back(); 00433 } 00434 00435 public: 00436 00437 bool ReadBlockInfoBlock() { 00438 if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true; 00439 00440 SmallVector<uint64_t, 64> Record; 00441 BlockInfo *CurBlockInfo = 0; 00442 00443 // Read all the records for this module. 00444 while (1) { 00445 unsigned Code = ReadCode(); 00446 if (Code == bitc::END_BLOCK) 00447 return ReadBlockEnd(); 00448 if (Code == bitc::ENTER_SUBBLOCK) { 00449 ReadSubBlockID(); 00450 if (SkipBlock()) return true; 00451 continue; 00452 } 00453 00454 // Read abbrev records, associate them with CurBID. 00455 if (Code == bitc::DEFINE_ABBREV) { 00456 if (!CurBlockInfo) return true; 00457 ReadAbbrevRecord(); 00458 00459 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 00460 // appropriate BlockInfo. 00461 BitCodeAbbrev *Abbv = CurAbbrevs.back(); 00462 CurAbbrevs.pop_back(); 00463 CurBlockInfo->Abbrevs.push_back(Abbv); 00464 continue; 00465 } 00466 00467 // Read a record. 00468 Record.clear(); 00469 switch (ReadRecord(Code, Record)) { 00470 default: break; // Default behavior, ignore unknown content. 00471 case bitc::BLOCKINFO_CODE_SETBID: 00472 if (Record.size() < 1) return true; 00473 CurBlockInfo = &getOrCreateBlockInfo((unsigned)Record[0]); 00474 break; 00475 } 00476 } 00477 } 00478 }; 00479 00480 } // End llvm namespace 00481 00482 #endif
This web site is hosted by the Computer Science Department at the University of Illinois at Urbana-Champaign.