使用TableBuilder构造一个Table
1 struct TableBuilder::Rep { // TableBuilder内部使用的结构,记录当前的一些状态等 2 Options options; 3 Options index_block_options; 4 WritableFile* file; // 对应的.sst文件 5 uint64_t offset; 6 Status status; 7 BlockBuilder data_block; // Data Block 8 BlockBuilder index_block; // Index Block 9 std::string last_key; // 添加的最后一个key,一方面用于key是否排序的判断,另一方面当写入一个Data 10 //+ Block时记录index Block中索引项(last_key+offset+size) 11 int64_t num_entries; // .sst文件中已经添加的key/value数量 12 bool closed; // Either Finish() or Abandon() has been called. 13 14 // Add下一Block的第一个key/value时,才根据这个key构造一个FindShortSuccessor, 15 // 写入Index Block中的一个entry(max_key+offset+size),是为了能够找到 16 // 一个更短的分割2个Block的key,从而减少存储容量; 17 // 只有Finish中是根据最后一个Block的最后一个key构造的。 18 // We do not emit the index entry for a block until we have seen the 19 // first key for the next data block. This allows us to use shorter 20 // keys in the index block. For example, consider a block boundary 21 // between the keys "the quick brown fox" and "the who". We can use 22 // "the r" as the key for the index block entry since it is >= all 23 // entries in the first block and < all entries in subsequent 24 // blocks. 25 // 26 // Invariant: r->pending_index_entry is true only if data_block is empty. 27 bool pending_index_entry; // 标识是否刚写入一个Data Block,控制在Index 28 //+ Block中添加一项索引信息(last_key+offset+size) 29 BlockHandle pending_handle; // Handle to add to index block 30 31 std::string compressed_output; // 数据压缩 32 33 Rep(const Options& opt, WritableFile* f) // 构造函数 34 : options(opt), 35 index_block_options(opt), 36 file(f), 37 offset(0), 38 data_block(&options), 39 index_block(&index_block_options), 40 num_entries(0), 41 closed(false), 42 pending_index_entry(false) 43 { 44 index_block_options.block_restart_interval = 1; // Index Block中每个restart块只有一个record,查找方便 45 } 46 };// struct TableBuilder::Rep ;
TableBuilder头文件
1 class TableBuilder { 2 public: 3 // Create a builder that will store the contents of the table it is 4 // building in *file. Does not close the file. It is up to the 5 // caller to close the file after calling Finish(). 6 //创建一个基于file的builder,存储table. 使用期间不能关闭文件,在调用Finish()后调用方关闭文件 7 TableBuilder(const Options& options, WritableFile* file); 8 9 // REQUIRES: Either Finish() or Abandon() has been called. 10 ~TableBuilder(); 11 12 // Change the options used by this builder. Note: only some of the 13 // option fields can be changed after construction. If a field is 14 // not allowed to change dynamically and its value in the structure 15 // passed to the constructor is different from its value in the 16 // structure passed to this method, this method will return an error 17 // without changing any fields. 18 Status ChangeOptions(const Options& options); 19 20 // Add key,value to the table being constructed. 21 // REQUIRES: key is after any previously added key according to comparator. 22 // REQUIRES: Finish(), Abandon() have not been called 23 //添加key value 稍后查看代码 24 void Add(const Slice& key, const Slice& value); 25 26 // Advanced operation: flush any buffered key/value pairs to file. 27 // Can be used to ensure that two adjacent entries never live in 28 // the same data block. Most clients should not need to use this method. 29 // REQUIRES: Finish(), Abandon() have not been called 30 void Flush(); 31 32 // Return non-ok iff some error has been detected. 33 Status status() const; 34 35 // Finish building the table. Stops using the file passed to the 36 // constructor after this function returns. 37 // REQUIRES: Finish(), Abandon() have not been called 38 39 Status Finish(); 40 41 // Indicate that the contents of this builder should be abandoned. Stops 42 // using the file passed to the constructor after this function returns. 43 // If the caller is not going to call Finish(), it must call Abandon() 44 // before destroying this builder. 
45 // REQUIRES: Finish(), Abandon() have not been called 46 void Abandon(); 47 48 // Number of calls to Add() so far. 49 uint64_t NumEntries() const; 50 51 // Size of the file generated so far. If invoked after a successful 52 // Finish() call, returns the size of the final generated file. 53 uint64_t FileSize() const; 54 55 private: 56 bool ok() const { return status().ok(); } 57 void WriteBlock(BlockBuilder* block, BlockHandle* handle); 58 59 struct Rep; 60 Rep* rep_; 61 62 // No copying allowed 63 TableBuilder(const TableBuilder&); 64 void operator=(const TableBuilder&); 65 };
TableBuilder 的实现主要是按照文件格式依次填充各个部分,这里对源码做了简单的注释:
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "leveldb/table_builder.h" #include <assert.h> #include <stdio.h> #include "leveldb/comparator.h" #include "leveldb/env.h" #include "table/block_builder.h" #include "table/format.h" #include "util/coding.h" #include "util/crc32c.h" #include "util/logging.h" namespace leveldb { struct TableBuilder::Rep { Options options; Options index_block_options; WritableFile* file; uint64_t offset; Status status; BlockBuilder data_block; BlockBuilder index_block; std::string last_key; int64_t num_entries; bool closed; // Either Finish() or Abandon() has been called. // We do not emit the index entry for a block until we have seen the // first key for the next data block. This allows us to use shorter // keys in the index block. For example, consider a block boundary // between the keys "the quick brown fox" and "the who". We can use // "the r" as the key for the index block entry since it is >= all // entries in the first block and < all entries in subsequent // blocks. // // Invariant: r->pending_index_entry is true only if data_block is empty. 
bool pending_index_entry; BlockHandle pending_handle; // Handle to add to index block std::string compressed_output; Rep(const Options& opt, WritableFile* f) : options(opt), index_block_options(opt), file(f), offset(0), data_block(&options), index_block(&index_block_options), num_entries(0), closed(false), pending_index_entry(false) { index_block_options.block_restart_interval = 1; } }; TableBuilder::TableBuilder(const Options& options, WritableFile* file) : rep_(new Rep(options, file)) { } TableBuilder::~TableBuilder() { assert(rep_->closed); // Catch errors where caller forgot to call Finish() delete rep_; } Status TableBuilder::ChangeOptions(const Options& options) { // Note: if more fields are added to Options, update // this function to catch changes that should not be allowed to // change in the middle of building a Table. if (options.comparator != rep_->options.comparator) { return Status::InvalidArgument("changing comparator while building table"); } // Note that any live BlockBuilders point to rep_->options and therefore // will automatically pick up the updated options. 
rep_->options = options; rep_->index_block_options = options; rep_->index_block_options.block_restart_interval = 1; return Status::OK(); } void TableBuilder::Add(const Slice& key, const Slice& value) { Rep* r = rep_; assert(!r->closed); if (!ok()) return; //确保Rep没有关闭 并且状态正常 //如果不是添加的table本身的属性 添加的key 必然是有序的的 否则报错 if (r->num_entries > 0) { assert(r->options.comparator->Compare(key, Slice(r->last_key)) > 0); } //pending_index_entry标记是否是新创建的一个block //当新创建一个block时 才可能确认上一个block和新block之间的key的一个分割字符串 记录在lastkey和index_block 方便以后查找key 定位 if (r->pending_index_entry) { assert(r->data_block.empty()); //comparator 中有 FindShortestSeparator() / FindShortSuccessor()两个接口, //FindShortestSeparator(start, limit)是获得大于 start 但小于 limit 的最小值。 //FindShortSuccessor(start)是获得比 start 大的最小值。比较都基于 user - commparator,二者会被 //用来确定 sstable 中 block 的 end - key。 r->options.comparator->FindShortestSeparator(&r->last_key, key); std::string handle_encoding; r->pending_handle.EncodeTo(&handle_encoding); r->index_block.Add(r->last_key, Slice(handle_encoding)); r->pending_index_entry = false; } //更新lastkey 跟新记录计数 添加data block r->last_key.assign(key.data(), key.size()); r->num_entries++; r->data_block.Add(key, value); //data block 大于指定size 进行flush操作 const size_t estimated_block_size = r->data_block.CurrentSizeEstimate(); if (estimated_block_size >= r->options.block_size) { Flush(); } } //block flush落盘 void TableBuilder::Flush() { Rep* r = rep_; assert(!r->closed); if (!ok()) return; if (r->data_block.empty()) return; assert(!r->pending_index_entry); WriteBlock(&r->data_block, &r->pending_handle); if (ok()) { r->pending_index_entry = true; r->status = r->file->Flush(); } } //每个block data 包含 n个字节内容 以及type 1个字节 crc 4个字节 void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) { // File format contains a sequence of blocks where each block has: // block_data: uint8[n] // type: uint8 // crc: uint32 assert(ok()); Rep* r = rep_; Slice raw = block->Finish(); Slice block_contents; CompressionType 
type = r->options.compression; // TODO(postrelease): Support more compression options: zlib? switch (type) { case kNoCompression: block_contents = raw; break; case kSnappyCompression: { std::string* compressed = &r->compressed_output; if (port::Snappy_Compress(raw.data(), raw.size(), compressed) && compressed->size() < raw.size() - (raw.size() / 8u)) { block_contents = *compressed; } else { // Snappy not supported, or compressed less than 12.5%, so just // store uncompressed form block_contents = raw; type = kNoCompression; } break; } } handle->set_offset(r->offset); handle->set_size(block_contents.size()); r->status = r->file->Append(block_contents); if (r->status.ok()) { char trailer[kBlockTrailerSize]; trailer[0] = type; uint32_t crc = crc32c::Value(block_contents.data(), block_contents.size()); crc = crc32c::Extend(crc, trailer, 1); // Extend crc to cover block type EncodeFixed32(trailer+1, crc32c::Mask(crc)); r->status = r->file->Append(Slice(trailer, kBlockTrailerSize)); if (r->status.ok()) { r->offset += block_contents.size() + kBlockTrailerSize; } } r->compressed_output.clear(); block->Reset(); } Status TableBuilder::status() const { return rep_->status; } Status TableBuilder::Finish() { Rep* r = rep_; Flush(); assert(!r->closed); r->closed = true; BlockHandle metaindex_block_handle; BlockHandle index_block_handle; if (ok()) { BlockBuilder meta_index_block(&r->options); // TODO(postrelease): Add stats and other meta blocks WriteBlock(&meta_index_block, &metaindex_block_handle); } if (ok()) { if (r->pending_index_entry) { r->options.comparator->FindShortSuccessor(&r->last_key); std::string handle_encoding; r->pending_handle.EncodeTo(&handle_encoding); r->index_block.Add(r->last_key, Slice(handle_encoding)); r->pending_index_entry = false; } WriteBlock(&r->index_block, &index_block_handle); } if (ok()) { Footer footer; footer.set_metaindex_handle(metaindex_block_handle); footer.set_index_handle(index_block_handle); std::string footer_encoding; 
footer.EncodeTo(&footer_encoding); r->status = r->file->Append(footer_encoding); if (r->status.ok()) { r->offset += footer_encoding.size(); } } return r->status; } void TableBuilder::Abandon() { Rep* r = rep_; assert(!r->closed); r->closed = true; } uint64_t TableBuilder::NumEntries() const { return rep_->num_entries; } uint64_t TableBuilder::FileSize() const { return rep_->offset; } }
参考
https://blog.csdn.net/tankles/article/details/7663918
《leveldb实现解析》淘宝 那岩