-
Notifications
You must be signed in to change notification settings - Fork 227
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
leveldb: add log and descriptor decoders
- Loading branch information
Showing
25 changed files
with
571 additions
and
92 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
package leveldb | ||
|
||
// https://github.com/google/leveldb/blob/main/doc/impl.md#manifest | ||
// https://github.com/google/leveldb/blob/main/db/version_edit.cc | ||
// | ||
// Files in LevelDB using this format include: | ||
// - MANIFEST-* | ||
|
||
import ( | ||
"embed" | ||
|
||
"github.com/wader/fq/format" | ||
"github.com/wader/fq/pkg/decode" | ||
"github.com/wader/fq/pkg/interp" | ||
"github.com/wader/fq/pkg/scalar" | ||
) | ||
|
||
//go:embed leveldb_log.md | ||
var leveldbDescriptorFS embed.FS | ||
|
||
func init() { | ||
interp.RegisterFormat( | ||
format.LevelDB_Descriptor, | ||
&decode.Format{ | ||
Description: "LevelDB Descriptor", | ||
Groups: []*decode.Group{format.Probe}, | ||
DecodeFn: ldbDescriptorDecode, | ||
}) | ||
interp.RegisterFS(leveldbDescriptorFS) | ||
} | ||
|
||
// Tag numbers that prefix each value stored in a descriptor record.
// The numbering starts at 1 and skips 8.
const (
	tagTypeComparator     = iota + 1 // 1
	tagTypeLogNumber                 // 2
	tagTypeNextFileNumber            // 3
	tagTypeLastSequence              // 4
	tagTypeCompactPointer            // 5
	tagTypeDeletedFile               // 6
	tagTypeNewFile                   // 7
	_                                // 8 not used anymore
	tagTypePrevLogNumber             // 9
)
|
||
var tagTypes = scalar.UintMapSymStr{ | ||
tagTypeComparator: "comparator", | ||
tagTypeLogNumber: "log_number", | ||
tagTypeNextFileNumber: "next file number", | ||
tagTypeLastSequence: "last sequence", | ||
tagTypeCompactPointer: "compact pointer", | ||
tagTypeDeletedFile: "deleted file", | ||
tagTypeNewFile: "new file", | ||
tagTypePrevLogNumber: "previous log number", | ||
} | ||
|
||
func ldbDescriptorDecode(d *decode.D) any { | ||
rro := recordReadOptions{readDataFn: func(size int64, recordType int, d *decode.D) { | ||
if recordType == recordTypeFull { | ||
d.FieldStruct("data", func(d *decode.D) { | ||
d.LimitedFn(size, readManifest) | ||
}) | ||
} else { | ||
d.FieldRawLen("data", size) | ||
} | ||
}} | ||
readBlockSequence(rro, d) | ||
|
||
return nil | ||
} | ||
|
||
// List of sorted tables for each level involving key ranges and other metadata. | ||
func readManifest(d *decode.D) { | ||
d.FieldArray("tags", func(d *decode.D) { | ||
for { | ||
if d.End() { | ||
break | ||
} | ||
d.FieldStruct("tag", func(d *decode.D) { | ||
tag := d.FieldULEB128("key", tagTypes) | ||
switch tag { | ||
case tagTypeComparator: | ||
readLengthPrefixedString("value", d) | ||
case tagTypeLogNumber, | ||
tagTypePrevLogNumber, | ||
tagTypeNextFileNumber, | ||
tagTypeLastSequence: | ||
d.FieldULEB128("value") | ||
case tagTypeCompactPointer: | ||
d.FieldStruct("value", func(d *decode.D) { | ||
d.FieldULEB128("level") | ||
readTagInternalKey("internal_key", d) | ||
}) | ||
case tagTypeDeletedFile: | ||
d.FieldStruct("value", func(d *decode.D) { | ||
d.FieldULEB128("level") | ||
d.FieldULEB128("file_number") | ||
}) | ||
case tagTypeNewFile: | ||
d.FieldStruct("value", func(d *decode.D) { | ||
d.FieldULEB128("level") | ||
d.FieldULEB128("file_number") | ||
d.FieldULEB128("file_size") | ||
readTagInternalKey("smallest_internal_key", d) | ||
readTagInternalKey("largest_internal_key", d) | ||
}) | ||
default: | ||
d.Fatalf("unknown tag: %d", tag) | ||
} | ||
}) | ||
} | ||
}) | ||
} | ||
|
||
func readLengthPrefixedString(name string, d *decode.D) { | ||
d.FieldStruct(name, func(d *decode.D) { | ||
length := d.FieldULEB128("length") | ||
d.FieldUTF8("data", int(length)) | ||
}) | ||
} | ||
|
||
func readTagInternalKey(name string, d *decode.D) { | ||
d.FieldStruct(name, func(d *decode.D) { | ||
length := d.FieldULEB128("length") | ||
readInternalKey("data", int64(length), d) | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
### Limitations | ||
|
||
- fragmented non-"full" records are not decoded further. | ||
|
||
|
||
### Authors | ||
|
||
- [@mikez](https://github.com/mikez), original author | ||
|
||
### References | ||
|
||
- https://github.com/google/leveldb/blob/main/doc/impl.md#manifest | ||
- https://github.com/google/leveldb/blob/main/db/version_edit.cc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
package leveldb | ||
|
||
// https://github.com/google/leveldb/blob/main/doc/log_format.md | ||
// | ||
// Files in LevelDB using this format include: | ||
// - *.log | ||
// - MANIFEST-* | ||
|
||
import ( | ||
"embed" | ||
|
||
"github.com/wader/fq/format" | ||
"github.com/wader/fq/internal/mathex" | ||
"github.com/wader/fq/pkg/decode" | ||
"github.com/wader/fq/pkg/interp" | ||
"github.com/wader/fq/pkg/scalar" | ||
) | ||
|
||
// leveldbLogFS holds the embedded leveldb_log.md document; init passes it | ||
// to interp.RegisterFS. | ||
// | ||
//go:embed leveldb_log.md | ||
var leveldbLogFS embed.FS | ||
|
||
func init() { | ||
interp.RegisterFormat( | ||
format.LOG, | ||
&decode.Format{ | ||
Description: "LevelDB Log", | ||
Groups: []*decode.Group{format.Probe}, | ||
DecodeFn: ldbLogDecode, | ||
}) | ||
interp.RegisterFS(leveldbLogFS) | ||
} | ||
|
||
// recordReadOptions carries the format-specific hook used by the shared | ||
// block and record readers (readBlockSequence, readLogBlock, readLogRecord). | ||
type recordReadOptions struct { | ||
// Both .log- and MANIFEST-files use the Log-format, | ||
// i.e., a sequence of records split into 32KB blocks. | ||
// However, the format of the data within the records differs. | ||
// This function specifies how to read said data. | ||
// | ||
// It is called once per record, after the record header has been read: | ||
// size is the header's length field converted to bits and recordType is | ||
// the header's record_type value. | ||
readDataFn func(size int64, recordType int, d *decode.D) | ||
} | ||
|
||
// Sizes of the log format's fixed structures, expressed in bits because they
// are compared against the decoder's remaining bit count.
//
// https://github.com/google/leveldb/blob/main/db/log_format.h
const (
	// checksum (4 bytes) + length (2 bytes) + record type (1 byte)
	headerSize = 8 * (4 + 2 + 1)

	// 32KB
	blockSize = 8 * (32 * 1024)
)

// Values of the record_type header field.
const (
	recordTypeZero   = iota // 0: preallocated file regions
	recordTypeFull          // 1
	recordTypeFirst         // 2: fragments
	recordTypeMiddle        // 3
	recordTypeLast          // 4
)
|
||
// recordTypes maps the value of a record header's record_type field to the | ||
// symbolic name attached to that field. | ||
var recordTypes = scalar.UintMapSymStr{ | ||
recordTypeZero: "zero", | ||
recordTypeFull: "full", | ||
recordTypeFirst: "first", | ||
recordTypeMiddle: "middle", | ||
recordTypeLast: "last", | ||
} | ||
|
||
func ldbLogDecode(d *decode.D) any { | ||
rro := recordReadOptions{readDataFn: func(size int64, recordType int, d *decode.D) { | ||
d.FieldRawLen("data", size) | ||
}} | ||
readBlockSequence(rro, d) | ||
|
||
return nil | ||
} | ||
|
||
// Read a sequence of 32KB-blocks (the last one may be less). | ||
// https://github.com/google/leveldb/blob/main/db/log_reader.cc#L189 | ||
func readBlockSequence(rro recordReadOptions, d *decode.D) { | ||
d.Endian = decode.LittleEndian | ||
|
||
d.FieldArray("blocks", func(d *decode.D) { | ||
for d.BitsLeft() >= headerSize { | ||
d.LimitedFn(mathex.Min(blockSize, d.BitsLeft()), func(d *decode.D) { | ||
d.FieldStruct("block", bind(readLogBlock, rro)) | ||
}) | ||
} | ||
}) | ||
|
||
if d.BitsLeft() > 0 { | ||
// The reference implementation says: | ||
// "[...] if buffer_ is non-empty, we have a truncated header at the | ||
// end of the file, which can be caused by the writer crashing in the | ||
// middle of writing the header. Instead of considering this an error, | ||
// just report EOF." | ||
d.FieldRawLen("truncated_block", d.BitsLeft()) | ||
} | ||
} | ||
|
||
// Read a Log-block, consisting of up to 32KB of records and an optional trailer. | ||
// | ||
// block := record* trailer? | ||
func readLogBlock(rro recordReadOptions, d *decode.D) { | ||
if d.BitsLeft() > blockSize { | ||
d.Fatalf("Bits left greater than maximum log-block size of 32KB.") | ||
} | ||
// record* | ||
d.FieldArray("records", func(d *decode.D) { | ||
for d.BitsLeft() >= headerSize { | ||
d.FieldStruct("record", bind(readLogRecord, rro)) | ||
} | ||
}) | ||
// trailer? | ||
if d.BitsLeft() > 0 { | ||
d.FieldRawLen("trailer", d.BitsLeft()) | ||
} | ||
} | ||
|
||
// Read a Log-record. | ||
// | ||
// checksum: uint32 // crc32c of type and data[] ; little-endian | ||
// length: uint16 // little-endian | ||
// type: uint8 // One of FULL, FIRST, MIDDLE, LAST | ||
// data: uint8[length] | ||
// | ||
// via https://github.com/google/leveldb/blob/main/doc/log_format.md | ||
func readLogRecord(rro recordReadOptions, d *decode.D) { | ||
// header | ||
var checksumValue *decode.Value | ||
var length int64 | ||
var recordType int | ||
d.LimitedFn(headerSize, func(d *decode.D) { | ||
d.FieldStruct("header", func(d *decode.D) { | ||
d.FieldU32("checksum", scalar.UintHex) | ||
checksumValue = d.FieldGet("checksum") | ||
length = int64(d.FieldU16("length")) | ||
recordType = int(d.FieldU8("record_type", recordTypes)) | ||
}) | ||
}) | ||
|
||
// verify checksum: record type (1 byte) + data (`length` bytes) | ||
d.RangeFn(d.Pos()-8, (1+length)*8, func(d *decode.D) { | ||
bytesToCheck := d.Bits(int(d.BitsLeft())) | ||
actualChecksum := computeChecksum(bytesToCheck) | ||
_ = checksumValue.TryUintScalarFn(d.UintAssert(uint64(actualChecksum))) | ||
}) | ||
|
||
// data | ||
dataSize := length * 8 | ||
rro.readDataFn(dataSize, recordType, d) | ||
} | ||
|
||
func bind(f func(recordReadOptions, *decode.D), rro recordReadOptions) func(*decode.D) { | ||
return func(d *decode.D) { | ||
f(rro, d) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
### Limitations | ||
|
||
- individual records are not merged, and their data is not decoded further. | ||
|
||
### Authors | ||
|
||
- [@mikez](https://github.com/mikez), original author | ||
|
||
### References | ||
|
||
- https://github.com/google/leveldb/blob/main/doc/log_format.md |
Oops, something went wrong.