Skip to content
This repository was archived by the owner on Mar 29, 2025. It is now read-only.

Adding support for RDB version <= 12 and refactoring #19

Merged
merged 35 commits into from
Mar 29, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
4d52810
Add new rdb version and datatypes
bimtauer Dec 19, 2024
865bea2
Begin port to clap and revised structure
bimtauer Dec 19, 2024
677b503
Refactor, fix previous test, add rust integration test
bimtauer Dec 19, 2024
40bea25
Introduce file output and simplify integration test
bimtauer Dec 19, 2024
a28198f
Change encoding of non ascii chars
bimtauer Dec 22, 2024
7b06391
Implement hash list pack
bimtauer Dec 22, 2024
4decc7b
Begin factoring out formatter from parser
bimtauer Dec 23, 2024
3e25fe5
Further refactor formatter
bimtauer Dec 23, 2024
2d11517
Fix tests for refactored json formatter, cleanup pending
bimtauer Dec 25, 2024
0633c9e
Refactor formatter, generic format method
bimtauer Dec 27, 2024
7aaf4ea
Implement skip for hash list pack
bimtauer Dec 27, 2024
8083786
Add integration tests for plain and protocol format
bimtauer Dec 27, 2024
f5c159b
Improve error handling
bimtauer Dec 27, 2024
036be25
Create protocol based integration test across versions
bimtauer Dec 29, 2024
e4bf3c2
Implement listpack handling, better errors, ensure that all previousl…
bimtauer Dec 29, 2024
0439a7b
Implement sorted set v2 and listpack encodings
bimtauer Dec 30, 2024
3115477
Add logging
bimtauer Dec 30, 2024
c0424ae
Small cleanup
bimtauer Dec 30, 2024
b16a169
Restructure code and public lib interface
bimtauer Jan 1, 2025
ca0cd68
Add python bindings
bimtauer Jan 2, 2025
bac656d
Update readme
bimtauer Jan 2, 2025
419a88c
Update changelog
bimtauer Jan 2, 2025
ead9e1a
Link changelog
bimtauer Jan 2, 2025
b7bcbb5
Fix start and end formatting
bimtauer Jan 2, 2025
fcdf100
Integrate shell script tests fully into rust integration tests
bimtauer Jan 2, 2025
8152adf
Remove makefile
bimtauer Jan 2, 2025
74b5836
Update ignore and rm python version
bimtauer Jan 2, 2025
361a432
Heed clippy's call
bimtauer Jan 2, 2025
279fce9
Add Rust CI workflow
bimtauer Jan 2, 2025
7a5bfac
Clean
bimtauer Jan 13, 2025
cd8a83a
Fix Redis Container Integration Test
bimtauer Jan 14, 2025
817f9b4
Adapt python CI
bimtauer Jan 14, 2025
ef4608a
Prep release
bimtauer Jan 14, 2025
2ce44ab
Removing token should use trusted publisher
bimtauer Jan 14, 2025
2a87c62
Rename python project
bimtauer Jan 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,6 @@ clap = { version = "4.4", features = ["derive"] }

[dev-dependencies]
rstest = "0.23.0"

[features]
integration_tests = []
12 changes: 9 additions & 3 deletions src/formatter/json.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#![allow(unused_must_use)]

use super::write_str;
use crate::formatter::Formatter;
use crate::types::EncodingType;
Expand Down Expand Up @@ -32,8 +31,15 @@ impl JSON {
}

fn encode_to_ascii(value: &[u8]) -> String {
let s = unsafe { str::from_utf8_unchecked(value) };
json::encode(&s).unwrap()
match str::from_utf8(value) {
Ok(s) => json::encode(&s).unwrap(),
Err(_) => {
let s: String = value.iter()
.map(|&b| if b < 128 { b as char } else { '\u{FFFD}' })
.collect();
json::encode(&s).unwrap()
}
}
}

impl JSON {
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ extern crate rustc_serialize as serialize;
use std::io::Read;

#[doc(hidden)]
pub use types::{/* error and result types */ RdbError, RdbOk, RdbResult, Type, ZiplistEntry};
pub use types::{/* error and result types */ RdbError, RdbOk, RdbResult, Type};

pub use parser::RdbParser;

Expand Down
4 changes: 4 additions & 0 deletions src/parser/common/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub mod utils;
mod ziplist;

pub use ziplist::{read_ziplist_entry_string, read_ziplist_metadata};
15 changes: 2 additions & 13 deletions src/parser/utils.rs → src/parser/common/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,15 @@ use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
use lzf;
use std::io::Error as IoError;
use std::io::ErrorKind as IoErrorKind;
use std::io::Result as IoResult;
use std::io::Read;
use std::io::Result as IoResult;
use std::str;


#[doc(hidden)]
use crate::constants::{constant, encoding, version};

#[doc(hidden)]
pub use crate::types::{
RdbOk, RdbResult
};
pub use crate::types::{RdbOk, RdbResult};

#[inline]
pub fn other_error(desc: &'static str) -> IoError {
Expand Down Expand Up @@ -122,14 +119,6 @@ pub fn read_blob<R: Read>(input: &mut R) -> RdbResult<Vec<u8>> {
}
}

/// Reads the three header fields found at the start of a ziplist.
///
/// Consumes a little-endian `u32`, another little-endian `u32` and a
/// little-endian `u16` from `input`, in that order, and returns them as
/// `(zlbytes, zltail, zllen)`. Any read error is propagated to the caller.
pub fn read_ziplist_metadata<T: Read>(input: &mut T) -> RdbResult<(u32, u32, u16)> {
let zlbytes = input.read_u32::<LittleEndian>()?;
let zltail = input.read_u32::<LittleEndian>()?;
let zllen = input.read_u16::<LittleEndian>()?;

Ok((zlbytes, zltail, zllen))
}

pub fn int_to_vec(number: i32) -> Vec<u8> {
let number = number.to_string();
let mut result = Vec::with_capacity(number.len());
Expand Down
105 changes: 105 additions & 0 deletions src/parser/common/ziplist.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
use std::io::Read;

use super::utils::{other_error, read_exact};
use crate::types::RdbResult;
use byteorder::{BigEndian, LittleEndian, ReadBytesExt};

/// A single decoded ziplist entry.
#[derive(Debug, Clone)]
pub enum ZiplistEntry {
/// Entry whose payload is a raw byte string.
String(Vec<u8>),
/// Entry whose payload is an integer, widened to `i64`.
Number(i64),
}

/// Reads the fixed-size header at the start of a ziplist.
///
/// Consumes, in order, a little-endian `u32`, a second little-endian `u32`
/// and a little-endian `u16`, returning them as `(zlbytes, zltail, zllen)`.
///
/// # Errors
/// Propagates any error raised while reading from `input`.
pub fn read_ziplist_metadata<T: Read>(input: &mut T) -> RdbResult<(u32, u32, u16)> {
    // Tuple fields are evaluated left to right, so the reads occur in header order.
    Ok((
        input.read_u32::<LittleEndian>()?,
        input.read_u32::<LittleEndian>()?,
        input.read_u16::<LittleEndian>()?,
    ))
}

/// Reads the next ziplist entry from `input` and returns its payload as bytes.
///
/// String entries are returned unchanged; numeric entries are rendered as
/// their decimal text representation.
///
/// # Errors
/// Propagates any error produced while decoding the entry.
pub fn read_ziplist_entry_string<R: Read>(input: &mut R) -> RdbResult<Vec<u8>> {
    let payload = match read_ziplist_entry(input)? {
        ZiplistEntry::Number(number) => number.to_string().into_bytes(),
        ZiplistEntry::String(raw) => raw,
    };
    Ok(payload)
}

/// Fills `buf` completely from `input`.
///
/// Unlike a single `Read::read` call, this keeps reading until the buffer is
/// full. If the input ends first, the error carries `short_read_msg`; any
/// other I/O error is passed through unchanged.
fn fill_buffer<R: Read>(
    input: &mut R,
    buf: &mut [u8],
    short_read_msg: &'static str,
) -> RdbResult<()> {
    input.read_exact(buf).map_err(|err| {
        if err.kind() == std::io::ErrorKind::UnexpectedEof {
            other_error(short_read_msg)
        } else {
            err
        }
    })
}

/// Decodes one ziplist entry from `input`.
///
/// The entry consists of:
/// 1. the previous-entry length (one byte, or the marker `254` followed by
///    four more bytes) — read and discarded;
/// 2. an encoding byte selecting either a string length or an integer width;
/// 3. the payload itself.
///
/// # Errors
/// Returns an error if the input ends prematurely, if any underlying read
/// fails, or if the encoding byte is `0xFF` (the end-of-ziplist marker, which
/// is not a valid entry encoding).
fn read_ziplist_entry<R: Read>(input: &mut R) -> RdbResult<ZiplistEntry> {
    // 1. 1 or 5 bytes length of previous entry; the value itself is not needed.
    let prev_len = input.read_u8()?;
    if prev_len == 254 {
        let mut skipped = [0u8; 4];
        fill_buffer(
            input,
            &mut skipped,
            "Could not read 4 bytes to skip after ziplist length",
        )?;
    }

    // 2. Read flag or number value. The match is exhaustive over `u8`, so no
    //    panic/unreachable arm is required.
    let flag = input.read_u8()?;
    let length: usize = match flag {
        // 00pppppp: 6-bit string length.
        0x00..=0x3F => usize::from(flag & 0x3F),
        // 01pppppp qqqqqqqq: 14-bit string length.
        0x40..=0x7F => {
            let low = input.read_u8()?;
            (usize::from(flag & 0x3F) << 8) | usize::from(low)
        }
        // 10______ + 4 bytes: 32-bit big-endian string length.
        0x80..=0xBF => input.read_u32::<BigEndian>()? as usize,
        // 11______: integer encodings, returned immediately.
        0xC0..=0xFF => {
            let number_value: i64 = match flag {
                0xC0..=0xCF => i64::from(input.read_i16::<LittleEndian>()?),
                0xD0..=0xDF => i64::from(input.read_i32::<LittleEndian>()?),
                0xE0..=0xEF => input.read_i64::<LittleEndian>()?,
                0xF0 => {
                    let mut bytes = [0u8; 3];
                    fill_buffer(
                        input,
                        &mut bytes,
                        "Could not read enough bytes for 24bit number",
                    )?;
                    // Place the 24 bits in the upper three bytes, then shift
                    // right arithmetically to sign-extend.
                    let widened = i32::from_le_bytes([0, bytes[0], bytes[1], bytes[2]]) >> 8;
                    i64::from(widened)
                }
                0xFE => i64::from(input.read_i8()?),
                // 0xFF terminates a ziplist; it never encodes a value.
                0xFF => {
                    return Err(other_error(
                        "Unexpected end-of-ziplist marker in place of an entry",
                    ))
                }
                // 0xF1..=0xFD: 4-bit immediate, stored off by one.
                _ => i64::from(flag & 0x0F) - 1,
            };
            return Ok(ZiplistEntry::Number(number_value));
        }
    };

    // 3. Read value
    let rawval = read_exact(input, length)?;
    Ok(ZiplistEntry::String(rawval))
}
145 changes: 145 additions & 0 deletions src/parser/hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
use super::common::utils::{other_error, read_blob, read_exact, read_length};
use super::common::{read_ziplist_entry_string, read_ziplist_metadata};
use crate::formatter::Formatter;
use crate::types::{EncodingType, RdbOk, RdbResult};
use byteorder::{LittleEndian, ReadBytesExt};
use std::io::{Cursor, Read};

/// Parses a hashtable-encoded hash and streams it to `formatter`.
///
/// Reads the number of field/value pairs, then each pair as two blobs, and
/// reports them via `start_hash`, `hash_element` and `end_hash`.
///
/// # Errors
/// Propagates any error from reading the length or the blobs.
pub fn read_hash<R: Read, F: Formatter>(
    input: &mut R,
    formatter: &mut F,
    key: &[u8],
    last_expiretime: Option<u64>,
) -> RdbOk {
    let pair_count = read_length(input)?;

    formatter.start_hash(key, pair_count, last_expiretime, EncodingType::Hashtable);

    for _ in 0..pair_count {
        let name = read_blob(input)?;
        let content = read_blob(input)?;
        formatter.hash_element(key, &name, &content);
    }

    formatter.end_hash(key);

    Ok(())
}

/// Parses a ziplist-encoded hash and streams it to `formatter`.
///
/// The ziplist is read as a single blob, its header is decoded, and the
/// entries are then consumed pairwise as field/value. After the last pair the
/// terminating `0xFF` byte is verified.
///
/// # Errors
/// Returns an error if the blob cannot be read, if the header reports an odd
/// number of entries (a hash needs field/value pairs), if an entry fails to
/// decode, or if the ziplist does not end with `0xFF`.
pub fn read_hash_ziplist<R: Read, F: Formatter>(
    input: &mut R,
    formatter: &mut F,
    key: &[u8],
    last_expiretime: Option<u64>,
) -> RdbOk {
    let ziplist = read_blob(input)?;
    let raw_length = ziplist.len() as u64;

    let mut reader = Cursor::new(ziplist);
    let (_zlbytes, _zltail, zllen) = read_ziplist_metadata(&mut reader)?;

    // Malformed input must surface as an error, not abort the process.
    if zllen % 2 != 0 {
        return Err(other_error("Odd number of entries in hash ziplist"));
    }
    let pair_count = zllen / 2;

    formatter.start_hash(
        key,
        pair_count as u32,
        last_expiretime,
        EncodingType::Ziplist(raw_length),
    );

    for _ in 0..pair_count {
        let field = read_ziplist_entry_string(&mut reader)?;
        let value = read_ziplist_entry_string(&mut reader)?;
        formatter.hash_element(key, &field, &value);
    }

    let last_byte = reader.read_u8()?;
    if last_byte != 0xFF {
        return Err(other_error("Invalid end byte of ziplist"));
    }

    formatter.end_hash(key);

    Ok(())
}

pub fn read_hash_zipmap<R: Read, F: Formatter>(
input: &mut R,
formatter: &mut F,
key: &[u8],
last_expiretime: Option<u64>,
) -> RdbOk {
let zipmap = read_blob(input)?;
let raw_length = zipmap.len() as u64;

let mut reader = Cursor::new(zipmap);

let zmlen = reader.read_u8()?;

let mut length: i32;
let size;
if zmlen <= 254 {
length = zmlen as i32;
size = zmlen
} else {
length = -1;
size = 0;
}

formatter.start_hash(
key,
size as u32,
last_expiretime,
EncodingType::Zipmap(raw_length),
);

loop {
let next_byte = reader.read_u8()?;

if next_byte == 0xFF {
break; // End of list.
}

let field = read_zipmap_entry(next_byte, &mut reader)?;

let next_byte = reader.read_u8()?;
let _free = reader.read_u8()?;
let value = read_zipmap_entry(next_byte, &mut reader)?;

formatter.hash_element(key, &field, &value);

if length > 0 {
length -= 1;
}

if length == 0 {
let last_byte = reader.read_u8()?;

if last_byte != 0xFF {
return Err(other_error("Invalid end byte of zipmap"));
}
break;
}
}

formatter.end_hash(key);

Ok(())
}

fn read_zipmap_entry<T: Read>(next_byte: u8, zipmap: &mut T) -> RdbResult<Vec<u8>> {
let elem_len;
match next_byte {
253 => elem_len = zipmap.read_u32::<LittleEndian>().unwrap(),
254 | 255 => {
panic!("Invalid length value in zipmap: {}", next_byte)
}
_ => elem_len = next_byte as u32,
}

read_exact(zipmap, elem_len as usize)
}
Loading