Skip to content

Commit 603742c

Browse files
authored
Merge pull request #3 from torymur/btrees
Add Tree View
2 parents 5e64db2 + d8f079b commit 603742c

File tree

10 files changed

+579
-112
lines changed

10 files changed

+579
-112
lines changed

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2024-Present Victoria Terenina
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

+25-3
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,33 @@
22

33
[SQLite](https://www.sqlite.org/) is a C-language library that implements a small, fast, self-contained, high-reliability, full-featured, SQL database engine. SQLite is the most used database engine in the world, built into all mobile phones and most computers and comes bundled inside countless other applications that people use every day.
44

5-
65
The SQLite file format is stable, cross-platform and backwards compatible, and the developers pledge to keep it that way through the year 2050.
76

8-
97
All of that makes it interesting to peek into the on-disk [database file format](https://www.sqlite.org/fileformat2.html): to understand it for software development and troubleshooting, as well as to study the format of SQLite databases for academic purposes or regular self-education.
108

119
### Visual
12-
WIP available at https://torymur.github.io/sqlite-repr/
10+
Available at https://torymur.github.io/sqlite-repr/
11+
12+
### Map 🗺️
13+
14+
#### Parser
15+
- [x] Table Interior Btree page
16+
- [x] Table Leaf Btree page
17+
- [x] Index Interior Btree page
18+
- [x] Index Leaf Btree page
19+
- [x] Freelist pages
20+
- [x] Overflow pages
21+
- [x] Spilled record values
22+
- [ ] Spilled record headers (rare)
23+
- [ ] ~~Pointer map pages~~
24+
- [ ] ~~Lock-byte page~~
25+
- [ ] Freeblock & Fragmented bytes
26+
27+
#### UI
28+
- [x] Hybrid, Hex, Text field repr
29+
- [x] Preloaded example databases, details
30+
- [x] Page View
31+
- [x] Tree View
32+
- [ ] Reserved space
33+
- [ ] Add yours
34+
- [ ] Console

parser/src/btree.rs

+188
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
//! Build b-trees, together with their overflow page chains, from parsed pages.
2+
use crate::*;
3+
4+
#[derive(Debug, Clone, PartialEq)]
5+
pub struct BTreeNode {
6+
pub page: Page,
7+
pub page_num: usize,
8+
pub children: Option<Vec<BTreeNode>>,
9+
pub overflow: Option<Vec<OverflowNode>>,
10+
}
11+
12+
#[derive(Debug, Clone, PartialEq)]
13+
pub struct OverflowNode {
14+
pub page: OverflowPage,
15+
pub page_num: usize,
16+
}
17+
18+
impl BTreeNode {
19+
pub fn new(page_num: usize, reader: &Reader) -> Result<Self, StdError> {
20+
let page = reader.get_btree_page(page_num)?;
21+
let mut children = vec![];
22+
let mut overflow = vec![];
23+
24+
let mut extend_overflow = |cell_overflow: &Option<CellOverflow>| {
25+
if let Some(o) = cell_overflow {
26+
let res = Self::follow_overflow(vec![], o.units.to_vec(), o.page as usize, reader);
27+
if let Ok(res) = res {
28+
let mut page_nums =
29+
res.iter().map(|o| o.next_page as usize).collect::<Vec<_>>();
30+
31+
// Transform list of 'next_page' numbers into page numbers.
32+
// Last next page number is 0 to mark the end of the linked list.
33+
page_nums.insert(0, o.page as usize);
34+
page_nums.pop();
35+
36+
let overflow_list = res
37+
.into_iter()
38+
.zip(page_nums.into_iter())
39+
.map(|(o, n)| OverflowNode {
40+
page: o,
41+
page_num: n,
42+
})
43+
.collect::<Vec<OverflowNode>>();
44+
overflow.extend(overflow_list);
45+
}
46+
}
47+
};
48+
49+
for outer_cell in page.cells.iter() {
50+
match outer_cell {
51+
Cell::TableInterior(cell) => {
52+
children.push(BTreeNode::new(cell.left_page_number as usize, reader)?);
53+
}
54+
Cell::TableLeaf(cell) => {
55+
extend_overflow(&cell.overflow);
56+
}
57+
Cell::IndexInterior(cell) => {
58+
children.push(BTreeNode::new(cell.left_page_number as usize, reader)?);
59+
extend_overflow(&cell.overflow);
60+
}
61+
Cell::IndexLeaf(cell) => {
62+
extend_overflow(&cell.overflow);
63+
}
64+
};
65+
}
66+
if page.page_header.page_type.is_interior() {
67+
// Don't forget the right-most pointer, which is in the page header.
68+
// If it's interior page, then page_num is Some by design.
69+
children.push(BTreeNode::new(
70+
page.page_header.page_num.unwrap() as usize,
71+
reader,
72+
)?);
73+
};
74+
75+
Ok(Self {
76+
page,
77+
page_num,
78+
children: (!children.is_empty()).then_some(children),
79+
overflow: (!overflow.is_empty()).then_some(overflow),
80+
})
81+
}
82+
83+
fn follow_overflow(
84+
mut opages: Vec<OverflowPage>,
85+
overflow_units: Vec<OverflowUnit>,
86+
next_page: usize,
87+
reader: &Reader,
88+
) -> Result<Vec<OverflowPage>, StdError> {
89+
let opage = reader.get_overflow_page(overflow_units, next_page)?;
90+
let units = opage.overflow_units.to_vec();
91+
let next_page = opage.next_page;
92+
opages.push(opage);
93+
match next_page {
94+
0 => Ok(opages),
95+
n => Self::follow_overflow(opages, units, n as usize, reader),
96+
}
97+
}
98+
}
99+
100+
#[derive(Debug, Clone, PartialEq)]
101+
pub struct BTree {
102+
pub ttype: String,
103+
pub name: String,
104+
pub root: BTreeNode,
105+
}
106+
107+
/// Positions of the columns inside a schema record.
///
/// The discriminants are used as indices into the record's list of values.
#[derive(Debug, Clone, PartialEq)]
pub enum Schema {
    /// Object type column.
    Type = 0,
    /// Object name column.
    Name = 1,
    /// Name of the table the object belongs to.
    TableName = 2,
    /// Root page number column.
    RootPage = 3,
    /// SQL text column.
    SQL = 4,
}
115+
116+
impl BTree {
117+
pub fn new(cell: &TableLeafCell, reader: &Reader) -> Result<Self, StdError> {
118+
match &cell.overflow {
119+
Some(overflow) => {
120+
let payload = Self::follow_overflow(
121+
cell.payload.values.to_vec(),
122+
overflow.units.to_vec(),
123+
overflow.page as usize,
124+
reader,
125+
)?;
126+
Self::parse_tree(&payload, reader)
127+
}
128+
None => Self::parse_tree(&cell.payload.values, reader),
129+
}
130+
}
131+
132+
fn follow_overflow(
133+
mut payload: Vec<RecordValue>,
134+
overflow_units: Vec<OverflowUnit>,
135+
next_page: usize,
136+
reader: &Reader,
137+
) -> Result<Vec<RecordValue>, StdError> {
138+
// We need to merge last of previous with the first of overflow value and
139+
// add values in between to payload.
140+
/*
141+
* Btree page Overflow page 1 Overflow page 2
142+
* +-----------+ +----------------------------+ +----------+
143+
* | field1 | -> | field1 | field2 | field3 | -> | field3 |
144+
* +-----------+ +----------------------------+ +----------+
145+
* ↓ ↓ | ↓ ↓
146+
* +----------------+ ↓ +---------------+
147+
* merge as it is merge
148+
*/
149+
let opage = reader.get_overflow_page(overflow_units, next_page)?;
150+
151+
let mut overflow = opage.data.to_vec();
152+
let last_payload = payload.remove(payload.len() - 1);
153+
let first_overflow = overflow.remove(0);
154+
match last_payload.merge(first_overflow.value) {
155+
Some(value) => payload.push(value),
156+
None => unreachable!("Attempt to merge the unexpected Record types."),
157+
};
158+
payload.extend(overflow.into_iter().map(|v| v.value));
159+
160+
match opage.next_page {
161+
0 => Ok(payload),
162+
n => Self::follow_overflow(payload, opage.overflow_units, n as usize, reader),
163+
}
164+
}
165+
166+
fn parse_tree(values: &[RecordValue], reader: &Reader) -> Result<Self, StdError> {
167+
let tname = match &values[Schema::Name as usize].value {
168+
RecordType::Text(v) => v.as_ref().map_or("", |vv| vv),
169+
_ => unreachable!("Unknown type for table schema name."),
170+
};
171+
let ttype = match &values[Schema::Type as usize].value {
172+
RecordType::Text(v) => v.as_ref().map_or("", |vv| vv),
173+
_ => unreachable!("Unknown type for table schema type."),
174+
};
175+
let tpage = match values[Schema::RootPage as usize].value {
176+
RecordType::I8(v) => v as usize,
177+
RecordType::I16(v) => v as usize,
178+
RecordType::I24(v) | RecordType::I32(v) => v as usize,
179+
RecordType::I48(v) | RecordType::I64(v) => v as usize,
180+
_ => unreachable!("Unknown type for table schema root page."),
181+
};
182+
Ok(Self {
183+
ttype: ttype.to_string(),
184+
name: tname.to_string(),
185+
root: BTreeNode::new(tpage, reader)?,
186+
})
187+
}
188+
}

parser/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Experimentation around sqlite internal format parsing, based on https://www.sqlite.org/fileformat2.html
22
#![feature(str_from_utf16_endian)]
33

4+
pub mod btree;
45
pub mod cell;
56
pub mod freelist;
67
pub mod header;
@@ -10,6 +11,7 @@ pub mod reader;
1011
pub mod record;
1112
pub mod varint;
1213

14+
pub use btree::{BTree, BTreeNode};
1315
pub use cell::{
1416
Cell, CellOverflow, IndexInteriorCell, IndexLeafCell, TableInteriorCell, TableLeafCell,
1517
};

parser/src/reader.rs

+43-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
use crate::{
2-
DBHeader, LeafFreelistPage, OverflowPage, OverflowUnit, Page, Result, StdError,
3-
TrunkFreelistPage,
4-
};
1+
use crate::*;
52
use std::rc::Rc;
63

74
pub const DB_HEADER_SIZE: usize = 100;
@@ -63,6 +60,22 @@ impl Reader {
6360
Ok(page)
6461
}
6562

63+
/// Create btrees.
64+
pub fn get_btrees(&self) -> Result<Vec<BTree>, StdError> {
65+
// Schema page is always a table b-tree and always has a root page of 1.
66+
let mut cells = vec![];
67+
let _ = self.collect_cells(1, &mut cells);
68+
let mut trees = vec![BTree {
69+
ttype: "table".to_string(),
70+
name: "master schema".to_string(),
71+
root: BTreeNode::new(1, self)?,
72+
}];
73+
for cell in cells {
74+
trees.push(BTree::new(&cell, self)?);
75+
}
76+
Ok(trees)
77+
}
78+
6679
/// Get an actual number of total pages per database file.
6780
pub fn pages_total(&self) -> usize {
6881
// Based on docs descriptions, db_size is valid only if:
@@ -80,6 +93,32 @@ impl Reader {
8093
}
8194
}
8295

96+
fn collect_cells(
97+
&self,
98+
page_num: usize,
99+
cells: &mut Vec<TableLeafCell>,
100+
) -> Result<(), StdError> {
101+
let page = self.get_btree_page(page_num)?;
102+
for outer_cell in page.cells.iter() {
103+
match outer_cell {
104+
Cell::TableInterior(cell) => {
105+
// No overflow, but we need to follow references to the leaves.
106+
self.collect_cells(cell.left_page_number as usize, cells)?;
107+
}
108+
Cell::TableLeaf(cell) => {
109+
cells.push(cell.clone());
110+
}
111+
_ => {}
112+
};
113+
}
114+
if page.page_header.page_type.is_interior() {
115+
// Don't forget the right-most pointer, which is in the page header.
116+
// If it's interior page, then page_num is Some by design.
117+
self.collect_cells(page.page_header.page_num.unwrap() as usize, cells)?;
118+
}
119+
Ok(())
120+
}
121+
83122
fn page_slice(&self, page_num: usize) -> Result<Vec<u8>, StdError> {
84123
self.validate_page_bounds(page_num)?;
85124
let page_offset = self.page_offset(page_num);

parser/src/record.rs

+30
Original file line numberDiff line numberDiff line change
@@ -242,4 +242,34 @@ impl RecordValue {
242242
_ => unreachable!("Record Value of unknown serial type."),
243243
}
244244
}
245+
246+
/// Merging RecordValues is helpful to create full payload when spilled over.
247+
pub fn merge(self, rhs: RecordValue) -> Option<RecordValue> {
248+
// Only Text & Blob types can be meaningfully merged together.
249+
match (&self.value, &rhs.value) {
250+
(RecordType::Text(lval), RecordType::Text(rval)) => match (lval, rval) {
251+
(Some(l), Some(r)) => {
252+
let value = RecordType::Text(Some(format!("{}{}", l, r)));
253+
// Bytes are guaranteed to be Some if value is Some by design.
254+
let bytes = Some([self.bytes.unwrap(), rhs.bytes.unwrap()].concat());
255+
Some(RecordValue { bytes, value })
256+
}
257+
(None, Some(_)) => Some(rhs),
258+
(Some(_), None) => Some(self),
259+
(None, None) => None,
260+
},
261+
(RecordType::Blob(lval), RecordType::Blob(rval)) => match (lval, rval) {
262+
(Some(l), Some(r)) => {
263+
let value = RecordType::Blob(Some([l.to_vec(), r.to_vec()].concat()));
264+
// Bytes are guaranteed to be Some if value is Some by design.
265+
let bytes = Some([self.bytes.unwrap(), rhs.bytes.unwrap()].concat());
266+
Some(RecordValue { bytes, value })
267+
}
268+
(None, Some(_)) => Some(rhs),
269+
(Some(_), None) => Some(self),
270+
(None, None) => None,
271+
},
272+
_ => None,
273+
}
274+
}
245275
}

ui/assets/tailwind.css

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)