Skip to content

Commit eb98da3

Browse files
committed
add maxRowIndex to dv descriptor
1 parent be69dcf commit eb98da3

File tree

7 files changed

+21
-8
lines changed

7 files changed

+21
-8
lines changed

kernel/src/actions/deletion_vector.rs

+7
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ pub struct DeletionVectorDescriptor {
4141

4242
/// Number of rows the given DV logically removes from the file.
4343
pub cardinality: i64,
44+
45+
/// Used in row tracking (TODO: Update with details when this is added to spec)
46+
pub max_row_index: Option<i64>,
4447
}
4548

4649
impl DeletionVectorDescriptor {
@@ -240,6 +243,7 @@ mod tests {
240243
offset: Some(4),
241244
size_in_bytes: 40,
242245
cardinality: 6,
246+
max_row_index: None,
243247
}
244248
}
245249

@@ -251,6 +255,7 @@ mod tests {
251255
offset: Some(4),
252256
size_in_bytes: 40,
253257
cardinality: 6,
258+
max_row_index: None,
254259
}
255260
}
256261

@@ -262,6 +267,7 @@ mod tests {
262267
offset: None,
263268
size_in_bytes: 44,
264269
cardinality: 6,
270+
max_row_index: None,
265271
}
266272
}
267273

@@ -272,6 +278,7 @@ mod tests {
272278
offset: Some(1),
273279
size_in_bytes: 36,
274280
cardinality: 2,
281+
max_row_index: None,
275282
}
276283
}
277284

kernel/src/actions/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ mod tests {
304304
StructField::new("offset", DataType::INTEGER, true),
305305
StructField::new("sizeInBytes", DataType::INTEGER, false),
306306
StructField::new("cardinality", DataType::LONG, false),
307+
StructField::new("maxRowIndex", DataType::LONG, true),
307308
]))),
308309
true,
309310
)

kernel/src/actions/visitors.rs

+8-5
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,11 @@ impl AddVisitor {
139139

140140
let deletion_vector = visit_deletion_vector_at(row_index, &getters[7..])?;
141141

142-
let base_row_id: Option<i64> = getters[12].get_opt(row_index, "add.base_row_id")?;
142+
let base_row_id: Option<i64> = getters[13].get_opt(row_index, "add.base_row_id")?;
143143
let default_row_commit_version: Option<i64> =
144-
getters[13].get_opt(row_index, "add.default_row_commit")?;
144+
getters[14].get_opt(row_index, "add.default_row_commit")?;
145145
let clustering_provider: Option<String> =
146-
getters[14].get_opt(row_index, "add.clustering_provider")?;
146+
getters[15].get_opt(row_index, "add.clustering_provider")?;
147147

148148
Ok(Add {
149149
path,
@@ -198,9 +198,9 @@ impl RemoveVisitor {
198198

199199
let deletion_vector = visit_deletion_vector_at(row_index, &getters[7..])?;
200200

201-
let base_row_id: Option<i64> = getters[12].get_opt(row_index, "remove.baseRowId")?;
201+
let base_row_id: Option<i64> = getters[13].get_opt(row_index, "remove.baseRowId")?;
202202
let default_row_commit_version: Option<i64> =
203-
getters[13].get_opt(row_index, "remove.defaultRowCommitVersion")?;
203+
getters[14].get_opt(row_index, "remove.defaultRowCommitVersion")?;
204204

205205
Ok(Remove {
206206
path,
@@ -306,12 +306,15 @@ pub(crate) fn visit_deletion_vector_at<'a>(
306306
let offset: Option<i32> = getters[2].get_opt(row_index, "deletionVector.offset")?;
307307
let size_in_bytes: i32 = getters[3].get(row_index, "deletionVector.sizeInBytes")?;
308308
let cardinality: i64 = getters[4].get(row_index, "deletionVector.cardinality")?;
309+
let max_row_index: Option<i64> =
310+
getters[5].get_opt(row_index, "deletionVector.maxRowIndex")?;
309311
Ok(Some(DeletionVectorDescriptor {
310312
storage_type,
311313
path_or_inline_dv,
312314
offset,
313315
size_in_bytes,
314316
cardinality,
317+
max_row_index,
315318
}))
316319
} else {
317320
Ok(None)

kernel/src/engine/arrow_expression.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ fn column_as_struct<'a>(
162162
}
163163

164164
fn make_arrow_error(s: String) -> Error {
165-
Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(s))
165+
Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(s)).with_backtrace()
166166
}
167167

168168
/// Ensure a kernel data type matches an arrow data type. This only ensures that the actual "type"

kernel/src/scan/log_replay.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ struct AddRemoveVisitor {
3232
is_log_batch: bool,
3333
}
3434

35-
const ADD_FIELD_COUNT: usize = 15;
35+
const ADD_FIELD_COUNT: usize = 16;
3636

3737
impl AddRemoveVisitor {
3838
fn new(selection_vector: Option<Vec<bool>>, is_log_batch: bool) -> Self {
@@ -92,6 +92,7 @@ lazy_static! {
9292
StructField::new("offset", DataType::INTEGER, true),
9393
StructField::new("sizeInBytes", DataType::INTEGER, false),
9494
StructField::new("cardinality", DataType::LONG, false),
95+
StructField::new("maxRowIndex", DataType::LONG, true),
9596
]),
9697
true
9798
),

kernel/src/scan/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ impl Scan {
359359
/// offset: int,
360360
/// sizeInBytes: int,
361361
/// cardinality: long,
362+
/// maxRowIndex: long
362363
/// },
363364
/// fileConstantValues: {
364365
/// partitionValues: map<string, string>

kernel/src/scan/state.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ impl<T> DataVisitor for ScanFileVisitor<'_, T> {
122122
let deletion_vector = visit_deletion_vector_at(row_index, &getters[dv_index..])?;
123123
let dv_info = DvInfo { deletion_vector };
124124
let partition_values =
125-
getters[8].get(row_index, "scanFile.fileConstantValues.partitionValues")?;
125+
getters[9].get(row_index, "scanFile.fileConstantValues.partitionValues")?;
126126
(self.callback)(&mut self.context, path, size, dv_info, partition_values)
127127
}
128128
}

0 commit comments

Comments
 (0)