From 714e8f361173e49e7eeafb524c3cfb7fd6b85e97 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 09:47:11 -0700 Subject: [PATCH 1/4] fix: no panic on unknown version --- rust/lance-table/src/format/manifest.rs | 40 +++++++++++++++++++++---- rust/lance/src/io/commit.rs | 2 +- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/rust/lance-table/src/format/manifest.rs b/rust/lance-table/src/format/manifest.rs index 703f4366b3e..6ade8f37303 100644 --- a/rust/lance-table/src/format/manifest.rs +++ b/rust/lance-table/src/format/manifest.rs @@ -619,6 +619,14 @@ pub struct WriterVersion { pub version: String, } +/// A parsed Lance library version with major, minor, and patch components. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct LanceVersion { + pub major: u32, + pub minor: u32, + pub patch: u32, +} + #[derive(Debug, Clone, PartialEq, DeepSizeOf)] pub struct DataStorageFormat { pub file_format: String, @@ -684,15 +692,35 @@ impl WriterVersion { Some((major, minor, patch, tag)) } + /// Parse the version as a Lance library version. + /// Returns None if the library is not "lance" or the version cannot be parsed as semver. + pub fn lance_version(&self) -> Option { + if self.library != "lance" { + return None; + } + + self.semver() + .map(|(major, minor, patch, _tag)| LanceVersion { + major, + minor, + patch, + }) + } + + #[deprecated( + since = "0.38.4", + note = "Use `lance_version()` instead, which safely checks the library field and returns Option" + )] pub fn semver_or_panic(&self) -> (u32, u32, u32, Option<&str>) { self.semver() .unwrap_or_else(|| panic!("Invalid writer version: {}", self.version)) } - /// Return true if self is older than the given major/minor/patch - pub fn older_than(&self, major: u32, minor: u32, patch: u32) -> bool { - let version = self.semver_or_panic(); - (version.0, version.1, version.2) < (major, minor, patch) + /// Check if this is a Lance library version older than the given major/minor/patch. + /// Returns None if the library is not "lance" or the version cannot be parsed. + pub fn older_than(&self, major: u32, minor: u32, patch: u32) -> Option { + let version = self.lance_version()?; + Some((version.major, version.minor, version.patch) < (major, minor, patch)) } pub fn bump(&self, part: VersionPart, keep_tag: bool) -> Self { @@ -1020,7 +1048,9 @@ mod tests { for part in &[VersionPart::Major, VersionPart::Minor, VersionPart::Patch] { let bumped = wv.bump(*part, false); let bumped_parts = bumped.semver_or_panic(); - assert!(wv.older_than(bumped_parts.0, bumped_parts.1, bumped_parts.2)); + assert!(wv + .older_than(bumped_parts.0, bumped_parts.1, bumped_parts.2) + .expect("Valid Lance version should be comparable")); } } diff --git a/rust/lance/src/io/commit.rs b/rust/lance/src/io/commit.rs index c5e3b5ea334..b0aebc1c598 100644 --- a/rust/lance/src/io/commit.rs +++ b/rust/lance/src/io/commit.rs @@ -539,7 +539,7 @@ fn must_recalculate_fragment_bitmap( ) -> bool { // If the fragment bitmap was written by an old version of lance then we need to recalculate // it because it could be corrupt due to a bug in versions < 0.8.15 - index.fragment_bitmap.is_none() || version.map(|v| v.older_than(0, 8, 15)).unwrap_or(true) + index.fragment_bitmap.is_none() || version.and_then(|v| v.older_than(0, 8, 15)).unwrap_or(true) } /// Update indices with new fields. From 0c26d72bd5852622465772f1f147491d9ac73a72 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 13:22:41 -0700 Subject: [PATCH 2/4] refactor: use semver crate for version parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, version parsing was done manually with string splitting. This changes LanceVersion to wrap semver::Version, providing robust parsing and proper handling of pre-release versions like beta.1. Also renames lance_version() to lance_lib_version() to clarify that it refers to the library version, not the file format version. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Cargo.lock | 1 + Cargo.toml | 1 + rust/lance-table/Cargo.toml | 1 + rust/lance-table/src/format/manifest.rs | 65 ++++++++++++++----------- 4 files changed, 39 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d48218afc64..91b189f00e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4852,6 +4852,7 @@ dependencies = [ "rangemap", "roaring", "rstest", + "semver", "serde", "serde_json", "snafu", diff --git a/Cargo.toml b/Cargo.toml index c01f12dc946..3c1e768e833 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -160,6 +160,7 @@ rstest = "0.23.0" rustc_version = "0.4" serde = { version = "^1" } serde_json = { version = "1" } +semver = "1.0" shellexpand = "3.0" snafu = "0.8" strum = "0.26" diff --git a/rust/lance-table/Cargo.toml b/rust/lance-table/Cargo.toml index e9f9184d898..961e11d45c3 100644 --- a/rust/lance-table/Cargo.toml +++ b/rust/lance-table/Cargo.toml @@ -38,6 +38,7 @@ rangemap.workspace = true roaring.workspace = true serde.workspace = true serde_json.workspace = true +semver.workspace = true snafu.workspace = true tokio.workspace = true tracing.workspace = true diff --git a/rust/lance-table/src/format/manifest.rs b/rust/lance-table/src/format/manifest.rs index 6ade8f37303..979460ce36b 100644 --- a/rust/lance-table/src/format/manifest.rs +++ b/rust/lance-table/src/format/manifest.rs @@ -619,12 +619,24 @@ pub struct WriterVersion { pub version: String, } -/// A parsed Lance library version with major, minor, and patch components. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct LanceVersion { - pub major: u32, - pub minor: u32, - pub patch: u32, +/// A parsed Lance library version. +/// +/// This is a wrapper around semver::Version that provides Lance-specific +/// version comparison methods. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct LanceVersion(semver::Version); + +impl LanceVersion { + /// Check if this version is older than the given major/minor/patch. + pub fn older_than(&self, major: u32, minor: u32, patch: u32) -> bool { + let other = semver::Version::new(major.into(), minor.into(), patch.into()); + self.0 < other + } + + /// Get the underlying semver::Version. + pub fn as_semver(&self) -> &semver::Version { + &self.0 + } } #[derive(Debug, Clone, PartialEq, DeepSizeOf)] @@ -674,42 +686,37 @@ impl WriterVersion { /// Try to parse the version string as a semver string. Returns None if /// not successful. pub fn semver(&self) -> Option<(u32, u32, u32, Option<&str>)> { - // First split by '-' to separate the version from the pre-release tag - let (version_part, tag) = if let Some(dash_idx) = self.version.find('-') { - ( - &self.version[..dash_idx], - Some(&self.version[dash_idx + 1..]), - ) + let version = semver::Version::parse(&self.version).ok()?; + + // Extract the pre-release tag from the original string to maintain lifetime + let tag = if version.pre.is_empty() { + None } else { - (self.version.as_str(), None) + self.version.split_once('-').map(|(_, tag)| tag) }; - let mut parts = version_part.split('.'); - let major = parts.next().unwrap_or("0").parse().ok()?; - let minor = parts.next().unwrap_or("0").parse().ok()?; - let patch = parts.next().unwrap_or("0").parse().ok()?; - - Some((major, minor, patch, tag)) + Some(( + version.major as u32, + version.minor as u32, + version.patch as u32, + tag, + )) } /// Parse the version as a Lance library version. /// Returns None if the library is not "lance" or the version cannot be parsed as semver. - pub fn lance_version(&self) -> Option { + pub fn lance_lib_version(&self) -> Option { if self.library != "lance" { return None; } - self.semver() - .map(|(major, minor, patch, _tag)| LanceVersion { - major, - minor, - patch, - }) + let version = semver::Version::parse(&self.version).ok()?; + Some(LanceVersion(version)) } #[deprecated( since = "0.38.4", - note = "Use `lance_version()` instead, which safely checks the library field and returns Option" + note = "Use `lance_lib_version()` instead, which safely checks the library field and returns Option" )] pub fn semver_or_panic(&self) -> (u32, u32, u32, Option<&str>) { self.semver() @@ -719,8 +726,8 @@ impl WriterVersion { /// Check if this is a Lance library version older than the given major/minor/patch. /// Returns None if the library is not "lance" or the version cannot be parsed. pub fn older_than(&self, major: u32, minor: u32, patch: u32) -> Option { - let version = self.lance_version()?; - Some((version.major, version.minor, version.patch) < (major, minor, patch)) + let version = self.lance_lib_version()?; + Some(version.older_than(major, minor, patch)) } pub fn bump(&self, part: VersionPart, keep_tag: bool) -> Self { From 6f8eae3bf39530b1ff999b339194049e0609f4bc Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 15:15:20 -0700 Subject: [PATCH 3/4] preserve existing API, use semver --- Cargo.lock | 1 + rust/lance-table/src/format/manifest.rs | 146 ++++++++++++------------ rust/lance/Cargo.toml | 1 + rust/lance/src/io/commit.rs | 19 ++- 4 files changed, 94 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 91b189f00e9..71561804408 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4380,6 +4380,7 @@ dependencies = [ "rand 0.9.2", "roaring", "rstest", + "semver", "serde", "serde_json", "snafu", diff --git a/rust/lance-table/src/format/manifest.rs b/rust/lance-table/src/format/manifest.rs index 979460ce36b..11512065c7c 100644 --- a/rust/lance-table/src/format/manifest.rs +++ b/rust/lance-table/src/format/manifest.rs @@ -619,26 +619,6 @@ pub struct WriterVersion { pub version: String, } -/// A parsed Lance library version. -/// -/// This is a wrapper around semver::Version that provides Lance-specific -/// version comparison methods. -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct LanceVersion(semver::Version); - -impl LanceVersion { - /// Check if this version is older than the given major/minor/patch. - pub fn older_than(&self, major: u32, minor: u32, patch: u32) -> bool { - let other = semver::Version::new(major.into(), minor.into(), patch.into()); - self.0 < other - } - - /// Get the underlying semver::Version. - pub fn as_semver(&self) -> &semver::Version { - &self.0 - } -} - #[derive(Debug, Clone, PartialEq, DeepSizeOf)] pub struct DataStorageFormat { pub file_format: String, @@ -682,70 +662,96 @@ pub enum VersionPart { Patch, } +fn bump_version(version: &mut semver::Version, part: VersionPart) { + match part { + VersionPart::Major => { + version.major += 1; + version.minor = 0; + version.patch = 0; + } + VersionPart::Minor => { + version.minor += 1; + version.patch = 0; + } + VersionPart::Patch => { + version.patch += 1; + } + } +} + impl WriterVersion { /// Try to parse the version string as a semver string. Returns None if /// not successful. + #[deprecated(note = "Use `lance_lib_version()` instead")] pub fn semver(&self) -> Option<(u32, u32, u32, Option<&str>)> { - let version = semver::Version::parse(&self.version).ok()?; - - // Extract the pre-release tag from the original string to maintain lifetime - let tag = if version.pre.is_empty() { - None + // First split by '-' to separate the version from the pre-release tag + let (version_part, tag) = if let Some(dash_idx) = self.version.find('-') { + ( + &self.version[..dash_idx], + Some(&self.version[dash_idx + 1..]), + ) } else { - self.version.split_once('-').map(|(_, tag)| tag) + (self.version.as_str(), None) }; - Some(( - version.major as u32, - version.minor as u32, - version.patch as u32, - tag, - )) + let mut parts = version_part.split('.'); + let major = parts.next().unwrap_or("0").parse().ok()?; + let minor = parts.next().unwrap_or("0").parse().ok()?; + let patch = parts.next().unwrap_or("0").parse().ok()?; + + Some((major, minor, patch, tag)) } - /// Parse the version as a Lance library version. + /// If the library is "lance", parse the version as semver and return it. /// Returns None if the library is not "lance" or the version cannot be parsed as semver. - pub fn lance_lib_version(&self) -> Option { + pub fn lance_lib_version(&self) -> Option { if self.library != "lance" { return None; } let version = semver::Version::parse(&self.version).ok()?; - Some(LanceVersion(version)) + Some(version) } #[deprecated( - since = "0.38.4", note = "Use `lance_lib_version()` instead, which safely checks the library field and returns Option" )] + #[allow(deprecated)] pub fn semver_or_panic(&self) -> (u32, u32, u32, Option<&str>) { self.semver() .unwrap_or_else(|| panic!("Invalid writer version: {}", self.version)) } /// Check if this is a Lance library version older than the given major/minor/patch. - /// Returns None if the library is not "lance" or the version cannot be parsed. - pub fn older_than(&self, major: u32, minor: u32, patch: u32) -> Option { - let version = self.lance_lib_version()?; - Some(version.older_than(major, minor, patch)) + /// + /// # Panics + /// + /// Panics if the library is not "lance" or the version cannot be parsed as semver. + #[deprecated(note = "Use `lance_lib_version()` and its `older_than` method instead.")] + pub fn older_than(&self, major: u32, minor: u32, patch: u32) -> bool { + let version = self + .lance_lib_version() + .expect("Not lance library or invalid version"); + let other = semver::Version { + major: major.into(), + minor: minor.into(), + patch: patch.into(), + pre: semver::Prerelease::EMPTY, + build: semver::BuildMetadata::EMPTY, + }; + version < other } + #[deprecated(note = "This is meant for testing and will be made private in future version.")] pub fn bump(&self, part: VersionPart, keep_tag: bool) -> Self { - let parts = self.semver_or_panic(); - let tag = if keep_tag { parts.3 } else { None }; - let new_parts = match part { - VersionPart::Major => (parts.0 + 1, parts.1, parts.2, tag), - VersionPart::Minor => (parts.0, parts.1 + 1, parts.2, tag), - VersionPart::Patch => (parts.0, parts.1, parts.2 + 1, tag), - }; - let new_version = if let Some(tag) = tag { - format!("{}.{}.{}-{}", new_parts.0, new_parts.1, new_parts.2, tag) - } else { - format!("{}.{}.{}", new_parts.0, new_parts.1, new_parts.2) - }; + let mut version = self.lance_lib_version().expect("Should be lance version"); + bump_version(&mut version, part); + if !keep_tag { + version.pre = semver::Prerelease::EMPTY; + } Self { library: self.library.clone(), - version: new_version, + version: version.to_string(), } } } @@ -761,6 +767,7 @@ impl Default for WriterVersion { // Unit tests always run as if they are in the next version. #[cfg(test)] + #[allow(deprecated)] fn default() -> Self { Self { library: "lance".to_string(), @@ -1024,7 +1031,7 @@ mod tests { fn test_writer_version() { let wv = WriterVersion::default(); assert_eq!(wv.library, "lance"); - let parts = wv.semver().unwrap(); + let version = wv.lance_lib_version().unwrap(); // Parse the actual cargo version to check if it has a pre-release tag let cargo_version = env!("CARGO_PKG_VERSION"); @@ -1035,29 +1042,24 @@ mod tests { }; assert_eq!( - parts, - ( - env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(), - env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(), - // Unit tests run against (major,minor,patch + 1) - env!("CARGO_PKG_VERSION_PATCH").parse::().unwrap() + 1, - expected_tag - ) + version.major, + env!("CARGO_PKG_VERSION_MAJOR").parse::().unwrap() + ); + assert_eq!( + version.minor, + env!("CARGO_PKG_VERSION_MINOR").parse::().unwrap() ); - - // Verify the base version (without tag) matches CARGO_PKG_VERSION - let base_version = cargo_version.split('-').next().unwrap(); assert_eq!( - format!("{}.{}.{}", parts.0, parts.1, parts.2 - 1), - base_version + version.patch, + // Unit tests run against (major,minor,patch + 1) + env!("CARGO_PKG_VERSION_PATCH").parse::().unwrap() + 1 ); + assert_eq!(version.pre.as_str(), expected_tag.unwrap_or("")); for part in &[VersionPart::Major, VersionPart::Minor, VersionPart::Patch] { - let bumped = wv.bump(*part, false); - let bumped_parts = bumped.semver_or_panic(); - assert!(wv - .older_than(bumped_parts.0, bumped_parts.1, bumped_parts.2) - .expect("Valid Lance version should be comparable")); + let mut bumped_version = version.clone(); + bump_version(&mut bumped_version, *part); + assert!(version < bumped_version); } } diff --git a/rust/lance/Cargo.toml b/rust/lance/Cargo.toml index fb7102db7b2..56770ed306b 100644 --- a/rust/lance/Cargo.toml +++ b/rust/lance/Cargo.toml @@ -80,6 +80,7 @@ aws-sdk-dynamodb = { workspace = true, optional = true } tracing.workspace = true humantime = { workspace = true } async_cell = "0.2.2" +semver.workspace = true tokio-stream = { workspace = true } [target.'cfg(target_os = "linux")'.dev-dependencies] diff --git a/rust/lance/src/io/commit.rs b/rust/lance/src/io/commit.rs index b0aebc1c598..37a0fead258 100644 --- a/rust/lance/src/io/commit.rs +++ b/rust/lance/src/io/commit.rs @@ -537,9 +537,26 @@ fn must_recalculate_fragment_bitmap( index: &IndexMetadata, version: Option<&WriterVersion>, ) -> bool { + if index.fragment_bitmap.is_none() { + return true; + } // If the fragment bitmap was written by an old version of lance then we need to recalculate // it because it could be corrupt due to a bug in versions < 0.8.15 - index.fragment_bitmap.is_none() || version.and_then(|v| v.older_than(0, 8, 15)).unwrap_or(true) + if let Some(version) = version { + if version.library != "lance" { + // We assume a different library is not affected by the bug. + return false; + } + + let cutoff = semver::Version::new(0, 8, 15); + version + .lance_lib_version() + .map(|lance_lib_version| lance_lib_version < cutoff) + .unwrap_or(true) + } else { + // Older versions of Lance library didn't record writer version at all. + true + } } /// Update indices with new fields. From 763435d1eeae58f2915938870bcd7d802908e193 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 15:23:50 -0700 Subject: [PATCH 4/4] python lockfile --- python/Cargo.lock | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/Cargo.lock b/python/Cargo.lock index 40b0e89de64..b7fb9183e52 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -3867,6 +3867,7 @@ dependencies = [ "prost-types 0.13.5", "rand 0.9.1", "roaring", + "semver", "serde", "serde_json", "snafu", @@ -4232,6 +4233,7 @@ dependencies = [ "rand 0.9.1", "rangemap", "roaring", + "semver", "serde", "serde_json", "snafu",