diff --git a/Cargo.lock b/Cargo.lock index d48218afc64..71561804408 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4380,6 +4380,7 @@ dependencies = [ "rand 0.9.2", "roaring", "rstest", + "semver", "serde", "serde_json", "snafu", @@ -4852,6 +4853,7 @@ dependencies = [ "rangemap", "roaring", "rstest", + "semver", "serde", "serde_json", "snafu", diff --git a/Cargo.toml b/Cargo.toml index c01f12dc946..3c1e768e833 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -160,6 +160,7 @@ rstest = "0.23.0" rustc_version = "0.4" serde = { version = "^1" } serde_json = { version = "1" } +semver = "1.0" shellexpand = "3.0" snafu = "0.8" strum = "0.26" diff --git a/python/Cargo.lock b/python/Cargo.lock index 40b0e89de64..b7fb9183e52 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -3867,6 +3867,7 @@ dependencies = [ "prost-types 0.13.5", "rand 0.9.1", "roaring", + "semver", "serde", "serde_json", "snafu", @@ -4232,6 +4233,7 @@ dependencies = [ "rand 0.9.1", "rangemap", "roaring", + "semver", "serde", "serde_json", "snafu", diff --git a/rust/lance-table/Cargo.toml b/rust/lance-table/Cargo.toml index e9f9184d898..961e11d45c3 100644 --- a/rust/lance-table/Cargo.toml +++ b/rust/lance-table/Cargo.toml @@ -38,6 +38,7 @@ rangemap.workspace = true roaring.workspace = true serde.workspace = true serde_json.workspace = true +semver.workspace = true snafu.workspace = true tokio.workspace = true tracing.workspace = true diff --git a/rust/lance-table/src/format/manifest.rs b/rust/lance-table/src/format/manifest.rs index 703f4366b3e..11512065c7c 100644 --- a/rust/lance-table/src/format/manifest.rs +++ b/rust/lance-table/src/format/manifest.rs @@ -662,9 +662,27 @@ pub enum VersionPart { Patch, } +fn bump_version(version: &mut semver::Version, part: VersionPart) { + match part { + VersionPart::Major => { + version.major += 1; + version.minor = 0; + version.patch = 0; + } + VersionPart::Minor => { + version.minor += 1; + version.patch = 0; + } + VersionPart::Patch => { + version.patch += 1; + } + } +} + impl WriterVersion { /// Try to parse the version string as a semver string. Returns None if /// not successful. + #[deprecated(note = "Use `lance_lib_version()` instead")] pub fn semver(&self) -> Option<(u32, u32, u32, Option<&str>)> { // First split by '-' to separate the version from the pre-release tag let (version_part, tag) = if let Some(dash_idx) = self.version.find('-') { @@ -684,33 +702,56 @@ impl WriterVersion { Some((major, minor, patch, tag)) } + /// If the library is "lance", parse the version as semver and return it. + /// Returns None if the library is not "lance" or the version cannot be parsed as semver. + pub fn lance_lib_version(&self) -> Option { + if self.library != "lance" { + return None; + } + + let version = semver::Version::parse(&self.version).ok()?; + Some(version) + } + + #[deprecated( + note = "Use `lance_lib_version()` instead, which safely checks the library field and returns Option" + )] + #[allow(deprecated)] pub fn semver_or_panic(&self) -> (u32, u32, u32, Option<&str>) { self.semver() .unwrap_or_else(|| panic!("Invalid writer version: {}", self.version)) } - /// Return true if self is older than the given major/minor/patch + /// Check if this is a Lance library version older than the given major/minor/patch. + /// + /// # Panics + /// + /// Panics if the library is not "lance" or the version cannot be parsed as semver. + #[deprecated(note = "Use `lance_lib_version()` and its `older_than` method instead.")] pub fn older_than(&self, major: u32, minor: u32, patch: u32) -> bool { - let version = self.semver_or_panic(); - (version.0, version.1, version.2) < (major, minor, patch) + let version = self + .lance_lib_version() + .expect("Not lance library or invalid version"); + let other = semver::Version { + major: major.into(), + minor: minor.into(), + patch: patch.into(), + pre: semver::Prerelease::EMPTY, + build: semver::BuildMetadata::EMPTY, + }; + version < other } + #[deprecated(note = "This is meant for testing and will be made private in future version.")] pub fn bump(&self, part: VersionPart, keep_tag: bool) -> Self { - let parts = self.semver_or_panic(); - let tag = if keep_tag { parts.3 } else { None }; - let new_parts = match part { - VersionPart::Major => (parts.0 + 1, parts.1, parts.2, tag), - VersionPart::Minor => (parts.0, parts.1 + 1, parts.2, tag), - VersionPart::Patch => (parts.0, parts.1, parts.2 + 1, tag), - }; - let new_version = if let Some(tag) = tag { - format!("{}.{}.{}-{}", new_parts.0, new_parts.1, new_parts.2, tag) - } else { - format!("{}.{}.{}", new_parts.0, new_parts.1, new_parts.2) - }; + let mut version = self.lance_lib_version().expect("Should be lance version"); + bump_version(&mut version, part); + if !keep_tag { + version.pre = semver::Prerelease::EMPTY; + } Self { library: self.library.clone(), - version: new_version, + version: version.to_string(), } } } @@ -726,6 +767,7 @@ impl Default for WriterVersion { // Unit tests always run as if they are in the next version. #[cfg(test)] + #[allow(deprecated)] fn default() -> Self { Self { library: "lance".to_string(), @@ -989,7 +1031,7 @@ mod tests { fn test_writer_version() { let wv = WriterVersion::default(); assert_eq!(wv.library, "lance"); - let parts = wv.semver().unwrap(); + let version = wv.lance_lib_version().unwrap(); // Parse the actual cargo version to check if it has a pre-release tag let cargo_version = env!("CARGO_PKG_VERSION"); @@ -1000,27 +1042,24 @@ mod tests { }; assert_eq!( - parts, - ( - env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(), - env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(), - // Unit tests run against (major,minor,patch + 1) - env!("CARGO_PKG_VERSION_PATCH").parse::().unwrap() + 1, - expected_tag - ) + version.major, + env!("CARGO_PKG_VERSION_MAJOR").parse::().unwrap() + ); + assert_eq!( + version.minor, + env!("CARGO_PKG_VERSION_MINOR").parse::().unwrap() ); - - // Verify the base version (without tag) matches CARGO_PKG_VERSION - let base_version = cargo_version.split('-').next().unwrap(); assert_eq!( - format!("{}.{}.{}", parts.0, parts.1, parts.2 - 1), - base_version + version.patch, + // Unit tests run against (major,minor,patch + 1) + env!("CARGO_PKG_VERSION_PATCH").parse::().unwrap() + 1 ); + assert_eq!(version.pre.as_str(), expected_tag.unwrap_or("")); for part in &[VersionPart::Major, VersionPart::Minor, VersionPart::Patch] { - let bumped = wv.bump(*part, false); - let bumped_parts = bumped.semver_or_panic(); - assert!(wv.older_than(bumped_parts.0, bumped_parts.1, bumped_parts.2)); + let mut bumped_version = version.clone(); + bump_version(&mut bumped_version, *part); + assert!(version < bumped_version); } } diff --git a/rust/lance/Cargo.toml b/rust/lance/Cargo.toml index fb7102db7b2..56770ed306b 100644 --- a/rust/lance/Cargo.toml +++ b/rust/lance/Cargo.toml @@ -80,6 +80,7 @@ aws-sdk-dynamodb = { workspace = true, optional = true } tracing.workspace = true humantime = { workspace = true } async_cell = "0.2.2" +semver.workspace = true tokio-stream = { workspace = true } [target.'cfg(target_os = "linux")'.dev-dependencies] diff --git a/rust/lance/src/io/commit.rs b/rust/lance/src/io/commit.rs index c5e3b5ea334..37a0fead258 100644 --- a/rust/lance/src/io/commit.rs +++ b/rust/lance/src/io/commit.rs @@ -537,9 +537,26 @@ fn must_recalculate_fragment_bitmap( index: &IndexMetadata, version: Option<&WriterVersion>, ) -> bool { + if index.fragment_bitmap.is_none() { + return true; + } // If the fragment bitmap was written by an old version of lance then we need to recalculate // it because it could be corrupt due to a bug in versions < 0.8.15 - index.fragment_bitmap.is_none() || version.map(|v| v.older_than(0, 8, 15)).unwrap_or(true) + if let Some(version) = version { + if version.library != "lance" { + // We assume a different library is not affected by the bug. + return false; + } + + let cutoff = semver::Version::new(0, 8, 15); + version + .lance_lib_version() + .map(|lance_lib_version| lance_lib_version < cutoff) + .unwrap_or(true) + } else { + // Older versions of Lance library didn't record writer version at all. + true + } } /// Update indices with new fields.