Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 35 additions & 21 deletions java/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,12 @@ impl BlockingDataset {
&mut self,
transaction: Transaction,
store_params: ObjectStoreParams,
detached: bool,
) -> Result<Self> {
let new_dataset = RT.block_on(
CommitBuilder::new(Arc::new(self.clone().inner))
.with_store_params(store_params)
.with_detached(detached)
.execute(transaction),
)?;
Ok(BlockingDataset { inner: new_dataset })
Expand Down Expand Up @@ -322,13 +324,14 @@ pub extern "system" fn Java_org_lance_Dataset_createWithFfiSchema<'local>(
_obj: JObject,
arrow_schema_addr: jlong,
path: JString,
max_rows_per_file: JObject, // Optional<Integer>
max_rows_per_group: JObject, // Optional<Integer>
max_bytes_per_file: JObject, // Optional<Long>
mode: JObject, // Optional<String>
enable_stable_row_ids: JObject, // Optional<Boolean>
data_storage_version: JObject, // Optional<String>
storage_options_obj: JObject, // Map<String, String>
max_rows_per_file: JObject, // Optional<Integer>
max_rows_per_group: JObject, // Optional<Integer>
max_bytes_per_file: JObject, // Optional<Long>
mode: JObject, // Optional<String>
enable_stable_row_ids: JObject, // Optional<Boolean>
data_storage_version: JObject, // Optional<String>
enable_v2_manifest_paths: JObject, // Optional<Boolean>
storage_options_obj: JObject, // Map<String, String>
s3_credentials_refresh_offset_seconds_obj: JObject, // Optional<Long>
initial_bases: JObject,
target_bases: JObject,
Expand All @@ -345,6 +348,7 @@ pub extern "system" fn Java_org_lance_Dataset_createWithFfiSchema<'local>(
mode,
enable_stable_row_ids,
data_storage_version,
enable_v2_manifest_paths,
storage_options_obj,
s3_credentials_refresh_offset_seconds_obj,
initial_bases,
Expand All @@ -358,13 +362,14 @@ fn inner_create_with_ffi_schema<'local>(
env: &mut JNIEnv<'local>,
arrow_schema_addr: jlong,
path: JString,
max_rows_per_file: JObject, // Optional<Integer>
max_rows_per_group: JObject, // Optional<Integer>
max_bytes_per_file: JObject, // Optional<Long>
mode: JObject, // Optional<String>
enable_stable_row_ids: JObject, // Optional<Boolean>
data_storage_version: JObject, // Optional<String>
storage_options_obj: JObject, // Map<String, String>
max_rows_per_file: JObject, // Optional<Integer>
max_rows_per_group: JObject, // Optional<Integer>
max_bytes_per_file: JObject, // Optional<Long>
mode: JObject, // Optional<String>
enable_stable_row_ids: JObject, // Optional<Boolean>
data_storage_version: JObject, // Optional<String>
enable_v2_manifest_paths: JObject, // Optional<Boolean>
storage_options_obj: JObject, // Map<String, String>
s3_credentials_refresh_offset_seconds_obj: JObject, // Optional<Long>
initial_bases: JObject,
target_bases: JObject,
Expand All @@ -383,6 +388,7 @@ fn inner_create_with_ffi_schema<'local>(
mode,
enable_stable_row_ids,
data_storage_version,
enable_v2_manifest_paths,
storage_options_obj,
JObject::null(), // No provider for schema-only creation
s3_credentials_refresh_offset_seconds_obj,
Expand Down Expand Up @@ -412,13 +418,14 @@ pub extern "system" fn Java_org_lance_Dataset_createWithFfiStream<'local>(
_obj: JObject,
arrow_array_stream_addr: jlong,
path: JString,
max_rows_per_file: JObject, // Optional<Integer>
max_rows_per_group: JObject, // Optional<Integer>
max_bytes_per_file: JObject, // Optional<Long>
mode: JObject, // Optional<String>
enable_stable_row_ids: JObject, // Optional<Boolean>
data_storage_version: JObject, // Optional<String>
storage_options_obj: JObject, // Map<String, String>
max_rows_per_file: JObject, // Optional<Integer>
max_rows_per_group: JObject, // Optional<Integer>
max_bytes_per_file: JObject, // Optional<Long>
mode: JObject, // Optional<String>
enable_stable_row_ids: JObject, // Optional<Boolean>
data_storage_version: JObject, // Optional<String>
enable_v2_manifest_paths: JObject, // Optional<Boolean>
storage_options_obj: JObject, // Map<String, String>
s3_credentials_refresh_offset_seconds_obj: JObject, // Optional<Long>
initial_bases: JObject,
target_bases: JObject,
Expand All @@ -434,6 +441,7 @@ pub extern "system" fn Java_org_lance_Dataset_createWithFfiStream<'local>(
max_bytes_per_file,
mode,
enable_stable_row_ids,
enable_v2_manifest_paths,
data_storage_version,
storage_options_obj,
JObject::null(),
Expand All @@ -456,6 +464,7 @@ pub extern "system" fn Java_org_lance_Dataset_createWithFfiStreamAndProvider<'lo
mode: JObject, // Optional<String>
enable_stable_row_ids: JObject, // Optional<Boolean>
data_storage_version: JObject, // Optional<String>
enable_v2_manifest_paths: JObject, // Optional<Boolean>
storage_options_obj: JObject, // Map<String, String>
storage_options_provider_obj: JObject, // Optional<StorageOptionsProvider>
s3_credentials_refresh_offset_seconds_obj: JObject, // Optional<Long>
Expand All @@ -474,6 +483,7 @@ pub extern "system" fn Java_org_lance_Dataset_createWithFfiStreamAndProvider<'lo
mode,
enable_stable_row_ids,
data_storage_version,
enable_v2_manifest_paths,
storage_options_obj,
storage_options_provider_obj,
s3_credentials_refresh_offset_seconds_obj,
Expand All @@ -494,6 +504,7 @@ fn inner_create_with_ffi_stream<'local>(
mode: JObject, // Optional<String>
enable_stable_row_ids: JObject, // Optional<Boolean>
data_storage_version: JObject, // Optional<String>
enable_v2_manifest_paths: JObject, // Optional<Boolean>
storage_options_obj: JObject, // Map<String, String>
storage_options_provider_obj: JObject, // Optional<StorageOptionsProvider>
s3_credentials_refresh_offset_seconds_obj: JObject, // Optional<Long>
Expand All @@ -511,6 +522,7 @@ fn inner_create_with_ffi_stream<'local>(
mode,
enable_stable_row_ids,
data_storage_version,
enable_v2_manifest_paths,
storage_options_obj,
storage_options_provider_obj,
s3_credentials_refresh_offset_seconds_obj,
Expand All @@ -530,6 +542,7 @@ fn create_dataset<'local>(
mode: JObject,
enable_stable_row_ids: JObject,
data_storage_version: JObject,
enable_v2_manifest_paths: JObject,
storage_options_obj: JObject,
storage_options_provider_obj: JObject, // Optional<StorageOptionsProvider>
s3_credentials_refresh_offset_seconds_obj: JObject,
Expand All @@ -547,6 +560,7 @@ fn create_dataset<'local>(
&mode,
&enable_stable_row_ids,
&data_storage_version,
Some(&enable_v2_manifest_paths),
&storage_options_obj,
&storage_options_provider_obj,
&s3_credentials_refresh_offset_seconds_obj,
Expand Down
1 change: 1 addition & 0 deletions java/lance-jni/src/fragment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ fn create_fragment<'a>(
&mode,
&enable_stable_row_ids,
&data_storage_version,
None,
&storage_options_obj,
&storage_options_provider_obj,
&s3_credentials_refresh_offset_seconds_obj,
Expand Down
13 changes: 10 additions & 3 deletions java/lance-jni/src/transaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use arrow::datatypes::Schema;
use arrow_schema::ffi::FFI_ArrowSchema;
use chrono::DateTime;
use jni::objects::{JByteArray, JLongArray, JMap, JObject, JString, JValue, JValueGen};
use jni::sys::jbyte;
use jni::sys::{jboolean, jbyte};
use jni::JNIEnv;
use lance::dataset::transaction::{
DataReplacementGroup, Operation, RewriteGroup, RewrittenIndex, Transaction, TransactionBuilder,
Expand Down Expand Up @@ -727,17 +727,24 @@ pub extern "system" fn Java_org_lance_Dataset_nativeCommitTransaction<'local>(
mut env: JNIEnv<'local>,
java_dataset: JObject,
java_transaction: JObject,
detached_jbool: jboolean,
) -> JObject<'local> {
ok_or_throw!(
env,
inner_commit_transaction(&mut env, java_dataset, java_transaction)
inner_commit_transaction(
&mut env,
java_dataset,
java_transaction,
detached_jbool != 0,
)
)
}

fn inner_commit_transaction<'local>(
env: &mut JNIEnv<'local>,
java_dataset: JObject,
java_transaction: JObject,
detached: bool,
) -> Result<JObject<'local>> {
let write_param_jobj = env
.call_method(&java_transaction, "writeParams", "()Ljava/util/Map;", &[])?
Expand Down Expand Up @@ -771,7 +778,7 @@ fn inner_commit_transaction<'local>(
let new_blocking_ds = {
let mut dataset_guard =
unsafe { env.get_rust_field::<_, _, BlockingDataset>(&java_dataset, NATIVE_DATASET) }?;
dataset_guard.commit_transaction(transaction, store_params)?
dataset_guard.commit_transaction(transaction, store_params, detached)?
};
new_blocking_ds.into_java(env)
}
Expand Down
11 changes: 11 additions & 0 deletions java/lance-jni/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pub fn extract_write_params(
mode: &JObject,
enable_stable_row_ids: &JObject,
data_storage_version: &JObject,
enable_v2_manifest_paths: Option<&JObject>,
storage_options_obj: &JObject,
storage_options_provider_obj: &JObject, // Optional<StorageOptionsProvider>
s3_credentials_refresh_offset_seconds_obj: &JObject, // Optional<Long>
Expand Down Expand Up @@ -75,6 +76,16 @@ pub fn extract_write_params(
data_storage_version_val.as_str(),
)?);
}

// Enable v2 manifest paths by default.
write_params.enable_v2_manifest_paths =
if let Some(enable_v2_manifest_paths) = enable_v2_manifest_paths {
env.get_boolean_opt(enable_v2_manifest_paths)?
.unwrap_or(true)
} else {
true
};

let storage_options: HashMap<String, String> =
extract_storage_options(env, storage_options_obj)?;

Expand Down
21 changes: 19 additions & 2 deletions java/src/main/java/org/lance/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ public static Dataset create(
params.getMode(),
params.getEnableStableRowIds(),
params.getDataStorageVersion(),
params.getEnableV2ManifestPaths(),
params.getStorageOptions(),
params.getS3CredentialsRefreshOffsetSeconds(),
params.getInitialBases(),
Expand Down Expand Up @@ -201,6 +202,7 @@ static Dataset create(
params.getMode(),
params.getEnableStableRowIds(),
params.getDataStorageVersion(),
params.getEnableV2ManifestPaths(),
params.getStorageOptions(),
Optional.ofNullable(storageOptionsProvider),
params.getS3CredentialsRefreshOffsetSeconds(),
Expand All @@ -219,6 +221,7 @@ private static native Dataset createWithFfiSchema(
Optional<String> mode,
Optional<Boolean> enableStableRowIds,
Optional<String> dataStorageVersion,
Optional<Boolean> enableV2ManifestPaths,
Map<String, String> storageOptions,
Optional<Long> s3CredentialsRefreshOffsetSeconds,
Optional<List<BasePath>> initialBases,
Expand All @@ -233,6 +236,7 @@ private static native Dataset createWithFfiStream(
Optional<String> mode,
Optional<Boolean> enableStableRowIds,
Optional<String> dataStorageVersion,
Optional<Boolean> enableV2ManifestPaths,
Map<String, String> storageOptions,
Optional<Long> s3CredentialsRefreshOffsetSeconds,
Optional<List<BasePath>> initialBases,
Expand All @@ -247,6 +251,7 @@ private static native Dataset createWithFfiStreamAndProvider(
Optional<String> mode,
Optional<Boolean> enableStableRowIds,
Optional<String> dataStorageVersion,
Optional<Boolean> enableV2ManifestPaths,
Map<String, String> storageOptions,
Optional<StorageOptionsProvider> storageOptionsProvider,
Optional<Long> s3CredentialsRefreshOffsetSeconds,
Expand Down Expand Up @@ -450,9 +455,21 @@ public Transaction.Builder newTransactionBuilder() {
* @return A new instance of {@link Dataset} linked to committed version.
*/
public Dataset commitTransaction(Transaction transaction) {
return commitTransaction(transaction, false);
}

/**
* Commit a single transaction and return a new Dataset with the new version. Original dataset
* version will not be refreshed.
*
* @param transaction The transaction to commit
* @param detached If true, the commit will not be part of the main dataset lineage.
* @return A new instance of {@link Dataset} linked to committed version.
*/
public Dataset commitTransaction(Transaction transaction, boolean detached) {
Preconditions.checkNotNull(transaction);
try {
Dataset dataset = nativeCommitTransaction(transaction);
Dataset dataset = nativeCommitTransaction(transaction, detached);
if (selfManagedAllocator) {
dataset.allocator = new RootAllocator(Long.MAX_VALUE);
} else {
Expand All @@ -464,7 +481,7 @@ public Dataset commitTransaction(Transaction transaction) {
}
}

private native Dataset nativeCommitTransaction(Transaction transaction);
private native Dataset nativeCommitTransaction(Transaction transaction, boolean detached);

/**
* Drop a Dataset.
Expand Down
14 changes: 14 additions & 0 deletions java/src/main/java/org/lance/WriteParams.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public String getVersionString() {
private final Optional<WriteMode> mode;
private final Optional<Boolean> enableStableRowIds;
private final Optional<LanceFileVersion> dataStorageVersion;
private final Optional<Boolean> enableV2ManifestPaths;
private Map<String, String> storageOptions = new HashMap<>();
private final Optional<Long> s3CredentialsRefreshOffsetSeconds;
private final Optional<List<BasePath>> initialBases;
Expand All @@ -68,6 +69,7 @@ private WriteParams(
Optional<WriteMode> mode,
Optional<Boolean> enableStableRowIds,
Optional<LanceFileVersion> dataStorageVersion,
Optional<Boolean> enableV2ManifestPaths,
Map<String, String> storageOptions,
Optional<Long> s3CredentialsRefreshOffsetSeconds,
Optional<List<BasePath>> initialBases,
Expand All @@ -78,6 +80,7 @@ private WriteParams(
this.mode = mode;
this.enableStableRowIds = enableStableRowIds;
this.dataStorageVersion = dataStorageVersion;
this.enableV2ManifestPaths = enableV2ManifestPaths;
this.storageOptions = storageOptions;
this.s3CredentialsRefreshOffsetSeconds = s3CredentialsRefreshOffsetSeconds;
this.initialBases = initialBases;
Expand Down Expand Up @@ -113,6 +116,10 @@ public Optional<String> getDataStorageVersion() {
return dataStorageVersion.map(LanceFileVersion::getVersionString);
}

public Optional<Boolean> getEnableV2ManifestPaths() {
return enableV2ManifestPaths;
}

public Map<String, String> getStorageOptions() {
return storageOptions;
}
Expand Down Expand Up @@ -148,6 +155,7 @@ public static class Builder {
private Optional<WriteMode> mode = Optional.empty();
private Optional<Boolean> enableStableRowIds = Optional.empty();
private Optional<LanceFileVersion> dataStorageVersion = Optional.empty();
private Optional<Boolean> enableV2ManifestPaths;
private Map<String, String> storageOptions = new HashMap<>();
private Optional<Long> s3CredentialsRefreshOffsetSeconds = Optional.empty();
private Optional<List<BasePath>> initialBases = Optional.empty();
Expand Down Expand Up @@ -188,6 +196,11 @@ public Builder withEnableStableRowIds(boolean enableStableRowIds) {
return this;
}

public Builder withEnableV2ManifestPaths(boolean enableV2ManifestPaths) {
this.enableV2ManifestPaths = Optional.of(enableV2ManifestPaths);
return this;
}

public Builder withS3CredentialsRefreshOffsetSeconds(long s3CredentialsRefreshOffsetSeconds) {
this.s3CredentialsRefreshOffsetSeconds = Optional.of(s3CredentialsRefreshOffsetSeconds);
return this;
Expand All @@ -211,6 +224,7 @@ public WriteParams build() {
mode,
enableStableRowIds,
dataStorageVersion,
enableV2ManifestPaths,
storageOptions,
s3CredentialsRefreshOffsetSeconds,
initialBases,
Expand Down
Loading