Skip to content

Commit cf336da

Browse files
committed
Fall back to namespace location and warehouse location if no table location is provided
1 parent ab4f69a commit cf336da

File tree

3 files changed

+251
-16
lines changed

3 files changed

+251
-16
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ parquet = "52"
7373
pilota = "0.11.2"
7474
pretty_assertions = "1.4.0"
7575
port_scanner = "0.1.5"
76+
regex = "1.10.5"
7677
reqwest = { version = "^0.12", default-features = false, features = ["json"] }
7778
rust_decimal = "1.31.0"
7879
serde = { version = "^1.0", features = ["rc"] }

crates/catalog/memory/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,6 @@ serde_json = { workspace = true }
3737
uuid = { workspace = true, features = ["v4"] }
3838

3939
[dev-dependencies]
40+
regex = { workspace = true }
4041
tempfile = { workspace = true }
4142
tokio = { workspace = true }

crates/catalog/memory/src/catalog.rs

Lines changed: 249 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,24 @@ use uuid::Uuid;
3333

3434
use crate::namespace_state::NamespaceState;
3535

36+
/// Key of the namespace property that holds the namespace's `location`.
37+
const LOCATION: &str = "location";
38+
3639
/// Memory catalog implementation.
3740
#[derive(Debug)]
3841
pub struct MemoryCatalog {
3942
root_namespace_state: Mutex<NamespaceState>,
4043
file_io: FileIO,
44+
warehouse_location: Option<String>,
4145
}
4246

4347
impl MemoryCatalog {
4448
/// Creates a memory catalog.
45-
pub fn new(file_io: FileIO) -> Self {
49+
pub fn new(file_io: FileIO, warehouse_location: Option<String>) -> Self {
4650
Self {
4751
root_namespace_state: Mutex::new(NamespaceState::default()),
4852
file_io,
53+
warehouse_location,
4954
}
5055
}
5156
}
@@ -165,11 +170,20 @@ impl Catalog for MemoryCatalog {
165170
let (table_creation, location) = match table_creation.location.clone() {
166171
Some(location) => (table_creation, location),
167172
None => {
168-
let location = format!(
169-
"{}/{}",
170-
table_ident.namespace().join("/"),
171-
table_ident.name()
172-
);
173+
let namespace_properties = root_namespace_state.get_properties(namespace_ident)?;
174+
let location_prefix = match namespace_properties.get(LOCATION) {
175+
Some(namespace_location) => Ok(namespace_location.clone()),
176+
None => match self.warehouse_location.clone() {
177+
Some(warehouse_location) => Ok(format!("{}/{}", warehouse_location, namespace_ident.join("/"))),
178+
None => Err(Error::new(ErrorKind::Unexpected,
179+
format!(
180+
"Cannot create table {:?}. No table location or namespace location or warehouse location were provided.",
181+
&table_ident
182+
)))
183+
},
184+
}?;
185+
186+
let location = format!("{}/{}", location_prefix, table_ident.name());
173187

174188
let new_table_creation = TableCreation {
175189
location: Some(location.clone()),
@@ -273,13 +287,20 @@ mod tests {
273287

274288
use iceberg::io::FileIOBuilder;
275289
use iceberg::spec::{NestedField, PartitionSpec, PrimitiveType, Schema, SortOrder, Type};
290+
use regex::Regex;
276291
use tempfile::TempDir;
277292

278293
use super::*;
279294

295+
fn temp_path() -> String {
296+
let temp_dir = TempDir::new().unwrap();
297+
temp_dir.path().to_str().unwrap().to_string()
298+
}
299+
280300
fn new_memory_catalog() -> impl Catalog {
281301
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
282-
MemoryCatalog::new(file_io)
302+
let warehouse_location = temp_path();
303+
MemoryCatalog::new(file_io, Some(warehouse_location))
283304
}
284305

285306
async fn create_namespace<C: Catalog>(catalog: &C, namespace_ident: &NamespaceIdent) {
@@ -312,16 +333,12 @@ mod tests {
312333
}
313334

314335
async fn create_table<C: Catalog>(catalog: &C, table_ident: &TableIdent) {
315-
let tmp_dir = TempDir::new().unwrap();
316-
let location = tmp_dir.path().to_str().unwrap().to_string();
317-
318336
let _ = catalog
319337
.create_table(
320338
&table_ident.namespace,
321339
TableCreation::builder()
322340
.name(table_ident.name().into())
323341
.schema(simple_table_schema())
324-
.location(location)
325342
.build(),
326343
)
327344
.await
@@ -374,6 +391,14 @@ mod tests {
374391
assert!(!table.readonly());
375392
}
376393

394+
const UUID_REGEX_STR: &str = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";
395+
396+
fn assert_table_metadata_location_matches(table: &Table, regex_str: &str) {
397+
let actual = table.metadata_location().unwrap().to_string();
398+
let regex = Regex::new(regex_str).unwrap();
399+
assert!(regex.is_match(&actual))
400+
}
401+
377402
#[tokio::test]
378403
async fn test_list_namespaces_returns_empty_vector() {
379404
let catalog = new_memory_catalog();
@@ -990,12 +1015,220 @@ mod tests {
9901015
.metadata_location()
9911016
.unwrap()
9921017
.to_string()
993-
.starts_with(&location));
1018+
.starts_with(&location))
1019+
}
9941020

995-
assert_table_eq(
996-
&catalog.load_table(&expected_table_ident).await.unwrap(),
997-
&expected_table_ident,
998-
&simple_table_schema(),
1021+
#[tokio::test]
1022+
async fn test_create_table_falls_back_to_namespace_location_if_table_location_is_missing() {
1023+
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
1024+
let warehouse_location = temp_path();
1025+
let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone()));
1026+
1027+
let namespace_ident = NamespaceIdent::new("a".into());
1028+
let mut namespace_properties = HashMap::new();
1029+
let namespace_location = temp_path();
1030+
namespace_properties.insert(LOCATION.to_string(), namespace_location.to_string());
1031+
catalog
1032+
.create_namespace(&namespace_ident, namespace_properties)
1033+
.await
1034+
.unwrap();
1035+
1036+
let table_name = "tbl1";
1037+
let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into());
1038+
let expected_table_metadata_location_regex = format!(
1039+
"^{}/tbl1/metadata/0-{}.metadata.json$",
1040+
namespace_location, UUID_REGEX_STR,
1041+
);
1042+
1043+
let table = catalog
1044+
.create_table(
1045+
&namespace_ident,
1046+
TableCreation::builder()
1047+
.name(table_name.into())
1048+
.schema(simple_table_schema())
1049+
// no location specified for table
1050+
.build(),
1051+
)
1052+
.await
1053+
.unwrap();
1054+
assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
1055+
assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
1056+
1057+
let table = catalog.load_table(&expected_table_ident).await.unwrap();
1058+
assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
1059+
assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
1060+
}
1061+
1062+
#[tokio::test]
1063+
async fn test_create_table_in_nested_namespace_falls_back_to_nested_namespace_location_if_table_location_is_missing(
1064+
) {
1065+
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
1066+
let warehouse_location = temp_path();
1067+
let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone()));
1068+
1069+
let namespace_ident = NamespaceIdent::new("a".into());
1070+
let mut namespace_properties = HashMap::new();
1071+
let namespace_location = temp_path();
1072+
namespace_properties.insert(LOCATION.to_string(), namespace_location.to_string());
1073+
catalog
1074+
.create_namespace(&namespace_ident, namespace_properties)
1075+
.await
1076+
.unwrap();
1077+
1078+
let nested_namespace_ident = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
1079+
let mut nested_namespace_properties = HashMap::new();
1080+
let nested_namespace_location = temp_path();
1081+
nested_namespace_properties
1082+
.insert(LOCATION.to_string(), nested_namespace_location.to_string());
1083+
catalog
1084+
.create_namespace(&nested_namespace_ident, nested_namespace_properties)
1085+
.await
1086+
.unwrap();
1087+
1088+
let table_name = "tbl1";
1089+
let expected_table_ident =
1090+
TableIdent::new(nested_namespace_ident.clone(), table_name.into());
1091+
let expected_table_metadata_location_regex = format!(
1092+
"^{}/tbl1/metadata/0-{}.metadata.json$",
1093+
nested_namespace_location, UUID_REGEX_STR,
1094+
);
1095+
1096+
let table = catalog
1097+
.create_table(
1098+
&nested_namespace_ident,
1099+
TableCreation::builder()
1100+
.name(table_name.into())
1101+
.schema(simple_table_schema())
1102+
// no location specified for table
1103+
.build(),
1104+
)
1105+
.await
1106+
.unwrap();
1107+
assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
1108+
assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
1109+
1110+
let table = catalog.load_table(&expected_table_ident).await.unwrap();
1111+
assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
1112+
assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
1113+
}
1114+
1115+
#[tokio::test]
1116+
async fn test_create_table_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing(
1117+
) {
1118+
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
1119+
let warehouse_location = temp_path();
1120+
let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone()));
1121+
1122+
let namespace_ident = NamespaceIdent::new("a".into());
1123+
// note: no location specified in namespace_properties
1124+
let namespace_properties = HashMap::new();
1125+
catalog
1126+
.create_namespace(&namespace_ident, namespace_properties)
1127+
.await
1128+
.unwrap();
1129+
1130+
let table_name = "tbl1";
1131+
let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into());
1132+
let expected_table_metadata_location_regex = format!(
1133+
"^{}/a/tbl1/metadata/0-{}.metadata.json$",
1134+
warehouse_location, UUID_REGEX_STR
1135+
);
1136+
1137+
let table = catalog
1138+
.create_table(
1139+
&namespace_ident,
1140+
TableCreation::builder()
1141+
.name(table_name.into())
1142+
.schema(simple_table_schema())
1143+
// no location specified for table
1144+
.build(),
1145+
)
1146+
.await
1147+
.unwrap();
1148+
assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
1149+
assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
1150+
1151+
let table = catalog.load_table(&expected_table_ident).await.unwrap();
1152+
assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
1153+
assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
1154+
}
1155+
1156+
#[tokio::test]
1157+
async fn test_create_table_in_nested_namespace_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing(
1158+
) {
1159+
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
1160+
let warehouse_location = temp_path();
1161+
let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone()));
1162+
1163+
let namespace_ident = NamespaceIdent::new("a".into());
1164+
catalog
1165+
// note: no location specified in namespace_properties
1166+
.create_namespace(&namespace_ident, HashMap::new())
1167+
.await
1168+
.unwrap();
1169+
1170+
let nested_namespace_ident = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
1171+
catalog
1172+
// note: no location specified in namespace_properties
1173+
.create_namespace(&nested_namespace_ident, HashMap::new())
1174+
.await
1175+
.unwrap();
1176+
1177+
let table_name = "tbl1";
1178+
let expected_table_ident =
1179+
TableIdent::new(nested_namespace_ident.clone(), table_name.into());
1180+
let expected_table_metadata_location_regex = format!(
1181+
"^{}/a/b/tbl1/metadata/0-{}.metadata.json$",
1182+
warehouse_location, UUID_REGEX_STR
1183+
);
1184+
1185+
let table = catalog
1186+
.create_table(
1187+
&nested_namespace_ident,
1188+
TableCreation::builder()
1189+
.name(table_name.into())
1190+
.schema(simple_table_schema())
1191+
// no location specified for table
1192+
.build(),
1193+
)
1194+
.await
1195+
.unwrap();
1196+
assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
1197+
assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
1198+
1199+
let table = catalog.load_table(&expected_table_ident).await.unwrap();
1200+
assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
1201+
assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
1202+
}
1203+
1204+
#[tokio::test]
1205+
async fn test_create_table_throws_error_if_table_location_and_namespace_location_and_warehouse_location_are_missing(
1206+
) {
1207+
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
1208+
let catalog = MemoryCatalog::new(file_io, None);
1209+
1210+
let namespace_ident = NamespaceIdent::new("a".into());
1211+
create_namespace(&catalog, &namespace_ident).await;
1212+
1213+
let table_name = "tbl1";
1214+
let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into());
1215+
1216+
assert_eq!(
1217+
catalog
1218+
.create_table(
1219+
&namespace_ident,
1220+
TableCreation::builder()
1221+
.name(table_name.into())
1222+
.schema(simple_table_schema())
1223+
.build(),
1224+
)
1225+
.await
1226+
.unwrap_err()
1227+
.to_string(),
1228+
format!(
1229+
"Unexpected => Cannot create table {:?}. No table location or namespace location or warehouse location were provided.",
1230+
&expected_table_ident
1231+
)
9991232
)
10001233
}
10011234

0 commit comments

Comments
 (0)