Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
ba0ec2b
initial commit
eric-maynard May 23, 2025
6ee1262
add index
eric-maynard May 23, 2025
5426f16
changes
eric-maynard May 23, 2025
2f8ad59
needs refactoring
eric-maynard May 23, 2025
7172b05
wire model
eric-maynard May 23, 2025
8a6f567
continue wiring
eric-maynard May 23, 2025
f8f615c
feature configs
eric-maynard May 23, 2025
911b37b
building
eric-maynard May 23, 2025
b731fe1
testing
eric-maynard May 24, 2025
ef8e024
failed to find version
eric-maynard May 24, 2025
79b8ec2
stable
eric-maynard May 24, 2025
70a3481
slash fix
eric-maynard May 24, 2025
6529a84
stable with slash
eric-maynard May 24, 2025
ac0b17e
testing
eric-maynard May 27, 2025
d8a3140
Merge branch 'main' of github.com:apache/polaris into index-sibling-c…
eric-maynard May 27, 2025
30412b5
sql fix
eric-maynard May 27, 2025
7e69c97
review
eric-maynard May 27, 2025
c88724a
polish
eric-maynard May 27, 2025
dc1b6cb
QG unit test
eric-maynard May 28, 2025
98ba46e
autolint
eric-maynard May 28, 2025
0990483
bugfix
eric-maynard May 28, 2025
5650be2
autolint
eric-maynard May 28, 2025
f11205d
changes per review
eric-maynard May 30, 2025
c9e2d5c
testfix
eric-maynard May 30, 2025
8605a1c
some changes per review
eric-maynard Jun 3, 2025
aa1591c
autolint
eric-maynard Jun 3, 2025
8b5e71e
pull main
eric-maynard Jun 3, 2025
4b49d01
some special characters
eric-maynard Jun 3, 2025
b1d6425
some special characters
eric-maynard Jun 3, 2025
e4c8c2c
refactor in progress
eric-maynard Jun 3, 2025
ecda6e2
Semistable
eric-maynard Jun 4, 2025
a976bb8
stable after refactor
eric-maynard Jun 4, 2025
12d8530
Merge branch 'main' of github.com:apache/polaris into index-sibling-c…
eric-maynard Jun 6, 2025
1005a23
remove 1 = 2
eric-maynard Jun 6, 2025
fd8c776
start fixing for cross-scheme
eric-maynard Jun 16, 2025
45c5e05
resolve conflicts somewhat
eric-maynard Jun 16, 2025
29ae543
try to move to preparedquery
eric-maynard Jun 16, 2025
9cf2b02
fix QueryGenerationTest
eric-maynard Jun 16, 2025
d14b8e2
add extra check
eric-maynard Jun 16, 2025
acc2b09
autolint
eric-maynard Jun 16, 2025
5545ece
normalization test case fixed up
eric-maynard Jun 16, 2025
4edc032
autolint
eric-maynard Jun 16, 2025
609d997
changes per review
eric-maynard Jun 17, 2025
9703a93
Merge branch 'main' of github.com:apache/polaris into index-sibling-c…
eric-maynard Jun 17, 2025
28056d4
fix new issue related to withoutScheme
eric-maynard Jun 20, 2025
1eeb522
missed a comment
eric-maynard Jun 20, 2025
735e9a9
resolve conflicts
eric-maynard Jun 24, 2025
9730cbf
polish
eric-maynard Jun 24, 2025
f85642b
autolint
eric-maynard Jun 24, 2025
f446486
comment changes
eric-maynard Jun 25, 2025
e689d72
autolint
eric-maynard Jun 25, 2025
2ff0e11
another change per review
eric-maynard Jun 25, 2025
c6e5edb
autolint
eric-maynard Jun 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ public void testIcebergCreateNamespaceInExternalCatalog() throws IOException {
.isNotNull()
.isNotEmpty()
.containsEntry(
PolarisEntityConstants.ENTITY_BASE_LOCATION, "s3://my-bucket/path/to/data/db1");
PolarisEntityConstants.ENTITY_BASE_LOCATION, "s3://my-bucket/path/to/data/db1/");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.Predicate;
Expand All @@ -41,9 +42,11 @@
import org.apache.polaris.core.PolarisCallContext;
import org.apache.polaris.core.context.RealmContext;
import org.apache.polaris.core.entity.EntityNameLookupRecord;
import org.apache.polaris.core.entity.LocationBasedEntity;
import org.apache.polaris.core.entity.PolarisBaseEntity;
import org.apache.polaris.core.entity.PolarisChangeTrackingVersions;
import org.apache.polaris.core.entity.PolarisEntitiesActiveKey;
import org.apache.polaris.core.entity.PolarisEntity;
import org.apache.polaris.core.entity.PolarisEntityCore;
import org.apache.polaris.core.entity.PolarisEntityId;
import org.apache.polaris.core.entity.PolarisEntityType;
Expand Down Expand Up @@ -778,4 +781,12 @@ public void rollback() {
session.getTransaction().rollback();
}
}

/** {@inheritDoc} */
@Override
public <T extends PolarisEntity & LocationBasedEntity>
Optional<Optional<String>> hasOverlappingSiblings(
@Nonnull PolarisCallContext callContext, T entity) {
return Optional.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,6 @@ public static DatabaseType fromDisplayName(String displayName) {
}

public String getInitScriptResource() {
return String.format("%s/schema-v1.sql", this.getDisplayName());
return String.format("%s/schema-v2.sql", this.getDisplayName());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wouldn't version be passed here ? as it will start bootstraping always by v2 ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it good to bootstrap with the latest version? Do we need to wait until users can select a schema version to bootstrap with? Currently it's hardcoded.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats true v1 is the only version though 🤔, can't they make a choice ? wdyt ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's probably a separate feature -- in theory, it would be good to let users provide their own .sql file to bootstrap from. I can pick this up separately if possible? Actually, I already spent some time trying this, but I ran into some issues with the Quarkus datasource setup.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good, i think may be taking version at the time of bootstrapping in from the command line and then percolating all the way through could also be an option ? this way the database type is still inferred using the the existing mechanism

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, let's do that... check out #1942 for a continuation of this!

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,16 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.polaris.core.PolarisCallContext;
import org.apache.polaris.core.entity.EntityNameLookupRecord;
import org.apache.polaris.core.entity.LocationBasedEntity;
import org.apache.polaris.core.entity.PolarisBaseEntity;
import org.apache.polaris.core.entity.PolarisChangeTrackingVersions;
import org.apache.polaris.core.entity.PolarisEntity;
import org.apache.polaris.core.entity.PolarisEntityCore;
import org.apache.polaris.core.entity.PolarisEntityId;
import org.apache.polaris.core.entity.PolarisEntityType;
Expand All @@ -59,10 +62,12 @@
import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo;
import org.apache.polaris.core.storage.PolarisStorageIntegration;
import org.apache.polaris.core.storage.PolarisStorageIntegrationProvider;
import org.apache.polaris.core.storage.StorageLocation;
import org.apache.polaris.persistence.relational.jdbc.models.ModelEntity;
import org.apache.polaris.persistence.relational.jdbc.models.ModelGrantRecord;
import org.apache.polaris.persistence.relational.jdbc.models.ModelPolicyMappingRecord;
import org.apache.polaris.persistence.relational.jdbc.models.ModelPrincipalAuthenticationData;
import org.apache.polaris.persistence.relational.jdbc.models.SchemaVersion;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -74,6 +79,10 @@ public class JdbcBasePersistenceImpl implements BasePersistence, IntegrationPers
private final PrincipalSecretsGenerator secretsGenerator;
private final PolarisStorageIntegrationProvider storageIntegrationProvider;
private final String realmId;
private final int version;

// The max number of components a location can have before the optimized sibling check is not used
private static final int MAX_LOCATION_COMPONENTS = 40;

public JdbcBasePersistenceImpl(
DatasourceOperations databaseOperations,
Expand All @@ -84,6 +93,7 @@ public JdbcBasePersistenceImpl(
this.secretsGenerator = secretsGenerator;
this.storageIntegrationProvider = storageIntegrationProvider;
this.realmId = realmId;
this.version = loadVersion();
}

@Override
Expand Down Expand Up @@ -622,6 +632,64 @@ public boolean hasChildren(
}
}

private int loadVersion() {
PreparedQuery query = QueryGenerator.generateVersionQuery();
try {
List<SchemaVersion> schemaVersion =
datasourceOperations.executeSelect(query, new SchemaVersion());
if (schemaVersion == null || schemaVersion.size() != 1) {
throw new RuntimeException("Failed to retrieve schema version");
}
return schemaVersion.getFirst().getValue();
} catch (SQLException e) {
LOGGER.error("Failed to load schema version due to {}", e.getMessage(), e);
throw new IllegalStateException("Failed to retrieve schema version", e);
}
}

/** {@inheritDoc} */
@Override
public <T extends PolarisEntity & LocationBasedEntity>
Optional<Optional<String>> hasOverlappingSiblings(
@Nonnull PolarisCallContext callContext, T entity) {
if (this.version < 2) {
return Optional.empty();
}
if (entity.getBaseLocation().chars().filter(ch -> ch == '/').count()
> MAX_LOCATION_COMPONENTS) {
return Optional.empty();
}

PreparedQuery query =
QueryGenerator.generateOverlapQuery(
realmId, entity.getCatalogId(), entity.getBaseLocation());
try {
var results = datasourceOperations.executeSelect(query, new ModelEntity());
if (!results.isEmpty()) {
StorageLocation entityLocation = StorageLocation.of(entity.getBaseLocation());
for (PolarisBaseEntity result : results) {
StorageLocation potentialSiblingLocation =
StorageLocation.of(((LocationBasedEntity) result).getBaseLocation());
if (entityLocation.isChildOf(potentialSiblingLocation)
|| potentialSiblingLocation.isChildOf(entityLocation)) {
return Optional.of(Optional.of(potentialSiblingLocation.toString()));
}
}
}
return Optional.of(Optional.empty());
} catch (SQLException e) {
LOGGER.error(
"Failed to retrieve location overlap for location {} due to {}",
entity.getBaseLocation(),
e.getMessage(),
e);
throw new RuntimeException(
String.format(
"Failed to retrieve location overlap for location: %s", entity.getBaseLocation()),
e);
}
}

@Nullable
@Override
public PolarisPrincipalSecrets loadPrincipalSecrets(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.stream.Collectors;
import org.apache.polaris.core.entity.PolarisEntityCore;
import org.apache.polaris.core.entity.PolarisEntityId;
import org.apache.polaris.core.storage.StorageLocation;
import org.apache.polaris.persistence.relational.jdbc.models.ModelEntity;
import org.apache.polaris.persistence.relational.jdbc.models.ModelGrantRecord;

Expand Down Expand Up @@ -208,6 +209,59 @@ static QueryFragment generateWhereClause(
return new QueryFragment(clause, parameters);
}

@VisibleForTesting
static PreparedQuery generateVersionQuery() {
return new PreparedQuery("SELECT version_value FROM POLARIS_SCHEMA.VERSION", List.of());
}

/**
* Generate a SELECT query to find any entities that have a given realm & parent and that may with
* a given location. The check is performed without consideration for the scheme, so a path on one
* storage type may give a false positive for overlapping with another storage type. This should
* be combined with a check using `StorageLocation`.
*
* @param realmId A realm to search within
* @param catalogId A catalog entity to search within
* @param baseLocation The base location to look for overlap with, with or without a scheme
* @return The list of possibly overlapping entities that meet the criteria
*/
@VisibleForTesting
public static PreparedQuery generateOverlapQuery(
String realmId, long catalogId, String baseLocation) {
StorageLocation baseStorageLocation = StorageLocation.of(baseLocation);
String locationWithoutScheme = baseStorageLocation.withoutScheme();

List<String> conditions = new ArrayList<>();
List<Object> parameters = new ArrayList<>();

String[] components = locationWithoutScheme.split("/");
StringBuilder pathBuilder = new StringBuilder();

for (String component : components) {
pathBuilder.append(component).append("/");
conditions.add("location_without_scheme = ?");
parameters.add(pathBuilder.toString());
}

// Add LIKE condition to match children
conditions.add("location_without_scheme LIKE ?");
parameters.add(locationWithoutScheme + "%");

String locationClause = String.join(" OR ", conditions);
String clause = " WHERE realm_id = ? AND catalog_id = ? AND (" + locationClause + ")";

// realmId and parentId go first
List<Object> finalParams = new ArrayList<>();
finalParams.add(realmId);
finalParams.add(catalogId);
finalParams.addAll(parameters);

QueryFragment where = new QueryFragment(clause, finalParams);
PreparedQuery query =
generateSelectQuery(ModelEntity.ALL_COLUMNS, ModelEntity.TABLE_NAME, where.sql());
return new PreparedQuery(query.sql(), where.parameters());
}

private static String getFullyQualifiedTableName(String tableName) {
// TODO: make schema name configurable.
return "POLARIS_SCHEMA." + tableName;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
import java.util.List;
import java.util.Map;
import org.apache.polaris.core.entity.PolarisBaseEntity;
import org.apache.polaris.core.entity.PolarisEntityConstants;
import org.apache.polaris.core.entity.PolarisEntitySubType;
import org.apache.polaris.core.entity.PolarisEntityType;
import org.apache.polaris.core.storage.StorageLocation;
import org.apache.polaris.persistence.relational.jdbc.DatabaseType;

public class ModelEntity implements Converter<PolarisBaseEntity> {
Expand All @@ -47,7 +49,8 @@ public class ModelEntity implements Converter<PolarisBaseEntity> {
"last_update_timestamp",
"properties",
"internal_properties",
"grant_records_version");
"grant_records_version",
"location_without_scheme");

// the id of the catalog associated to that entity. use 0 if this entity is top-level
// like a catalog
Expand Down Expand Up @@ -95,6 +98,9 @@ public class ModelEntity implements Converter<PolarisBaseEntity> {
// current version for that entity, will be monotonically incremented
private int grantRecordsVersion;

// location for the entity but without a scheme, when applicable
private String locationWithoutScheme;

public long getId() {
return id;
}
Expand Down Expand Up @@ -155,6 +161,10 @@ public int getGrantRecordsVersion() {
return grantRecordsVersion;
}

public String getLocationWithoutScheme() {
return locationWithoutScheme;
}

public static Builder builder() {
return new Builder();
}
Expand All @@ -180,6 +190,7 @@ public PolarisBaseEntity fromResultSet(ResultSet r) throws SQLException {
// JSONB: use getString(), not getObject().
.internalProperties(r.getString("internal_properties"))
.grantRecordsVersion(r.getObject("grant_records_version", Integer.class))
.locationWithoutScheme(r.getString("location_without_scheme"))
.build();

return toEntity(modelEntity);
Expand Down Expand Up @@ -208,6 +219,7 @@ public Map<String, Object> toMap(DatabaseType databaseType) {
map.put("internal_properties", this.getInternalProperties());
}
map.put("grant_records_version", this.getGrantRecordsVersion());
map.put("location_without_scheme", this.getLocationWithoutScheme());
return map;
}

Expand Down Expand Up @@ -293,29 +305,52 @@ public Builder grantRecordsVersion(int grantRecordsVersion) {
return this;
}

public Builder locationWithoutScheme(String location) {
entity.locationWithoutScheme = location;
return this;
}

public ModelEntity build() {
return entity;
}
}

public static ModelEntity fromEntity(PolarisBaseEntity entity) {
return ModelEntity.builder()
.catalogId(entity.getCatalogId())
.id(entity.getId())
.parentId(entity.getParentId())
.typeCode(entity.getTypeCode())
.name(entity.getName())
.entityVersion(entity.getEntityVersion())
.subTypeCode(entity.getSubTypeCode())
.createTimestamp(entity.getCreateTimestamp())
.dropTimestamp(entity.getDropTimestamp())
.purgeTimestamp(entity.getPurgeTimestamp())
.toPurgeTimestamp(entity.getToPurgeTimestamp())
.lastUpdateTimestamp(entity.getLastUpdateTimestamp())
.properties(entity.getProperties())
.internalProperties(entity.getInternalProperties())
.grantRecordsVersion(entity.getGrantRecordsVersion())
.build();
var builder =
ModelEntity.builder()
.catalogId(entity.getCatalogId())
.id(entity.getId())
.parentId(entity.getParentId())
.typeCode(entity.getTypeCode())
.name(entity.getName())
.entityVersion(entity.getEntityVersion())
.subTypeCode(entity.getSubTypeCode())
.createTimestamp(entity.getCreateTimestamp())
.dropTimestamp(entity.getDropTimestamp())
.purgeTimestamp(entity.getPurgeTimestamp())
.toPurgeTimestamp(entity.getToPurgeTimestamp())
.lastUpdateTimestamp(entity.getLastUpdateTimestamp())
.properties(entity.getProperties())
.internalProperties(entity.getInternalProperties())
.grantRecordsVersion(entity.getGrantRecordsVersion());

if (entity.getType() == PolarisEntityType.TABLE_LIKE) {
if (entity.getSubType() == PolarisEntitySubType.ICEBERG_TABLE
|| entity.getSubType() == PolarisEntitySubType.ICEBERG_VIEW) {
builder.locationWithoutScheme(
StorageLocation.of(
entity.getPropertiesAsMap().get(PolarisEntityConstants.ENTITY_BASE_LOCATION))
.withoutScheme());
}
}
if (entity.getType() == PolarisEntityType.NAMESPACE) {
builder.locationWithoutScheme(
StorageLocation.of(
entity.getPropertiesAsMap().get(PolarisEntityConstants.ENTITY_BASE_LOCATION))
.withoutScheme());
}

return builder.build();
}

public static PolarisBaseEntity toEntity(ModelEntity model) {
Expand Down
Loading
Loading