Spark: Support time travel through table names #3269
**SparkCatalog.java**
```diff
@@ -19,14 +19,18 @@
 package org.apache.iceberg.spark;

+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.iceberg.CachingCatalog;
 import org.apache.iceberg.CatalogProperties;
 import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.MetadataTableType;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.Transaction;
@@ -39,13 +43,16 @@
 import org.apache.iceberg.hadoop.HadoopTables;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.base.Splitter;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
 import org.apache.iceberg.spark.source.SparkTable;
 import org.apache.iceberg.spark.source.StagedSparkTable;
+import org.apache.iceberg.util.Pair;
+import org.apache.iceberg.util.SnapshotUtil;
 import org.apache.spark.sql.SparkSession;
 import org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException;
 import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
@@ -80,6 +87,9 @@
  */
 public class SparkCatalog extends BaseCatalog {
   private static final Set<String> DEFAULT_NS_KEYS = ImmutableSet.of(TableCatalog.PROP_OWNER);
+  private static final Splitter COMMA = Splitter.on(",");
+  private static final Pattern AT_TIME = Pattern.compile("at(?:_(?:time(?:stamp)?)?)?_?(\\d+)");
+  private static final Pattern SNAPSHOT_ID = Pattern.compile("s(?:nap(?:shot)?)?(?:_id)?_?(\\d+)");

   private String catalogName = null;
   private Catalog icebergCatalog = null;
```
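As a quick illustration of what these two patterns accept, here is a minimal, self-contained sketch (the timestamp and snapshot ID values are made up):

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SelectorPatterns {
  // the same patterns as in SparkCatalog above
  static final Pattern AT_TIME = Pattern.compile("at(?:_(?:time(?:stamp)?)?)?_?(\\d+)");
  static final Pattern SNAPSHOT_ID = Pattern.compile("s(?:nap(?:shot)?)?(?:_id)?_?(\\d+)");

  public static void main(String[] args) {
    // each of these matches AT_TIME; group(1) captures the millisecond timestamp
    for (String name : new String[] {"at_timestamp_1633000000000", "at_time_1633000000000", "at_1633000000000"}) {
      Matcher m = AT_TIME.matcher(name);
      System.out.printf("%s -> %s%n", name, m.matches() ? m.group(1) : "no match");
    }

    // each of these matches SNAPSHOT_ID; group(1) captures the snapshot ID
    for (String name : new String[] {"snapshot_id_10963874102873", "snap_10963874102873", "s_10963874102873"}) {
      Matcher m = SNAPSHOT_ID.matcher(name);
      System.out.printf("%s -> %s%n", name, m.matches() ? m.group(1) : "no match");
    }
  }
}
```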
```diff
@@ -118,8 +128,8 @@ protected TableIdentifier buildIdentifier(Identifier identifier) {
   @Override
   public SparkTable loadTable(Identifier ident) throws NoSuchTableException {
     try {
-      Table icebergTable = load(ident);
-      return new SparkTable(icebergTable, !cacheEnabled);
+      Pair<Table, Long> icebergTable = load(ident);
+      return new SparkTable(icebergTable.first(), icebergTable.second(), !cacheEnabled);
     } catch (org.apache.iceberg.exceptions.NoSuchTableException e) {
       throw new NoSuchTableException(ident);
     }
```
```diff
@@ -220,7 +230,7 @@ public SparkTable alterTable(Identifier ident, TableChange... changes) throws NoSuchTableException {
     }

     try {
-      Table table = load(ident);
+      Table table = load(ident).first();
       commitChanges(table, setLocation, setSnapshotId, pickSnapshotId, propertyChanges, schemaChanges);
     } catch (org.apache.iceberg.exceptions.NoSuchTableException e) {
       throw new NoSuchTableException(ident);
```
```diff
@@ -256,7 +266,7 @@ public void renameTable(Identifier from, Identifier to) throws NoSuchTableException {
   @Override
   public void invalidateTable(Identifier ident) {
     try {
-      load(ident).refresh();
+      load(ident).first().refresh();
     } catch (org.apache.iceberg.exceptions.NoSuchTableException ignored) {
       // ignore if the table doesn't exist, it is not cached
     }
```
```diff
@@ -456,10 +466,97 @@ private static void checkNotPathIdentifier(Identifier identifier, String method) {
     }
   }

-  private Table load(Identifier ident) {
-    return isPathIdentifier(ident) ?
-        tables.load(((PathIdentifier) ident).location()) :
-        icebergCatalog.loadTable(buildIdentifier(ident));
+  private Pair<Table, Long> load(Identifier ident) {
+    if (isPathIdentifier(ident)) {
+      return loadFromPathIdentifier((PathIdentifier) ident);
+    }
+
+    try {
+      return Pair.of(icebergCatalog.loadTable(buildIdentifier(ident)), null);
+
+    } catch (org.apache.iceberg.exceptions.NoSuchTableException e) {
+      // if the original load didn't work, the identifier may be extended and include a snapshot selector
+      TableIdentifier namespaceAsIdent = buildIdentifier(namespaceToIdentifier(ident.namespace()));
```
**Contributor:** So we are also using …

**Contributor (author):** I debated that as well. I'm not sure where it would be cleanest to put this, which is why I added it just to Spark for now. I think we will also want to extend this to work for branch or tag names, at which point we may want to reconsider moving everything into core.

**Contributor (author):** I forgot one other thing. This is a more reasonable change if we do it in Spark because …

**Contributor:** I think we may want to support the timestamp as part of the …
```diff
+      Table table;
+      try {
+        table = icebergCatalog.loadTable(namespaceAsIdent);
+      } catch (org.apache.iceberg.exceptions.NoSuchTableException ignored) {
+        // the namespace does not identify a table, so it cannot be a table with a snapshot selector;
+        // throw the original exception
+        throw e;
+      }
+
+      // loading the namespace as a table worked; check the name to see if it is a valid selector
+      Matcher at = AT_TIME.matcher(ident.name());
+      if (at.matches()) {
+        long asOfTimestamp = Long.parseLong(at.group(1));
+        return Pair.of(table, SnapshotUtil.snapshotIdAsOfTime(table, asOfTimestamp));
+      }
+
+      Matcher id = SNAPSHOT_ID.matcher(ident.name());
+      if (id.matches()) {
+        long snapshotId = Long.parseLong(id.group(1));
+        return Pair.of(table, snapshotId);
+      }
+
+      // the name wasn't a valid snapshot selector; throw the original exception
+      throw e;
+    }
+  }
```
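The effect of this fallback is that a query can time-travel by appending a selector to the table name: the initial load fails, the namespace resolves to the real table, and the trailing name is parsed as a selector. A sketch of the intended usage, with a made-up catalog name, table name, and values:

```java
import org.apache.spark.sql.SparkSession;

public class TimeTravelByName {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().master("local").getOrCreate();

    // "demo" is assumed to be configured as an Iceberg SparkCatalog and
    // db.events to be an existing table; the selector values are illustrative

    // resolves db.events, then selects the snapshot current as of the timestamp
    spark.sql("SELECT * FROM demo.db.events.at_timestamp_1633000000000").show();

    // resolves db.events, then reads the given snapshot directly
    spark.sql("SELECT * FROM demo.db.events.snapshot_id_10963874102873").show();
  }
}
```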
```diff
+
+  private Pair<String, List<String>> parseLocationString(String location) {
+    int hashIndex = location.lastIndexOf('#');
+    if (hashIndex != -1 && !location.endsWith("#")) {
+      String baseLocation = location.substring(0, hashIndex);
+      List<String> metadata = COMMA.splitToList(location.substring(hashIndex + 1));
+      return Pair.of(baseLocation, metadata);
+    } else {
+      return Pair.of(location, ImmutableList.of());
+    }
+  }
```
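To make the location grammar concrete, here is a standalone re-implementation of the same rule using only the JDK (the paths and selector values are illustrative):

```java
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class LocationParsing {
  // everything after the last '#' is a comma-separated selector list,
  // unless the location ends with '#'
  static void parse(String location) {
    int hashIndex = location.lastIndexOf('#');
    String base = location;
    List<String> meta = Collections.emptyList();
    if (hashIndex != -1 && !location.endsWith("#")) {
      base = location.substring(0, hashIndex);
      meta = Arrays.asList(location.substring(hashIndex + 1).split(","));
    }
    System.out.println(base + " -> " + meta);
  }

  public static void main(String[] args) {
    parse("hdfs://nn/warehouse/db/t#snapshot_id_10963874102873"); // base -> [snapshot_id_10963874102873]
    parse("file:/tmp/warehouse/t#files,at_1633000000000");        // base -> [files, at_1633000000000]
    parse("file:/tmp/warehouse/t");                               // base -> []
  }
}
```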
```diff
+
+  private Pair<Table, Long> loadFromPathIdentifier(PathIdentifier ident) {
+    Pair<String, List<String>> parsed = parseLocationString(ident.location());
+
+    String metadataTableName = null;
+    Long asOfTimestamp = null;
+    Long snapshotId = null;
+    for (String meta : parsed.second()) {
+      if (MetadataTableType.from(meta) != null) {
+        metadataTableName = meta;
+        continue;
+      }
+
+      Matcher at = AT_TIME.matcher(meta);
+      if (at.matches()) {
+        asOfTimestamp = Long.parseLong(at.group(1));
+        continue;
+      }
+
+      Matcher id = SNAPSHOT_ID.matcher(meta);
+      if (id.matches()) {
+        snapshotId = Long.parseLong(id.group(1));
+      }
+    }
+
+    Preconditions.checkArgument(asOfTimestamp == null || snapshotId == null,
+        "Cannot specify as-of-timestamp and snapshot-id: %s", ident.location());
+
+    Table table = tables.load(parsed.first() + (metadataTableName != null ? "#" + metadataTableName : ""));
+
+    if (snapshotId != null) {
+      return Pair.of(table, snapshotId);
+    } else if (asOfTimestamp != null) {
+      return Pair.of(table, SnapshotUtil.snapshotIdAsOfTime(table, asOfTimestamp));
+    } else {
+      return Pair.of(table, null);
+    }
+  }
```
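Assuming path-based reads are routed through this catalog as PathIdentifiers (for example, a DataFrameReader load of a table location), the selector fragment can also carry a metadata table name. A sketch with illustrative paths and values:

```java
import org.apache.spark.sql.SparkSession;

public class PathSelectors {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().master("local").getOrCreate();

    // time travel to the snapshot current as of a millisecond timestamp
    spark.read().format("iceberg").load("file:/tmp/warehouse/t#at_1633000000000").show();

    // a metadata table and a snapshot selector combined in one fragment
    spark.read().format("iceberg").load("file:/tmp/warehouse/t#files,snapshot_id_10963874102873").show();

    // specifying both selector kinds trips the Preconditions check above:
    // "Cannot specify as-of-timestamp and snapshot-id: ..."
  }
}
```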
```diff
+
+  private Identifier namespaceToIdentifier(String[] namespace) {
+    String[] ns = Arrays.copyOf(namespace, namespace.length - 1);
+    String name = namespace[ns.length];
+    return Identifier.of(ns, name);
+  }
+
   private Catalog.TableBuilder newBuilder(Identifier ident, Schema schema) {
```
**SparkTable.java**
```diff
@@ -21,6 +21,7 @@

 import java.util.Map;
 import java.util.Set;
+import org.apache.arrow.util.Preconditions;
```
**Contributor:** Is there a reason to use this instead of `com.google.common.base.Preconditions`?

**Contributor (author):** No. And this shouldn't be allowed. We have a rule that checks for the wrong one in imports. Looks like we now need to add the Arrow package.
```diff
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;
```
```diff
@@ -29,6 +30,7 @@
 import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
 import org.apache.iceberg.spark.Spark3Util;
 import org.apache.iceberg.spark.SparkFilters;
```
```diff
@@ -76,7 +78,7 @@ public class SparkTable implements org.apache.spark.sql.connector.catalog.Table,
       TableCapability.OVERWRITE_DYNAMIC);

   private final Table icebergTable;
-  private final StructType requestedSchema;
+  private final Long snapshotId;
   private final boolean refreshEagerly;
   private StructType lazyTableSchema = null;
   private SparkSession lazySpark = null;
```
```diff
@@ -85,15 +87,10 @@ public SparkTable(Table icebergTable, boolean refreshEagerly) {
     this(icebergTable, null, refreshEagerly);
   }

-  public SparkTable(Table icebergTable, StructType requestedSchema, boolean refreshEagerly) {
+  public SparkTable(Table icebergTable, Long snapshotId, boolean refreshEagerly) {
     this.icebergTable = icebergTable;
-    this.requestedSchema = requestedSchema;
+    this.snapshotId = snapshotId;
     this.refreshEagerly = refreshEagerly;
-
-    if (requestedSchema != null) {
-      // convert the requested schema to throw an exception if any requested fields are unknown
-      SparkSchemaUtil.convert(icebergTable.schema(), requestedSchema);
-    }
   }

   private SparkSession sparkSession() {
```
```diff
@@ -116,11 +113,8 @@ public String name() {
   @Override
   public StructType schema() {
     if (lazyTableSchema == null) {
-      if (requestedSchema != null) {
-        this.lazyTableSchema = SparkSchemaUtil.convert(SparkSchemaUtil.prune(icebergTable.schema(), requestedSchema));
-      } else {
-        this.lazyTableSchema = SparkSchemaUtil.convert(icebergTable.schema());
-      }
+      // TODO: return the schema of the snapshot if it is set
+      this.lazyTableSchema = SparkSchemaUtil.convert(icebergTable.schema());
     }

     return lazyTableSchema;
```
```diff
@@ -171,17 +165,15 @@ public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) {
       icebergTable.refresh();
     }

-    SparkScanBuilder scanBuilder = new SparkScanBuilder(sparkSession(), icebergTable, options);
-
-    if (requestedSchema != null) {
-      scanBuilder.pruneColumns(requestedSchema);
-    }
-
-    return scanBuilder;
+    return new SparkScanBuilder(sparkSession(), icebergTable, addSnapshotId(options, snapshotId));
   }

   @Override
   public WriteBuilder newWriteBuilder(LogicalWriteInfo info) {
+    Preconditions.checkArgument(
+        snapshotId == null,
+        "Cannot write to table at a specific snapshot: %s", snapshotId);
+
     if (info.options().containsKey(SparkWriteOptions.REWRITTEN_FILE_SCAN_TASK_SET_ID)) {
       // replace data files in the given file scan task set with new files
       return new SparkRewriteBuilder(sparkSession(), icebergTable, info);
```
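With this check, a table resolved through a snapshot selector is effectively read-only: writes fail before any planning happens. A sketch of the expected behavior (catalog, table, and values made up):

```java
import org.apache.spark.sql.SparkSession;

public class WriteGuard {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().master("local").getOrCreate();

    // the Preconditions check in newWriteBuilder rejects this with
    // IllegalArgumentException: Cannot write to table at a specific snapshot: ...
    spark.sql("INSERT INTO demo.db.events.at_1633000000000 SELECT * FROM demo.db.staging");
  }
}
```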
```diff
@@ -192,6 +184,10 @@ public WriteBuilder newWriteBuilder(LogicalWriteInfo info) {

   @Override
   public MergeBuilder newMergeBuilder(String operation, LogicalWriteInfo info) {
+    Preconditions.checkArgument(
+        snapshotId == null,
+        "Cannot write to table at a specific snapshot: %s", snapshotId);
+
     String mode = getRowLevelOperationMode(operation);
     ValidationException.check(mode.equals("copy-on-write"), "Unsupported mode for %s: %s", operation, mode);
     return new SparkMergeBuilder(sparkSession(), icebergTable, operation, info);
```
```diff
@@ -212,6 +208,10 @@ private String getRowLevelOperationMode(String operation) {

   @Override
   public boolean canDeleteWhere(Filter[] filters) {
+    Preconditions.checkArgument(
+        snapshotId == null,
+        "Cannot delete from table at a specific snapshot: %s", snapshotId);
+
     if (table().specs().size() > 1) {
       // cannot guarantee a metadata delete will be successful if we have multiple specs
       return false;
```
```diff
@@ -283,4 +283,19 @@ public int hashCode() {
     // use only name in order to correctly invalidate Spark cache
     return icebergTable.name().hashCode();
   }
+
+  private static CaseInsensitiveStringMap addSnapshotId(CaseInsensitiveStringMap options, Long snapshotId) {
+    if (snapshotId != null) {
+      Preconditions.checkArgument(options.get(SparkReadOptions.SNAPSHOT_ID) == null,
+          "Cannot override snapshot ID more than once: %s", options.get(SparkReadOptions.SNAPSHOT_ID));
+
+      Map<String, String> scanOptions = Maps.newHashMap();
+      scanOptions.putAll(options.asCaseSensitiveMap());
+      scanOptions.put(SparkReadOptions.SNAPSHOT_ID, String.valueOf(snapshotId));
+
+      return new CaseInsensitiveStringMap(scanOptions);
+    }
+
+    return options;
+  }
 }
```
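In effect, addSnapshotId makes a selector-loaded table scan behave as if the caller had passed the snapshot read option explicitly. A sketch of the equivalent reader call (`snapshot-id` is the existing Iceberg read option behind SparkReadOptions.SNAPSHOT_ID; the value is illustrative):

```java
import org.apache.spark.sql.SparkSession;

public class EquivalentReadOption {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().master("local").getOrCreate();

    // what the injected SparkReadOptions.SNAPSHOT_ID amounts to
    spark.read()
        .format("iceberg")
        .option("snapshot-id", "10963874102873")
        .load("demo.db.events")
        .show();
  }
}
```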
**Contributor:** What's the reason for supporting so many different ways to specify the time and snapshot ID? My opinion is we should just support one shortcut and one full name for each, for example `at_` and `at_timestamp_` for timestamp travel, `s_` and `snapshot_id_` for snapshot ID travel. Maybe even the full-name ones are not necessary.
**Contributor (author):** I may have gone a bit too far here. I think that we want to be able to use the full `at_timestamp_12938471` version because that is the least likely to conflict with existing table names. Similarly, I think it is valuable to have a short version, like `at_<timestamp>` and `snap_<id>`, so you don't have to type `at_timestamp_` or the full `snapshot_id_` every time. Since we were already testing multiple prefixes, I added a few that I thought would be valuable to avoid confusion:

- `_` optional because that's an easy mistake to make
- `time` and not just `timestamp` because people may not remember
- `_id` optional, and allow just `snap_` or `s_` as shortened forms

The logic made sense at every step, but there are quite a few variations. I'm up for defining the full version and one shortening if you think it's best to have just a couple.
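For reference, the main selector forms the two patterns accept as written (read off the regexes above; `<millis>` is a millisecond timestamp, `<id>` a snapshot ID, and the `_` before the digits is optional in every form):

- `AT_TIME`: `at_timestamp_<millis>`, `at_time_<millis>`, `at_<millis>`, `at<millis>`
- `SNAPSHOT_ID`: `snapshot_id_<id>`, `snapshot_<id>`, `snap_id_<id>`, `snap_<id>`, `s_id_<id>`, `s_<id>`, `s<id>`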
at_<timestamp>andsnap_<id>so you don't have to typeat_timestamp_or the fullsnapshot_id_every time. Since we were already testing multiple prefixes, I added a few that I thought would be valuable to avoid confusion:_optional because that's an easy mistake to maketimeand not justtimestampbecause people may not remember_idand allow justsnap_ors_as shortened formsThe logic made sense at every step but there are quite a few variations. I'm up for defining the full version and one shortening if you think it's best to have just a couple.