-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Various Kudu cleanups and fixes related to schema emulation races #11264
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8ed149c
4deebd0
db8c708
ee0f62e
b3966d3
5397928
d174114
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,7 +27,7 @@ | |
| import io.trino.spi.type.Type; | ||
| import io.trino.spi.type.VarcharType; | ||
| import org.apache.kudu.client.KuduException; | ||
| import org.apache.kudu.client.KuduSession; | ||
| import org.apache.kudu.client.KuduOperationApplier; | ||
| import org.apache.kudu.client.KuduTable; | ||
| import org.apache.kudu.client.PartialRow; | ||
| import org.apache.kudu.client.Upsert; | ||
|
|
@@ -58,13 +58,12 @@ | |
| import static java.lang.String.format; | ||
| import static java.util.Objects.requireNonNull; | ||
| import static java.util.concurrent.CompletableFuture.completedFuture; | ||
| import static org.apache.kudu.client.KuduOperationApplier.applyOperationAndVerifySucceeded; | ||
|
|
||
| public class KuduPageSink | ||
| implements ConnectorPageSink | ||
| { | ||
| private final ConnectorSession connectorSession; | ||
| private final KuduSession session; | ||
| private final KuduClientSession session; | ||
| private final KuduTable table; | ||
| private final List<Type> columnTypes; | ||
| private final List<Type> originalColumnTypes; | ||
|
|
@@ -102,35 +101,35 @@ private KuduPageSink( | |
| this.generateUUID = mapping.isGenerateUUID(); | ||
|
|
||
| this.table = table; | ||
| this.session = clientSession.newSession(); | ||
| this.session = clientSession; | ||
| uuid = UUID.randomUUID().toString(); | ||
| } | ||
|
|
||
| @Override | ||
| public CompletableFuture<?> appendPage(Page page) | ||
| { | ||
| for (int position = 0; position < page.getPositionCount(); position++) { | ||
| Upsert upsert = table.newUpsert(); | ||
| PartialRow row = upsert.getRow(); | ||
| int start = 0; | ||
| if (generateUUID) { | ||
| String id = format("%s-%08x", uuid, nextSubId++); | ||
| row.addString(0, id); | ||
| start = 1; | ||
| } | ||
|
|
||
| for (int channel = 0; channel < page.getChannelCount(); channel++) { | ||
| appendColumn(row, page, position, channel, channel + start); | ||
| } | ||
|
|
||
| try { | ||
| applyOperationAndVerifySucceeded(session, upsert); | ||
| } | ||
| catch (KuduException e) { | ||
| throw new RuntimeException(e); | ||
| try (KuduOperationApplier operationApplier = KuduOperationApplier.fromKuduClientSession(session)) { | ||
| for (int position = 0; position < page.getPositionCount(); position++) { | ||
| Upsert upsert = table.newUpsert(); | ||
| PartialRow row = upsert.getRow(); | ||
| int start = 0; | ||
| if (generateUUID) { | ||
| String id = format("%s-%08x", uuid, nextSubId++); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Q: Should
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The completablefuture
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to be clear, non-volatile means that any other thread may not see the new value of
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Concrete example: If that were a multi-threaded Exector, this line could yield the same value in both threads: (both threads read kuduPageSink.nextSubId , both increment it, neither see the value of the other, so it's both a race and a lost update - but even if only 1 thread did updates, because it's not volatile, there's no guarantee anyone who has a reference to kuduPageSink instance will see the update On reflection, I'd use But anyways - just wanted to throw that out there for your consideration!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct me if I'm wrong, but page sinks are not expected to be thread safe. Nowhere in the javadoc for Here we only return |
||
| row.addString(0, id); | ||
| start = 1; | ||
| } | ||
|
|
||
| for (int channel = 0; channel < page.getChannelCount(); channel++) { | ||
| appendColumn(row, page, position, channel, channel + start); | ||
| } | ||
|
|
||
| operationApplier.applyOperationAsync(upsert); | ||
| } | ||
| return NOT_BLOCKED; | ||
| } | ||
| catch (KuduException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| return NOT_BLOCKED; | ||
| } | ||
|
|
||
| private void appendColumn(PartialRow row, Page page, int position, int channel, int destChannel) | ||
|
|
@@ -191,23 +190,11 @@ else if (type instanceof DecimalType) { | |
| @Override | ||
| public CompletableFuture<Collection<Slice>> finish() | ||
| { | ||
| closeSession(); | ||
| return completedFuture(ImmutableList.of()); | ||
| } | ||
|
|
||
| @Override | ||
| public void abort() | ||
| { | ||
| closeSession(); | ||
| } | ||
|
|
||
| private void closeSession() | ||
| { | ||
| try { | ||
| session.close(); | ||
| } | ||
| catch (KuduException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,10 +23,9 @@ | |
| import org.apache.kudu.Type; | ||
| import org.apache.kudu.client.CreateTableOptions; | ||
| import org.apache.kudu.client.Delete; | ||
| import org.apache.kudu.client.Insert; | ||
| import org.apache.kudu.client.KuduException; | ||
| import org.apache.kudu.client.KuduOperationApplier; | ||
| import org.apache.kudu.client.KuduScanner; | ||
| import org.apache.kudu.client.KuduSession; | ||
| import org.apache.kudu.client.KuduTable; | ||
| import org.apache.kudu.client.RowResult; | ||
| import org.apache.kudu.client.RowResultIterator; | ||
|
|
@@ -40,7 +39,6 @@ | |
| import static io.trino.plugin.kudu.KuduClientSession.DEFAULT_SCHEMA; | ||
| import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; | ||
| import static io.trino.spi.StandardErrorCode.GENERIC_USER_ERROR; | ||
| import static org.apache.kudu.client.KuduOperationApplier.applyOperationAndVerifySucceeded; | ||
|
|
||
| public class SchemaEmulationByTableNameConvention | ||
| implements SchemaEmulation | ||
|
|
@@ -62,17 +60,11 @@ public void createSchema(KuduClientWrapper client, String schemaName) | |
| throw new SchemaAlreadyExistsException(schemaName); | ||
| } | ||
| else { | ||
| try { | ||
| try (KuduOperationApplier operationApplier = KuduOperationApplier.fromKuduClientWrapper(client)) { | ||
| KuduTable schemasTable = getSchemasTable(client); | ||
| KuduSession session = client.newSession(); | ||
| try { | ||
| Upsert upsert = schemasTable.newUpsert(); | ||
| upsert.getRow().addString(0, schemaName); | ||
| applyOperationAndVerifySucceeded(session, upsert); | ||
| } | ||
| finally { | ||
| session.close(); | ||
| } | ||
| Upsert upsert = schemasTable.newUpsert(); | ||
| upsert.getRow().addString(0, schemaName); | ||
| operationApplier.applyOperationAsync(upsert); | ||
| } | ||
| catch (KuduException e) { | ||
| throw new TrinoException(GENERIC_INTERNAL_ERROR, e); | ||
|
|
@@ -99,22 +91,16 @@ public void dropSchema(KuduClientWrapper client, String schemaName) | |
| throw new TrinoException(GENERIC_USER_ERROR, "Deleting default schema not allowed."); | ||
| } | ||
| else { | ||
| try { | ||
| try (KuduOperationApplier operationApplier = KuduOperationApplier.fromKuduClientWrapper(client)) { | ||
| String prefix = getPrefixForTablesOfSchema(schemaName); | ||
| for (String name : client.getTablesList(prefix).getTablesList()) { | ||
| client.deleteTable(name); | ||
| } | ||
|
|
||
| KuduTable schemasTable = getSchemasTable(client); | ||
| KuduSession session = client.newSession(); | ||
| try { | ||
| Delete delete = schemasTable.newDelete(); | ||
| delete.getRow().addString(0, schemaName); | ||
| applyOperationAndVerifySucceeded(session, delete); | ||
| } | ||
| finally { | ||
| session.close(); | ||
| } | ||
| Delete delete = schemasTable.newDelete(); | ||
| delete.getRow().addString(0, schemaName); | ||
| operationApplier.applyOperationAsync(delete); | ||
| } | ||
| catch (KuduException e) { | ||
| throw new TrinoException(GENERIC_INTERNAL_ERROR, e); | ||
|
|
@@ -127,9 +113,7 @@ public List<String> listSchemaNames(KuduClientWrapper client) | |
| { | ||
| try { | ||
| if (rawSchemasTable == null) { | ||
| if (!client.tableExists(rawSchemasTableName)) { | ||
| createAndFillSchemasTable(client); | ||
| } | ||
| createAndFillSchemasTable(client); | ||
| rawSchemasTable = getSchemasTable(client); | ||
| } | ||
|
|
||
|
|
@@ -167,17 +151,29 @@ private void createAndFillSchemasTable(KuduClientWrapper client) | |
| Schema schema = new Schema(ImmutableList.of(schemaColumnSchema)); | ||
| CreateTableOptions options = new CreateTableOptions(); | ||
| options.addHashPartitions(ImmutableList.of(schemaColumnSchema.getName()), 2); | ||
| KuduTable schemasTable = client.createTable(rawSchemasTableName, schema, options); | ||
| KuduSession session = client.newSession(); | ||
| try { | ||
|
|
||
| KuduTable schemasTable = createTableIfNotExists(client, schema, rawSchemasTableName, options); | ||
| try (KuduOperationApplier operationApplier = KuduOperationApplier.fromKuduClientWrapper(client)) { | ||
| for (String schemaName : existingSchemaNames) { | ||
| Insert insert = schemasTable.newInsert(); | ||
| insert.getRow().addString(0, schemaName); | ||
| applyOperationAndVerifySucceeded(session, insert); | ||
| Upsert upsert = schemasTable.newUpsert(); | ||
| upsert.getRow().addString(0, schemaName); | ||
| operationApplier.applyOperationAsync(upsert); | ||
| } | ||
| } | ||
| finally { | ||
| session.close(); | ||
| } | ||
|
|
||
| private static KuduTable createTableIfNotExists(KuduClientWrapper client, Schema schema, String name, CreateTableOptions options) | ||
| throws KuduException | ||
| { | ||
| try { | ||
| return client.createTable(name, schema, options); | ||
| } | ||
| catch (KuduException e) { | ||
| if (e.getStatus().isAlreadyPresent()) { | ||
| // Table already exists | ||
| return client.openTable(name); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Q: (For complete correctness) should we do a comparison on the existing table's options to be sure they're effectively "the same"?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Looking a little more closely, it seems the only usage is from
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The whole approach of emulating schemas in kudu using a kudu table is not safe given kudu's consistency semantics and trino does not propagate client timestamps in kudu. I talked with some kudu maintainers, impala uses the hive metastore to emulate schemas in kudu, that introduces a big dependency however. |
||
| } | ||
| throw e; | ||
| } | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.