stdOutErr, String expectedOutput,
String stdOutSingleSpaced = singleSpace(stdOutErr.getLeft()).replaceAll(" ", "");
expectedOutput = singleSpace(expectedOutput).replaceAll(" ", "");
+ LOG.error("stdOutErr : " + stdOutErr.getLeft());
+ LOG.error("stdOutErr.getRight : " + stdOutErr.getRight());
+ LOG.error("stdOutSingleSpaced : " + stdOutSingleSpaced);
+ LOG.error("expectedOutput : " + expectedOutput);
+
int lastIndex = 0;
int count = 0;
while (lastIndex != -1) {
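The hunk above is truncated here; the loop presumably finishes with the standard indexOf counting idiom and an assertion on the occurrence count. A minimal sketch, assuming a `times` parameter and a JUnit 5 assertEquals consistent with the call sites later in this patch:

    while (lastIndex != -1) {
      lastIndex = stdOutSingleSpaced.indexOf(expectedOutput, lastIndex);
      if (lastIndex != -1) {
        count++;                              // one more occurrence of expectedOutput
        lastIndex += expectedOutput.length(); // resume the search past this match
      }
    }
    assertEquals(times, count); // assumed: asserts the expected occurrence count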
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
index b68d06a64329..f142ebd502f1 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
@@ -21,7 +21,6 @@
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.collection.Pair;
-
import org.apache.hudi.keygen.SimpleKeyGenerator;
import org.junit.jupiter.api.AfterEach;
@@ -33,24 +32,34 @@
/**
* Goes through steps described in https://hudi.apache.org/docker_demo.html
- *
+ *
* To run this as a standalone test in the IDE or from the command line, first bring up the demo setup using
* `docker/setup_demo.sh` and then run the test class as you normally would.
*/
public class ITTestHoodieDemo extends ITTestBase {
+ private static final String TRINO_TABLE_CHECK_FILENAME = "trino-table-check.commands";
+ private static final String TRINO_BATCH1_FILENAME = "trino-batch1.commands";
+ private static final String TRINO_BATCH2_FILENAME = "trino-batch2-after-compaction.commands";
+
private static final String HDFS_DATA_DIR = "/usr/hive/data/input";
private static final String HDFS_BATCH_PATH1 = HDFS_DATA_DIR + "/batch_1.json";
private static final String HDFS_BATCH_PATH2 = HDFS_DATA_DIR + "/batch_2.json";
private static final String HDFS_PRESTO_INPUT_TABLE_CHECK_PATH = HDFS_DATA_DIR + "/presto-table-check.commands";
private static final String HDFS_PRESTO_INPUT_BATCH1_PATH = HDFS_DATA_DIR + "/presto-batch1.commands";
private static final String HDFS_PRESTO_INPUT_BATCH2_PATH = HDFS_DATA_DIR + "/presto-batch2-after-compaction.commands";
+ private static final String HDFS_TRINO_INPUT_TABLE_CHECK_PATH = HDFS_DATA_DIR + "/" + TRINO_TABLE_CHECK_FILENAME;
+ private static final String HDFS_TRINO_INPUT_BATCH1_PATH = HDFS_DATA_DIR + "/" + TRINO_BATCH1_FILENAME;
+ private static final String HDFS_TRINO_INPUT_BATCH2_PATH = HDFS_DATA_DIR + "/" + TRINO_BATCH2_FILENAME;
private static final String INPUT_BATCH_PATH1 = HOODIE_WS_ROOT + "/docker/demo/data/batch_1.json";
private static final String PRESTO_INPUT_TABLE_CHECK_RELATIVE_PATH = "/docker/demo/presto-table-check.commands";
private static final String PRESTO_INPUT_BATCH1_RELATIVE_PATH = "/docker/demo/presto-batch1.commands";
private static final String INPUT_BATCH_PATH2 = HOODIE_WS_ROOT + "/docker/demo/data/batch_2.json";
private static final String PRESTO_INPUT_BATCH2_RELATIVE_PATH = "/docker/demo/presto-batch2-after-compaction.commands";
+ private static final String TRINO_INPUT_TABLE_CHECK_RELATIVE_PATH = "/docker/demo/" + TRINO_TABLE_CHECK_FILENAME;
+ private static final String TRINO_INPUT_BATCH1_RELATIVE_PATH = "/docker/demo/" + TRINO_BATCH1_FILENAME;
+ private static final String TRINO_INPUT_BATCH2_RELATIVE_PATH = "/docker/demo/" + TRINO_BATCH2_FILENAME;
private static final String COW_BASE_PATH = "/user/hive/warehouse/stock_ticks_cow";
private static final String MOR_BASE_PATH = "/user/hive/warehouse/stock_ticks_mor";
@@ -110,12 +119,14 @@ public void testParquetDemo() throws Exception {
ingestFirstBatchAndHiveSync();
testHiveAfterFirstBatch();
testPrestoAfterFirstBatch();
+ testTrinoAfterFirstBatch();
testSparkSQLAfterFirstBatch();
// batch 2
ingestSecondBatchAndHiveSync();
testHiveAfterSecondBatch();
testPrestoAfterSecondBatch();
+ testTrinoAfterSecondBatch();
testSparkSQLAfterSecondBatch();
testIncrementalHiveQueryBeforeCompaction();
testIncrementalSparkSQLQuery();
@@ -125,6 +136,7 @@ public void testParquetDemo() throws Exception {
testHiveAfterSecondBatchAfterCompaction();
testPrestoAfterSecondBatchAfterCompaction();
+ testTrinoAfterSecondBatchAfterCompaction();
testIncrementalHiveQueryAfterCompaction();
}
@@ -133,7 +145,7 @@ public void testParquetDemo() throws Exception {
public void testHFileDemo() throws Exception {
baseFileFormat = HoodieFileFormat.HFILE;
- // TODO: Preseto and SparkSQL support for HFile format
+ // TODO: Presto, Trino and SparkSQL support for HFile format
setupDemo();
@@ -141,12 +153,14 @@ public void testHFileDemo() throws Exception {
ingestFirstBatchAndHiveSync();
testHiveAfterFirstBatch();
//testPrestoAfterFirstBatch();
+ //testTrinoAfterFirstBatch();
//testSparkSQLAfterFirstBatch();
// batch 2
ingestSecondBatchAndHiveSync();
testHiveAfterSecondBatch();
//testPrestoAfterSecondBatch();
+ //testTrinoAfterSecondBatch();
//testSparkSQLAfterSecondBatch();
testIncrementalHiveQueryBeforeCompaction();
//testIncrementalSparkSQLQuery();
@@ -155,6 +169,7 @@ public void testHFileDemo() throws Exception {
scheduleAndRunCompaction();
testHiveAfterSecondBatchAfterCompaction();
//testPrestoAfterSecondBatchAfterCompaction();
+ //testTrinoAfterSecondBatchAfterCompaction();
//testIncrementalHiveQueryAfterCompaction();
}
@@ -162,7 +177,8 @@ private void setupDemo() throws Exception {
List<String> cmds = CollectionUtils.createImmutableList("hdfs dfsadmin -safemode wait",
"hdfs dfs -mkdir -p " + HDFS_DATA_DIR,
"hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH1 + " " + HDFS_BATCH_PATH1,
- "/bin/bash " + DEMO_CONTAINER_SCRIPT);
+ "/bin/bash " + DEMO_CONTAINER_SCRIPT,
+ "mkdir -p " + HDFS_DATA_DIR);
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
@@ -174,6 +190,10 @@ private void setupDemo() throws Exception {
executePrestoCopyCommand(System.getProperty("user.dir") + "/.." + PRESTO_INPUT_TABLE_CHECK_RELATIVE_PATH, HDFS_DATA_DIR);
executePrestoCopyCommand(System.getProperty("user.dir") + "/.." + PRESTO_INPUT_BATCH1_RELATIVE_PATH, HDFS_DATA_DIR);
executePrestoCopyCommand(System.getProperty("user.dir") + "/.." + PRESTO_INPUT_BATCH2_RELATIVE_PATH, HDFS_DATA_DIR);
+
+ executeTrinoCopyCommand(System.getProperty("user.dir") + "/.." + TRINO_INPUT_TABLE_CHECK_RELATIVE_PATH, HDFS_DATA_DIR);
+ executeTrinoCopyCommand(System.getProperty("user.dir") + "/.." + TRINO_INPUT_BATCH1_RELATIVE_PATH, HDFS_DATA_DIR);
+ executeTrinoCopyCommand(System.getProperty("user.dir") + "/.." + TRINO_INPUT_BATCH2_RELATIVE_PATH, HDFS_DATA_DIR);
}
private void ingestFirstBatchAndHiveSync() throws Exception {
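The executeTrinoCopyCommand and executeTrinoCommandFile helpers used in the hunks above and below live in ITTestBase and are not part of this patch. As a rough, self-contained illustration only, modeled on the existing Presto helpers, the command-file helper plausibly amounts to the following; the container name, CLI binary, and coordinator address are all assumptions:

    import org.apache.hudi.common.util.collection.Pair;

    // Hypothetical stand-in for ITTestBase#executeTrinoCommandFile, NOT the real
    // helper: exec the Trino CLI inside a demo container and return (stdout, stderr).
    public class TrinoCliSketch {
      public static Pair<String, String> executeTrinoCommandFile(String commandFile) throws Exception {
        Process p = new ProcessBuilder(
            "docker", "exec", "adhoc-1",                      // assumed container
            "trino", "--server", "trino-coordinator-1:8091",  // assumed coordinator address
            "-f", commandFile).start();
        String stdout = new String(p.getInputStream().readAllBytes()); // captured query output
        String stderr = new String(p.getErrorStream().readAllBytes()); // captured CLI errors
        p.waitFor();
        return Pair.of(stdout, stderr);
      }
    }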
@@ -335,6 +355,20 @@ private void testPrestoAfterFirstBatch() throws Exception {
"\"GOOG\",\"2018-08-31 10:29:00\",\"3391\",\"1230.1899\",\"1230.085\"", 2);
}
+ private void testTrinoAfterFirstBatch() throws Exception {
+ Pair<String, String> stdOutErrPair = executeTrinoCommandFile(HDFS_TRINO_INPUT_TABLE_CHECK_PATH);
+ assertStdOutContains(stdOutErrPair, "stock_ticks_cow", 2);
+ assertStdOutContains(stdOutErrPair, "stock_ticks_mor", 4);
+
+ stdOutErrPair = executeTrinoCommandFile(HDFS_TRINO_INPUT_BATCH1_PATH);
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 10:29:00\"", 4);
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 09:59:00\",\"6330\",\"1230.5\",\"1230.02\"", 2);
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 10:29:00\",\"3391\",\"1230.1899\",\"1230.085\"", 2);
+ }
+
private void testHiveAfterSecondBatch() throws Exception {
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n+---------+----------------------+\n"
@@ -361,7 +395,21 @@ private void testPrestoAfterSecondBatch() throws Exception {
assertStdOutContains(stdOutErrPair,
"\"GOOG\",\"2018-08-31 10:59:00\"", 2);
assertStdOutContains(stdOutErrPair,
- "\"GOOG\",\"2018-08-31 09:59:00\",\"6330\",\"1230.5\",\"1230.02\"",2);
+ "\"GOOG\",\"2018-08-31 09:59:00\",\"6330\",\"1230.5\",\"1230.02\"", 2);
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 10:29:00\",\"3391\",\"1230.1899\",\"1230.085\"");
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 10:59:00\",\"9021\",\"1227.1993\",\"1227.215\"");
+ }
+
+ private void testTrinoAfterSecondBatch() throws Exception {
+ Pair<String, String> stdOutErrPair = executeTrinoCommandFile(HDFS_TRINO_INPUT_BATCH1_PATH);
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 10:29:00\"", 2);
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 10:59:00\"", 2);
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 09:59:00\",\"6330\",\"1230.5\",\"1230.02\"", 2);
assertStdOutContains(stdOutErrPair,
"\"GOOG\",\"2018-08-31 10:29:00\",\"3391\",\"1230.1899\",\"1230.085\"");
assertStdOutContains(stdOutErrPair,
@@ -390,6 +438,16 @@ private void testPrestoAfterSecondBatchAfterCompaction() throws Exception {
"\"GOOG\",\"2018-08-31 10:59:00\",\"9021\",\"1227.1993\",\"1227.215\"");
}
+ private void testTrinoAfterSecondBatchAfterCompaction() throws Exception {
+ Pair<String, String> stdOutErrPair = executeTrinoCommandFile(HDFS_TRINO_INPUT_BATCH2_PATH);
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 10:59:00\"", 2);
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 09:59:00\",\"6330\",\"1230.5\",\"1230.02\"");
+ assertStdOutContains(stdOutErrPair,
+ "\"GOOG\",\"2018-08-31 10:59:00\",\"9021\",\"1227.1993\",\"1227.215\"");
+ }
+
private void testSparkSQLAfterSecondBatch() throws Exception {
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH2_COMMANDS, true);
assertStdOutContains(stdOutErrPair,
diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml
index 222478090b4b..2e517e67efd0 100644
--- a/packaging/hudi-flink-bundle/pom.xml
+++ b/packaging/hudi-flink-bundle/pom.xml
@@ -70,6 +70,7 @@
<resource>META-INF/LICENSE</resource>
<file>target/classes/META-INF/LICENSE</file>
+
@@ -137,7 +138,7 @@
<include>org.apache.hive:hive-service</include>
<include>org.apache.hive:hive-service-rpc</include>
<include>org.apache.hive:hive-exec</include>
- <include>org.apache.hive:hive-standalone-metastore</include>
+ <include>org.apache.hive:hive-standalone-metastore</include>
<include>org.apache.hive:hive-metastore</include>
<include>org.apache.hive:hive-jdbc</include>
<include>org.datanucleus:datanucleus-core</include>
@@ -147,10 +148,18 @@
<include>org.apache.hbase:hbase-common</include>
<include>org.apache.hbase:hbase-client</include>
+ <include>org.apache.hbase:hbase-hadoop-compat</include>
+ <include>org.apache.hbase:hbase-hadoop2-compat</include>
+ <include>org.apache.hbase:hbase-metrics</include>
+ <include>org.apache.hbase:hbase-metrics-api</include>
<include>org.apache.hbase:hbase-server</include>
- <include>org.apache.hbase:hbase-protocol</include>
- <include>org.apache.htrace:htrace-core</include>
+ <include>org.apache.hbase:hbase-protocol-shaded</include>
+ <include>org.apache.hbase.thirdparty:hbase-shaded-miscellaneous</include>
+ <include>org.apache.hbase.thirdparty:hbase-shaded-netty</include>
+ <include>org.apache.hbase.thirdparty:hbase-shaded-protobuf</include>
+ <include>org.apache.htrace:htrace-core4</include>
<include>commons-codec:commons-codec</include>
+ <include>commons-io:commons-io</include>
@@ -162,6 +171,22 @@
<pattern>org.apache.avro.</pattern>
<shadedPattern>${flink.bundle.shade.prefix}org.apache.avro.</shadedPattern>
</relocation>
+ <relocation>
+   <pattern>org.apache.commons.io.</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.commons.io.</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.hbase.</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.hbase.</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hbase.</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hbase.</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.htrace.</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.htrace.</shadedPattern>
+ </relocation>
<relocation>
<pattern>com.yammer.metrics.</pattern>
<shadedPattern>${flink.bundle.shade.prefix}com.yammer.metrics.</shadedPattern>
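The relocations above move commons-io, the HBase packages (including the org.apache.hbase.thirdparty shaded artifacts), and htrace under the org.apache.hudi. prefix inside the bundle jar. A quick way to sanity-check a built bundle is to load the shaded name and confirm the original is gone; a minimal sketch, where the specific HBase class chosen is an illustrative assumption:

    // Run with the built hudi-flink-bundle jar (and nothing else hbase-related)
    // on the classpath; CellUtil is just one representative relocated class.
    public class ShadeCheck {
      public static void main(String[] args) throws Exception {
        Class.forName("org.apache.hudi.org.apache.hadoop.hbase.CellUtil"); // shaded copy resolves
        try {
          Class.forName("org.apache.hadoop.hbase.CellUtil");               // original should be absent
          System.out.println("WARN: unshaded HBase class also on the classpath");
        } catch (ClassNotFoundException expected) {
          System.out.println("OK: HBase relocated under org.apache.hudi.");
        }
      }
    }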
@@ -191,6 +216,70 @@
<pattern>com.fasterxml.jackson.</pattern>
<shadedPattern>${flink.bundle.shade.prefix}com.fasterxml.jackson.</shadedPattern>
</relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.MetricHistogram</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.MetricsExecutor</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.impl.JmxCacheBuster</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.lib.MetricsExecutorImpl</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.lib.MutableFastCounter</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.lib.MutableHistogram</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.lib.MutableRangeHistogram</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.lib.MutableSizeHistogram</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.lib.MutableTimeHistogram</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.util.MetricQuantile</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile</shadedPattern>
+ </relocation>
+ <relocation>
+   <pattern>org.apache.hadoop.metrics2.util.MetricSampleQuantiles</pattern>
+   <shadedPattern>org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles</shadedPattern>
+ </relocation>
@@ -199,7 +288,14 @@
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
+ <exclude>META-INF/maven/com.google.protobuf/**</exclude>
+ <exclude>META-INF/maven/commons-io/**</exclude>
+ <exclude>META-INF/maven/org.apache.hbase/**</exclude>
+ <exclude>META-INF/maven/org.apache.hbase.thirdparty/**</exclude>
+ <exclude>META-INF/maven/org.apache.htrace/**</exclude>
<exclude>META-INF/services/javax.*</exclude>
+ <exclude>**/*.proto</exclude>
+ <exclude>hbase-webapps/**</exclude>
@@ -597,9 +693,29 @@