Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import com.google.common.base.Splitter;

import org.apache.gobblin.hive.metastore.HiveMetaStoreUtils;
import org.apache.gobblin.util.HiveJdbcConnector;


Expand All @@ -42,7 +43,7 @@ public abstract class HiveTable {
protected List<HiveAttribute> attributes;

public static class Builder<T extends Builder<?>> {
protected String name = UUID.randomUUID().toString().replaceAll("-", "_");
protected String name = HiveMetaStoreUtils.getHiveTableName(UUID.randomUUID().toString());
protected List<String> primaryKeys = new ArrayList<>();
protected List<HiveAttribute> attributes = new ArrayList<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@

import com.google.common.annotations.VisibleForTesting;

import lombok.extern.slf4j.Slf4j;

import org.apache.gobblin.dataset.Dataset;
import org.apache.gobblin.dataset.DatasetsFinder;
import org.apache.gobblin.util.PropertiesUtils;
Expand All @@ -39,6 +41,7 @@
/**
* A decorator for filtering datasets after a {@link DatasetsFinder} finds a {@link List} of {@link Dataset}s
*/
@Slf4j
public class DatasetsFinderFilteringDecorator<T extends Dataset> implements DatasetsFinder<T> {
private static final String PREFIX = "filtering.datasets.finder.";
public static final String DATASET_CLASS = PREFIX + "class";
Expand Down Expand Up @@ -69,6 +72,7 @@ public DatasetsFinderFilteringDecorator(FileSystem fs, Properties properties) th
@Override
public List<T> findDatasets() throws IOException {
List<T> datasets = datasetFinder.findDatasets();
log.info("Found {} datasets", datasets.size());
List<T> allowedDatasets = Collections.emptyList();
try {
allowedDatasets = datasets.parallelStream()
Expand All @@ -83,6 +87,7 @@ public List<T> findDatasets() throws IOException {
wrappedIOException.rethrowWrapped();
}

log.info("Allowed {}/{} datasets", allowedDatasets.size() ,datasets.size());
return allowedDatasets;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
import org.apache.avro.Schema;
import org.apache.avro.SchemaParseException;
import org.apache.commons.lang.reflect.MethodUtils;
import org.apache.gobblin.hive.avro.HiveAvroSerDeManager;
import org.apache.gobblin.hive.spec.HiveSpec;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
Expand Down Expand Up @@ -73,6 +71,8 @@
import org.apache.gobblin.hive.HiveRegistrationUnit.Column;
import org.apache.gobblin.hive.HiveTable;
import org.apache.gobblin.hive.SharedHiveConfKey;
import org.apache.gobblin.hive.avro.HiveAvroSerDeManager;
import org.apache.gobblin.hive.spec.HiveSpec;


/**
Expand Down Expand Up @@ -151,6 +151,15 @@ public static HiveTable getHiveTable(Table table) {
return hiveTable;
}

/**
 * Sanitizes a topic name so it is usable as a Hive table name.
 *
 * <p>Hive does not allow '-' or '.' in table names, so every occurrence of either
 * character is replaced with '_'. All other characters are passed through unchanged,
 * and a name containing neither character is returned as-is.
 *
 * @param topic the topic (or candidate table) name to sanitize; must be non-null
 * @return {@code topic} with every '-' and '.' replaced by '_'
 */
public static String getHiveTableName(String topic) {
  // String.replace(char, char) substitutes the literal characters directly,
  // avoiding the per-call regex compilation done by replaceAll("[-.]", "_").
  return topic.replace('-', '_').replace('.', '_');
}

/**
* Convert a {@link HivePartition} into a {@link Partition}.
*/
Expand Down Expand Up @@ -289,7 +298,8 @@ public static boolean containsNonOptionalUnionTypeColumn(HiveTable hiveTable) {
.anyMatch(type -> isNonOptionalUnion(type));
}

throw new RuntimeException("Avro based Hive tables without \"" + HiveAvroSerDeManager.SCHEMA_LITERAL +"\" are not supported");
throw new RuntimeException("Avro based Hive tables without \"" + HiveAvroSerDeManager.SCHEMA_LITERAL +"\" are not supported. "
+ "hiveTable=" + hiveTable.getDbName() + "." + hiveTable.getTableName());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ private DbAndTable getDbAndTable(T dataset) {
throw new IllegalStateException(String.format("Dataset urn [%s] doesn't follow expected pattern. " +
"Expected pattern = %s", dataset.getUrn(), pattern.pattern()));
}
return new DbAndTable(m.group(1), m.group(2));

return new DbAndTable(m.group(1), HiveMetaStoreUtils.getHiveTableName(m.group(2)));
}

boolean containsNonOptionalUnion(HiveTable table) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,10 @@

package org.apache.gobblin.iceberg.predicates;

import com.google.common.io.Files;
import java.io.File;
import java.util.Collections;
import lombok.extern.slf4j.Slf4j;

import org.apache.commons.io.FileUtils;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.dataset.Dataset;
import org.apache.gobblin.dataset.test.SimpleDatasetForTesting;
import org.apache.gobblin.hive.HiveTable;
import org.apache.gobblin.hive.metastore.HiveMetaStoreUtils;
import org.apache.gobblin.util.ConfigUtils;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Table;
Expand All @@ -40,16 +33,30 @@
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.Test;

import com.google.common.io.Files;

import lombok.extern.slf4j.Slf4j;

import org.apache.gobblin.configuration.State;
import org.apache.gobblin.dataset.Dataset;
import org.apache.gobblin.dataset.test.SimpleDatasetForTesting;
import org.apache.gobblin.hive.HiveTable;
import org.apache.gobblin.hive.metastore.HiveMetaStoreUtils;
import org.apache.gobblin.util.ConfigUtils;


@Slf4j
// depends on icebergMetadataWriterTest to avoid concurrency between other HiveMetastoreTest(s) in CI.
// You can uncomment the dependsOnGroups if you want to test this class in isolation
@Test(dependsOnGroups = "icebergMetadataWriterTest")
public class DatasetHiveSchemaContainsNonOptionalUnionTest extends HiveMetastoreTest {

private static String dbName = "dbname_" +
DatasetHiveSchemaContainsNonOptionalUnionTest.class.getSimpleName().toLowerCase();
private static String dbName = "dbName";
private static File tmpDir;
private static State state;
private static String dbUri;
private static String testTable = "test_table";
private static String testTable = "test_table01";
private static String datasetUrn = String.format("/data/%s/streaming/test-Table01/hourly/2023/01/01", dbName);

@AfterSuite
public void clean() throws Exception {
Expand Down Expand Up @@ -77,14 +84,14 @@ public void setup() throws Exception {
metastoreClient.createTable(HiveMetaStoreUtils.getTable(testTable));

state = ConfigUtils.configToState(ConfigUtils.propertiesToConfig(hiveConf.getAllProperties()));
state.setProp(DatasetHiveSchemaContainsNonOptionalUnion.PATTERN, "/data/(\\w+)/(\\w+)");
state.setProp(DatasetHiveSchemaContainsNonOptionalUnion.PATTERN, "/data/(\\w+)/.*/([\\w\\d_-]+)/hourly.*");
Assert.assertNotNull(metastoreClient.getTable(dbName, DatasetHiveSchemaContainsNonOptionalUnionTest.testTable));
}

@Test
public void testContainsNonOptionalUnion() throws Exception {
  // Build the predicate from the properties held in the shared test state.
  DatasetHiveSchemaContainsNonOptionalUnion nonOptionalUnionPredicate =
      new DatasetHiveSchemaContainsNonOptionalUnion(state.getProperties());
  // The dataset urn is expected to resolve to a table whose schema contains
  // a non-optional union, so the predicate must accept it.
  Dataset datasetUnderTest = new SimpleDatasetForTesting(datasetUrn);
  Assert.assertTrue(nonOptionalUnionPredicate.test(datasetUnderTest));
}

Expand Down