From 4d194bf0fb3f4f54233a45777febca63cb367333 Mon Sep 17 00:00:00 2001 From: guluo Date: Thu, 4 May 2023 23:05:35 +0800 Subject: [PATCH 1/3] fast-fail when specified column families do not exist. --- .../hadoop/hbase/mapreduce/ImportTsv.java | 16 ++++++++++++++ .../hadoop/hbase/mapreduce/TestImportTsv.java | 21 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java index 0a811c92ba94..201a8279e52b 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; @@ -554,6 +555,21 @@ protected static Job createSubmittableJob(Configuration conf, String[] args) LOG.error(errorMsg); throw new TableNotFoundException(errorMsg); } + try (Table table = connection.getTable(tableName)) { + ArrayList unmatchedFamilies = new ArrayList<>(); + Set cfSet = getColumnFamilies(columns); + TableDescriptor tDesc = table.getDescriptor(); + for (String cf : cfSet) { + if (!tDesc.hasColumnFamily(Bytes.toBytes(cf))) { + unmatchedFamilies.add(cf); + } + } + if (unmatchedFamilies.size() > 0) { + String noSuchColumnFamiliesMsg = String.format("Column families: %s do not exist.", unmatchedFamilies); + LOG.error(noSuchColumnFamiliesMsg); + throw new NoSuchColumnFamilyException(noSuchColumnFamiliesMsg); + } + } if (mapperClass.equals(TsvImporterTextMapper.class)) { usage(TsvImporterTextMapper.class.toString() + " should not be used for non bulkloading case. use " diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java index 9316b09b8c93..62bfc9865aff 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; +import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests; import org.apache.hadoop.hbase.util.Bytes; @@ -241,6 +242,26 @@ public int run(String[] args) throws Exception { }, args)); } + @Test + public void testMROnNoMatchedColumnFamily() throws Exception { + util.createTable(tn, FAMILY); + + String[] args = new String[] { tn.getNameAsString(), "/inputFile" }; + Configuration conf = new Configuration(util.getConfiguration()); + conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY, FAM_ERROR:A"); + exception.expect(NoSuchColumnFamilyException.class); + assertEquals("running test job configuration failed.", 0, + ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() { + @Override + public int run(String[] args) throws Exception { + createSubmittableJob(getConf(), args); + return 0; + } + }, args)); + + util.deleteTable(tn); + } + @Test public void testMRWithoutAnExistingTable() throws Exception { String[] args = new String[] { tn.getNameAsString(), "/inputFile" }; From ac53c5e67c1e1196f81815839bb9d9194ccaf3b7 Mon Sep 17 00:00:00 2001 From: guluo Date: Fri, 5 May 2023 21:27:31 +0800 Subject: [PATCH 2/3] update tests for noMatchedColumnFamilies --- .../org/apache/hadoop/hbase/mapreduce/ImportTsv.java | 2 +- .../org/apache/hadoop/hbase/mapreduce/TestImportTsv.java | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java index 201a8279e52b..5affb0330032 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java @@ -565,7 +565,7 @@ protected static Job createSubmittableJob(Configuration conf, String[] args) } } if (unmatchedFamilies.size() > 0) { - String noSuchColumnFamiliesMsg = String.format("Column families: %s do not exist.", unmatchedFamilies); + String noSuchColumnFamiliesMsg = format("Column families: %s do not exist.", unmatchedFamilies); LOG.error(noSuchColumnFamiliesMsg); throw new NoSuchColumnFamilyException(noSuchColumnFamiliesMsg); } diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java index 62bfc9865aff..5c75b32bd634 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java @@ -243,12 +243,13 @@ public int run(String[] args) throws Exception { } @Test - public void testMROnNoMatchedColumnFamily() throws Exception { + public void testMRNoMatchedColumnFamily() throws Exception { util.createTable(tn, FAMILY); - String[] args = new String[] { tn.getNameAsString(), "/inputFile" }; - Configuration conf = new Configuration(util.getConfiguration()); - conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY, FAM_ERROR:A"); + String[] args = new String[] { + "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM01_ERROR:A,FAM01_ERROR:B,FAM02_ERROR:C", + tn.getNameAsString(), + "/inputFile" }; exception.expect(NoSuchColumnFamilyException.class); assertEquals("running test job configuration failed.", 0, ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() { From 933201aef56d84834c3510ecfd611212640cc1c0 Mon Sep 17 00:00:00 2001 From: guluo Date: Wed, 17 May 2023 22:57:00 +0800 Subject: [PATCH 3/3] fix style issue --- .../java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java | 3 ++- .../org/apache/hadoop/hbase/mapreduce/TestImportTsv.java | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java index 5affb0330032..aed257488720 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java @@ -565,7 +565,8 @@ protected static Job createSubmittableJob(Configuration conf, String[] args) } } if (unmatchedFamilies.size() > 0) { - String noSuchColumnFamiliesMsg = format("Column families: %s do not exist.", unmatchedFamilies); + String noSuchColumnFamiliesMsg = + format("Column families: %s do not exist.", unmatchedFamilies); LOG.error(noSuchColumnFamiliesMsg); throw new NoSuchColumnFamilyException(noSuchColumnFamiliesMsg); } diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java index 5c75b32bd634..04fc2c8d3b8f 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java @@ -247,9 +247,9 @@ public void testMRNoMatchedColumnFamily() throws Exception { util.createTable(tn, FAMILY); String[] args = new String[] { - "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM01_ERROR:A,FAM01_ERROR:B,FAM02_ERROR:C", - tn.getNameAsString(), - "/inputFile" }; + "-D" + ImportTsv.COLUMNS_CONF_KEY + + "=HBASE_ROW_KEY,FAM:A,FAM01_ERROR:A,FAM01_ERROR:B,FAM02_ERROR:C", + tn.getNameAsString(), "/inputFile" }; exception.expect(NoSuchColumnFamilyException.class); assertEquals("running test job configuration failed.", 0, ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {