From ae379cd0e4f7e7e2f198e2915922093ad6529a6f Mon Sep 17 00:00:00 2001 From: David Mollitor Date: Tue, 28 Dec 2021 11:48:09 -0500 Subject: [PATCH 1/2] TEZ-4365: Use Regex Pattern to Parse DAG ID String --- .../org/apache/tez/dag/records/TezDAGID.java | 27 ++++----- .../apache/tez/dag/records/TestTezIds.java | 58 +++++++++---------- 2 files changed, 36 insertions(+), 49 deletions(-) diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java b/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java index 68184fc8c0..13b0539eb2 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java @@ -21,6 +21,8 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -133,6 +135,7 @@ public void write(DataOutput out) throws IOException { super.write(out); } + public static final Pattern DAG_ID_PATTERN = Pattern.compile("dag_(.+)_(\\d+)_(\\d+)"); // DO NOT CHANGE THIS. DAGClient replicates this code to create DAG id string public static final String DAG = "dag"; static final ThreadLocal tezAppIdFormat = new ThreadLocal() { @@ -175,25 +178,15 @@ public String getGroupId(int numDagsPerGroup) { } public static TezDAGID fromString(String dagId) { - int id = 0; - int appId = 0; - String[] split = dagId.split("_"); - if (split.length != 4 || !dagId.startsWith(DAG + "_")) { + final Matcher match = DAG_ID_PATTERN.matcher(dagId); + if (!match.find()) { throw new IllegalArgumentException("Invalid DAG Id format : " + dagId); } - String rmId = split[1]; - try { - appId = Integer.parseInt(split[2]); - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Error while parsing App Id '" - + split[2] + "' of DAG Id : " + dagId); - } - try { - id = Integer.parseInt(split[3]); - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Error while parsing Id '" + split[3] - + "' of DAG Id : " + dagId); - } + + final String rmId = match.group(1); + final int appId = Integer.parseInt(match.group(2)); + final int id = Integer.parseInt(match.group(3)); + return TezDAGID.getInstance(rmId, appId, id); } diff --git a/tez-common/src/test/java/org/apache/tez/dag/records/TestTezIds.java b/tez-common/src/test/java/org/apache/tez/dag/records/TestTezIds.java index 5e1552d345..1427ac32c8 100644 --- a/tez-common/src/test/java/org/apache/tez/dag/records/TestTezIds.java +++ b/tez-common/src/test/java/org/apache/tez/dag/records/TestTezIds.java @@ -108,43 +108,37 @@ public void testIdStringify() { verifyAttemptId(taIdStr, taId); } - @Test(timeout=5000) - public void testInvalidDagIds() { - String dagIdStr = "aaa_111_1_1"; - TezDAGID dagId; - try { - dagId = TezDAGID.fromString(dagIdStr); - Assert.fail("Expected failure for invalid dagId=" + dagIdStr); - } catch (IllegalArgumentException e) { - Assert.assertTrue(e.getMessage().contains("Invalid DAG Id format")); - } - - dagIdStr = "dag_111_11"; - try { - dagId = TezDAGID.fromString(dagIdStr); - Assert.fail("Expected failure for invalid dagId=" + dagIdStr); - } catch (IllegalArgumentException e) { - Assert.assertTrue(e.getMessage().contains("Invalid DAG Id format")); - } + @Test + public void testFromStringValid() { + final TezDAGID dagId = TezDAGID.fromString("dag_111_2_3"); + Assert.assertEquals(3, dagId.getId()); + Assert.assertEquals(111, dagId.getApplicationId().getClusterTimestamp()); + Assert.assertEquals(2, dagId.getApplicationId().getId()); + } - dagIdStr = "dag_111_11_aa"; - try { - dagId = TezDAGID.fromString(dagIdStr); - Assert.fail("Expected failure for invalid dagId=" + dagIdStr); - } catch (IllegalArgumentException e) { - Assert.assertTrue(e.getMessage().contains("Error while parsing")); - } + @Test(expected = IllegalArgumentException.class) + public void testFromStringInvalidPrefix() { + // Case sensitive + TezDAGID.fromString("DAG_111_1_1"); + } - dagIdStr = "dag_111_aa_1"; - try { - dagId = TezDAGID.fromString(dagIdStr); - Assert.fail("Expected failure for invalid dagId=" + dagIdStr); - } catch (IllegalArgumentException e) { - Assert.assertTrue(e.getMessage().contains("Error while parsing")); - } + @Test(expected = IllegalArgumentException.class) + public void testFromStringInvalidFormat() { + // Missing last field + TezDAGID.fromString("dag_111_11"); + } + @Test(expected = IllegalArgumentException.class) + public void testFromStringInvalidID() { + // ID field must be numeric + TezDAGID.fromString("dag_111_11_aa"); } + @Test(expected = IllegalArgumentException.class) + public void testFromStringInvalidAppID() { + // App ID field must be numeric + TezDAGID.fromString("dag_111_aa_11"); + } public void testGetGroupIds() { ApplicationId appId = ApplicationId.newInstance(0, 1); From 033761c4182128d525fa9bc09f4b090e8c1de050 Mon Sep 17 00:00:00 2001 From: David Mollitor Date: Tue, 28 Dec 2021 22:46:23 -0500 Subject: [PATCH 2/2] Fix whitespace --- .../src/main/java/org/apache/tez/dag/records/TezDAGID.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java b/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java index 13b0539eb2..ba5aab7a25 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java @@ -185,7 +185,7 @@ public static TezDAGID fromString(String dagId) { final String rmId = match.group(1); final int appId = Integer.parseInt(match.group(2)); - final int id = Integer.parseInt(match.group(3)); + final int id = Integer.parseInt(match.group(3)); return TezDAGID.getInstance(rmId, appId, id); }