From 1dc41a9abe90a58fab08af4b2eab014014efa38a Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 13:02:29 -0700 Subject: [PATCH 01/13] Add custom info types to code samples Include samples of using custom dictionaries and custom regexes. --- .../main/java/com/example/dlp/Inspect.java | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index d4adc6d33ba..ddcc2b2b896 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -27,6 +27,7 @@ import com.google.privacy.dlp.v2.CloudStorageOptions; import com.google.privacy.dlp.v2.ContentItem; import com.google.privacy.dlp.v2.CreateDlpJobRequest; +import com.google.privacy.dlp.v2.CustomInfoType; import com.google.privacy.dlp.v2.DatastoreOptions; import com.google.privacy.dlp.v2.DlpJob; import com.google.privacy.dlp.v2.Finding; @@ -82,6 +83,7 @@ private static void inspectString( Likelihood minLikelihood, int maxFindings, List infoTypes, + List customInfoTypes, boolean includeQuote, String projectId) { // instantiate a client @@ -91,6 +93,7 @@ private static void inspectString( InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) + .addAllCustomInfoTypes(customInfoTypes) .setMinLikelihood(minLikelihood) .setLimits(findingLimits) .setIncludeQuote(includeQuote) @@ -146,6 +149,7 @@ private static void inspectFile( Likelihood minLikelihood, int maxFindings, List infoTypes, + List customInfoTypes, boolean includeQuote, String projectId) { // Instantiates a client @@ -189,6 +193,7 @@ private static void inspectFile( InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) + .addAllCustomInfoTypes(customInfoTypes) .setMinLikelihood(minLikelihood) .setLimits(findingLimits) .setIncludeQuote(includeQuote) @@ -242,6 +247,7 @@ private static void inspectGcsFile( String fileName, Likelihood minLikelihood, List infoTypes, + List customInfoTypes, int maxFindings, String topicId, String subscriptionId, @@ -266,6 +272,7 @@ private static void inspectGcsFile( InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) + .addAllCustomInfoTypes(customInfoTypes) .setMinLikelihood(minLikelihood) .setLimits(findingLimits) .build(); @@ -363,6 +370,7 @@ private static void inspectDatastore( String kind, Likelihood minLikelihood, List infoTypes, + List customInfoTypes, int maxFindings, String topicId, String subscriptionId) { @@ -388,6 +396,7 @@ private static void inspectDatastore( InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) + .addAllCustomInfoTypes(customInfoTypes) .setMinLikelihood(minLikelihood) .setLimits(findingLimits) .build(); @@ -486,6 +495,7 @@ private static void inspectBigquery( String tableId, Likelihood minLikelihood, List infoTypes, + List customInfoTypes, int maxFindings, String topicId, String subscriptionId) { @@ -511,6 +521,7 @@ private static void inspectBigquery( InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) + .addAllCustomInfoTypes(customInfoTypes) .setMinLikelihood(minLikelihood) .setLimits(findingLimits) .build(); @@ -629,6 +640,14 @@ public static void main(String[] args) throws Exception { infoTypesOption.setArgs(Option.UNLIMITED_VALUES); commandLineOptions.addOption(infoTypesOption); + Option customDictionariesOption = Option.builder("customDictionaries").hasArg(true).required(false).build(); + customDictionariesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(customDictionariesOption); + + Option customRegexesOption = Option.builder("customRegexes").hasArg(true).required(false).build(); + customRegexesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(customRegexesOption); + Option includeQuoteOption = Option.builder("includeQuote").hasArg(true).required(false).build(); commandLineOptions.addOption(includeQuoteOption); @@ -695,13 +714,41 @@ public static void main(String[] args) throws Exception { infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); } } + + List customInfoTypesList = new ArrayList<>(); + if (cmd.hasOption(customDictionariesOption.getOpt())) { + String[] dictionaryStrings = cmd.getOptionValues(customDictionariesOption.getOpt()); + for (int i = 0; i < dictionaryStrings.length; i++) { + String[] dictionaryWords = dictionaryStrings[i].split(","); + CustomInfoType customInfoType = + CustomInfoType + .newBuilder() + .setInfoType(InfoType.newBuilder().setName(String.format("CUSTOM_DICTIONARY_%s", i))) + .setDictionary(Dictionary.newBuilder().setWordList(WordList.newBuilder().addAllWords(dictionaryWords))) + .build(); + customInfoTypesList.add(customInfoType); + } + } + if (cmd.hasOption(customRegexOptions.getOpt())) { + String[] patterns = cmd.getOptionValues(customRegexesOption.getOpt()); + for (int i = 0; i < patterns.length; i++) { + CustomInfoType customInfoType = + CustomInfoType + .newBuilder() + .setInfoType(InfoType.newBuilder().setName(String.format("CUSTOM_REGEX_%s", i))) + .setRegex(Regex.newBuilder().setPattern(patterns[i])) + .build(); + customInfoTypesList.add(customInfoType); + } + } + // string inspection if (cmd.hasOption("s")) { String val = cmd.getOptionValue(stringOption.getOpt()); inspectString(val, minLikelihood, maxFindings, infoTypesList, includeQuote, projectId); } else if (cmd.hasOption("f")) { String filePath = cmd.getOptionValue(fileOption.getOpt()); - inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, includeQuote, projectId); + inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, customInfoTypesList, includeQuote, projectId); // gcs file inspection } else if (cmd.hasOption("gcs")) { String bucketName = cmd.getOptionValue(bucketNameOption.getOpt()); @@ -711,6 +758,7 @@ public static void main(String[] args) throws Exception { fileName, minLikelihood, infoTypesList, + customInfoTypesList, maxFindings, topicId, subscriptionId, @@ -726,6 +774,7 @@ public static void main(String[] args) throws Exception { kind, minLikelihood, infoTypesList, + customInfoTypesList, maxFindings, topicId, subscriptionId); @@ -739,6 +788,7 @@ public static void main(String[] args) throws Exception { tableId, minLikelihood, infoTypesList, + customInfoTypesList, maxFindings, topicId, subscriptionId); From ff725cbcd98ed28877225c74a25f69db42c6f719 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 13:10:02 -0700 Subject: [PATCH 02/13] Add missing imports --- dlp/src/main/java/com/example/dlp/Inspect.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index ddcc2b2b896..bffb2daf3a2 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -28,6 +28,9 @@ import com.google.privacy.dlp.v2.ContentItem; import com.google.privacy.dlp.v2.CreateDlpJobRequest; import com.google.privacy.dlp.v2.CustomInfoType; +import com.google.privacy.dlp.v2.CustomInfoType.Dictionary; +import com.google.privacy.dlp.v2.CustomInfoType.Dictionary.WordList; +import com.google.privacy.dlp.v2.CustomInfoType.Regex; import com.google.privacy.dlp.v2.DatastoreOptions; import com.google.privacy.dlp.v2.DlpJob; import com.google.privacy.dlp.v2.Finding; From ecdb1b8eef7f84db4263707b8cff59d8e812594b Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 13:35:59 -0700 Subject: [PATCH 03/13] Fix build errors --- dlp/src/main/java/com/example/dlp/Inspect.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index bffb2daf3a2..e1951d697c4 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -55,6 +55,7 @@ import java.net.URLConnection; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.Arrays; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -727,12 +728,15 @@ public static void main(String[] args) throws Exception { CustomInfoType .newBuilder() .setInfoType(InfoType.newBuilder().setName(String.format("CUSTOM_DICTIONARY_%s", i))) - .setDictionary(Dictionary.newBuilder().setWordList(WordList.newBuilder().addAllWords(dictionaryWords))) + .setDictionary( + Dictionary + .newBuilder() + .setWordList(WordList.newBuilder().addAllWords(Arrays.asList(dictionaryWords)))) .build(); customInfoTypesList.add(customInfoType); } } - if (cmd.hasOption(customRegexOptions.getOpt())) { + if (cmd.hasOption(customRegexesOption.getOpt())) { String[] patterns = cmd.getOptionValues(customRegexesOption.getOpt()); for (int i = 0; i < patterns.length; i++) { CustomInfoType customInfoType = @@ -748,7 +752,7 @@ public static void main(String[] args) throws Exception { // string inspection if (cmd.hasOption("s")) { String val = cmd.getOptionValue(stringOption.getOpt()); - inspectString(val, minLikelihood, maxFindings, infoTypesList, includeQuote, projectId); + inspectString(val, minLikelihood, maxFindings, infoTypesList, customInfoTypesList, includeQuote, projectId); } else if (cmd.hasOption("f")) { String filePath = cmd.getOptionValue(fileOption.getOpt()); inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, customInfoTypesList, includeQuote, projectId); From 1a13047881421b4527095010a1e8767001d62d3b Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 13:45:16 -0700 Subject: [PATCH 04/13] Add tests for custom info types --- .../test/java/com/example/dlp/InspectIT.java | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 60f078dcb26..c3df85c0df9 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -64,6 +64,20 @@ public void testStringInspectionReturnsInfoTypes() throws Exception { assertThat(output, containsString("EMAIL_ADDRESS")); } + @Test + public void testStringInspectionReturnsCustomInfoTypes() throws Exception { + String text = + "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; + Inspect.main( + new String[] { + "-s", text, "-customDictionaries", "gary@somedomain.com", "-customRegexes", "(\\d{3}) \\d{3}-\\d{4}" + }); + String output = bout.toString(); + + assertThat(output, containsString("CUSTOM_DICTIONARY_0")); + assertThat(output, containsString("CUSTOM_REGEX_0")); + } + @Test public void testTextFileInspectionReturnsInfoTypes() throws Exception { Inspect.main( @@ -75,6 +89,22 @@ public void testTextFileInspectionReturnsInfoTypes() throws Exception { assertThat(output, containsString("EMAIL_ADDRESS")); } + @Test + public void testTextFileInspectionReturnsCustomInfoTypes() throws Exception { + Inspect.main( + new String[] { + "-f", + "src/test/resources/test.txt", + "-customDictionaries", + "gary@somedomain.com", + "-customRegexes", + "(\\d{3}) \\d{3}-\\d{4}" + }); + String output = bout.toString(); + assertThat(output, containsString("CUSTOM_DICTIONARY_0")); + assertThat(output, containsString("CUSTOM_REGEX_0")); + } + @Test public void testImageFileInspectionReturnsInfoTypes() throws Exception { Inspect.main( From 783306992af45be059d0dec11c251d0262b033e3 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 14:02:56 -0700 Subject: [PATCH 05/13] Fix bad regexes. --- dlp/src/test/java/com/example/dlp/InspectIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index c3df85c0df9..5ed3f67c1e4 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -70,7 +70,7 @@ public void testStringInspectionReturnsCustomInfoTypes() throws Exception { "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; Inspect.main( new String[] { - "-s", text, "-customDictionaries", "gary@somedomain.com", "-customRegexes", "(\\d{3}) \\d{3}-\\d{4}" + "-s", text, "-customDictionaries", "gary@somedomain.com", "-customRegexes", "\(\\d{3}\) \\d{3}-\\d{4}" }); String output = bout.toString(); @@ -98,7 +98,7 @@ public void testTextFileInspectionReturnsCustomInfoTypes() throws Exception { "-customDictionaries", "gary@somedomain.com", "-customRegexes", - "(\\d{3}) \\d{3}-\\d{4}" + "\(\\d{3}\) \\d{3}-\\d{4}" }); String output = bout.toString(); assertThat(output, containsString("CUSTOM_DICTIONARY_0")); From 973f5c02fb8e699ece980b7e8c2994073a4c1a55 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 14:03:59 -0700 Subject: [PATCH 06/13] Fix bad regexes, part 2. --- dlp/src/test/java/com/example/dlp/InspectIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 5ed3f67c1e4..141a981bea3 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -70,7 +70,7 @@ public void testStringInspectionReturnsCustomInfoTypes() throws Exception { "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; Inspect.main( new String[] { - "-s", text, "-customDictionaries", "gary@somedomain.com", "-customRegexes", "\(\\d{3}\) \\d{3}-\\d{4}" + "-s", text, "-customDictionaries", "gary@somedomain.com", "-customRegexes", "\\(\\d{3}\\) \\d{3}-\\d{4}" }); String output = bout.toString(); @@ -98,7 +98,7 @@ public void testTextFileInspectionReturnsCustomInfoTypes() throws Exception { "-customDictionaries", "gary@somedomain.com", "-customRegexes", - "\(\\d{3}\) \\d{3}-\\d{4}" + "\\(\\d{3}\\) \\d{3}-\\d{4}" }); String output = bout.toString(); assertThat(output, containsString("CUSTOM_DICTIONARY_0")); From 67d37028025ac620e865306e05b23654b51fa3d2 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 14:27:19 -0700 Subject: [PATCH 07/13] Update README.md Add custom info type flags and fix existing examples so they work with the V2 API. --- dlp/README.md | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/dlp/README.md b/dlp/README.md index 2a96a80446b..8cda47a223a 100644 --- a/dlp/README.md +++ b/dlp/README.md @@ -66,49 +66,50 @@ Options: -f, --maxFindings [number] [default: 0] maximum number of results to retrieve -q, --includeQuote [boolean] [default: true] include matching string in results - -t, --infoTypes restrict to limited set of infoTypes [ default: []] - [ eg. PHONE_NUMBER US_PASSPORT] + -t, --infoTypes set of infoTypes to search for [eg. PHONE_NUMBER US_PASSPORT] + -customDictionaries set of comma-separated dictionary words to search for as customInfoTypes + -customRegexes set of regex patterns to search for as customInfoTypes ``` ### Examples - Inspect a string: ``` - java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -s "My phone number is (123) 456-7890 and my email address is me@somedomain.com" + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -s "My phone number is (123) 456-7890 and my email address is me@somedomain.com" --infoTypes PHONE_NUMBER EMAIL_ADDRESS + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -s "My phone number is (123) 456-7890 and my email address is me@somedomain.com" -customDictionaries me@somedomain.com -customRegexes "\(\d{3}\) \d{3}-\d{4}" ``` - Inspect a local file (text / image): ``` - java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f resources/test.txt - java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f resources/test.png + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f src/test/resources/test.txt --infoTypes PHONE_NUMBER EMAIL_ADDRESS + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f src/test/resources/test.png --infoTypes PHONE_NUMBER EMAIL_ADDRESS ``` - Inspect a file on Google Cloud Storage: ``` - java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -gcs -bucketName my-bucket -fileName my-file.txt + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -gcs -bucketName my-bucket -fileName my-file.txt --infoTypes PHONE_NUMBER EMAIL_ADDRESS ``` - Inspect a Google Cloud Datastore kind: ``` - java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -ds -kind my-kind + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -ds -kind my-kind --infoTypes PHONE_NUMBER EMAIL_ADDRESS ``` -## Automatic redaction of sensitive data -[Automatic redaction](https://cloud.google.com/dlp/docs/classification-redaction) produces an output with sensitive data matches removed. +## Automatic redaction of sensitive data from images +[Automatic redaction](https://cloud.google.com/dlp/docs/redacting-sensitive-data-images) produces an output image with sensitive data matches removed. ``` Commands: - -s Source input string - -r String to replace detected info types + -f Source image file + -o Destination image file Options: --help Show help -minLikelihood choices: "LIKELIHOOD_UNSPECIFIED", "VERY_UNLIKELY", "UNLIKELY", "POSSIBLE", "LIKELY", "VERY_LIKELY"] [default: "LIKELIHOOD_UNSPECIFIED"] specifies the minimum reporting likelihood threshold. - -infoTypes restrict operation to limited set of info types [ default: []] - [ eg. PHONE_NUMBER US_PASSPORT] + -infoTypes set of infoTypes to search for [eg. PHONE_NUMBER US_PASSPORT] ``` ### Example -- Replace sensitive data in text with `_REDACTED_`: +- Redact phone numbers and email addresses from `test.png`: ``` - java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Redact -s "My phone number is (123) 456-7890 and my email address is me@somedomain.com" -r "_REDACTED_" + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Redact -f src/test/resources/test.png -o test-redacted.png -infoTypes PHONE_NUMBER EMAIL_ADDRESS ``` ## Integration tests From 541920c6dd1cb74a4fe19ff6b08057517f738545 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 14:57:03 -0700 Subject: [PATCH 08/13] Fix import order --- dlp/src/main/java/com/example/dlp/Inspect.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index e1951d697c4..619018c9ef4 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -55,8 +55,8 @@ import java.net.URLConnection; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.Arrays; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; From 69d8d09b505398aceb134c8fded314cc91530aa6 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 15:43:28 -0700 Subject: [PATCH 09/13] Fix line length violations --- .../main/java/com/example/dlp/Inspect.java | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 619018c9ef4..8defeb657c7 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -644,11 +644,13 @@ public static void main(String[] args) throws Exception { infoTypesOption.setArgs(Option.UNLIMITED_VALUES); commandLineOptions.addOption(infoTypesOption); - Option customDictionariesOption = Option.builder("customDictionaries").hasArg(true).required(false).build(); + Option customDictionariesOption = + Option.builder("customDictionaries").hasArg(true).required(false).build(); customDictionariesOption.setArgs(Option.UNLIMITED_VALUES); commandLineOptions.addOption(customDictionariesOption); - Option customRegexesOption = Option.builder("customRegexes").hasArg(true).required(false).build(); + Option customRegexesOption = + Option.builder("customRegexes").hasArg(true).required(false).build(); customRegexesOption.setArgs(Option.UNLIMITED_VALUES); commandLineOptions.addOption(customRegexesOption); @@ -727,11 +729,15 @@ public static void main(String[] args) throws Exception { CustomInfoType customInfoType = CustomInfoType .newBuilder() - .setInfoType(InfoType.newBuilder().setName(String.format("CUSTOM_DICTIONARY_%s", i))) + .setInfoType( + InfoType.newBuilder().setName(String.format("CUSTOM_DICTIONARY_%s", i))) .setDictionary( Dictionary .newBuilder() - .setWordList(WordList.newBuilder().addAllWords(Arrays.asList(dictionaryWords)))) + .setWordList( + WordList + .newBuilder() + .addAllWords(Arrays.asList(dictionaryWords)))) .build(); customInfoTypesList.add(customInfoType); } @@ -752,10 +758,24 @@ public static void main(String[] args) throws Exception { // string inspection if (cmd.hasOption("s")) { String val = cmd.getOptionValue(stringOption.getOpt()); - inspectString(val, minLikelihood, maxFindings, infoTypesList, customInfoTypesList, includeQuote, projectId); + inspectString( + val, + minLikelihood, + maxFindings, + infoTypesList, + customInfoTypesList, + includeQuote, + projectId); } else if (cmd.hasOption("f")) { String filePath = cmd.getOptionValue(fileOption.getOpt()); - inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, customInfoTypesList, includeQuote, projectId); + inspectFile( + filePath, + minLikelihood, + maxFindings, + infoTypesList, + customInfoTypesList, + includeQuote, + projectId); // gcs file inspection } else if (cmd.hasOption("gcs")) { String bucketName = cmd.getOptionValue(bucketNameOption.getOpt()); From 29bb0e95c6194aa4d99e762c7d4d1616e0b3dbaf Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 6 Jun 2018 15:45:10 -0700 Subject: [PATCH 10/13] Fix line length formatting violations --- .../test/java/com/example/dlp/InspectIT.java | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 141a981bea3..17814cfc395 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -70,7 +70,12 @@ public void testStringInspectionReturnsCustomInfoTypes() throws Exception { "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; Inspect.main( new String[] { - "-s", text, "-customDictionaries", "gary@somedomain.com", "-customRegexes", "\\(\\d{3}\\) \\d{3}-\\d{4}" + "-s", + text, + "-customDictionaries", + "gary@somedomain.com", + "-customRegexes", + "\\(\\d{3}\\) \\d{3}-\\d{4}" }); String output = bout.toString(); @@ -82,7 +87,7 @@ public void testStringInspectionReturnsCustomInfoTypes() throws Exception { public void testTextFileInspectionReturnsInfoTypes() throws Exception { Inspect.main( new String[] { - "-f", "src/test/resources/test.txt", "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + "-f", "src/test/resources/test.txt", "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" }); String output = bout.toString(); assertThat(output, containsString("PHONE_NUMBER")); @@ -93,12 +98,12 @@ public void testTextFileInspectionReturnsInfoTypes() throws Exception { public void testTextFileInspectionReturnsCustomInfoTypes() throws Exception { Inspect.main( new String[] { - "-f", - "src/test/resources/test.txt", - "-customDictionaries", - "gary@somedomain.com", - "-customRegexes", - "\\(\\d{3}\\) \\d{3}-\\d{4}" + "-f", + "src/test/resources/test.txt", + "-customDictionaries", + "gary@somedomain.com", + "-customRegexes", + "\\(\\d{3}\\) \\d{3}-\\d{4}" }); String output = bout.toString(); assertThat(output, containsString("CUSTOM_DICTIONARY_0")); From 92e5d313c28af0e26aa13f66d914bcd3c7f4e986 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 13 Jun 2018 09:07:37 -0700 Subject: [PATCH 11/13] Fix broken character mask test DLP now requires the client to specify info types to search for when using DeID with wildcard info types. --- .../com/example/dlp/DeIdentification.java | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index cd6932454af..80789f0e096 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -81,7 +81,11 @@ public class DeIdentification { * @param projectId ID of Google Cloud project to run the API under. */ private static void deIdentifyWithMask( - String string, Character maskingCharacter, int numberToMask, String projectId) { + String string, + List infoTypes, + Character maskingCharacter, + int numberToMask, + String projectId) { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -108,6 +112,11 @@ private static void deIdentifyWithMask( .addTransformations(infoTypeTransformationObject) .build(); + InspectConfig inspectConfig = + InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .build(); + DeidentifyConfig deidentifyConfig = DeidentifyConfig.newBuilder() .setInfoTypeTransformations(infoTypeTransformationArray) @@ -117,6 +126,7 @@ private static void deIdentifyWithMask( DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() .setParent(ProjectName.of(projectId).toString()) + .setInspectConfig(inspectConfig) .setDeidentifyConfig(deidentifyConfig) .setItem(contentItem) .build(); @@ -513,6 +523,10 @@ public static void main(String[] args) throws Exception { Options commandLineOptions = new Options(); commandLineOptions.addOptionGroup(optionsGroup); + Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build(); + infoTypesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(infoTypesOption); + Option maskingCharacterOption = Option.builder("maskingCharacter").hasArg(true).required(false).build(); commandLineOptions.addOption(maskingCharacterOption); @@ -575,12 +589,21 @@ public static void main(String[] args) throws Exception { String projectId = cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + List infoTypesList = Collections.emptyList(); + if (cmd.hasOption(infoTypesOption.getOpt())) { + infoTypesList = new ArrayList<>(); + String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); + for (String infoType : infoTypes) { + infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); + } + } + if (cmd.hasOption("m")) { // deidentification with character masking int numberToMask = Integer.parseInt(cmd.getOptionValue(numberToMaskOption.getOpt(), "0")); char maskingCharacter = cmd.getOptionValue(maskingCharacterOption.getOpt(), "*").charAt(0); String val = cmd.getOptionValue(deidentifyMaskingOption.getOpt()); - deIdentifyWithMask(val, maskingCharacter, numberToMask, projectId); + deIdentifyWithMask(val, infoTypes, maskingCharacter, numberToMask, projectId); } else if (cmd.hasOption("f")) { // deidentification with FPE String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); From 6ab7b3e728a134702f2b574a6f997e90fb4c1252 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 13 Jun 2018 09:12:49 -0700 Subject: [PATCH 12/13] Add SSN info type to tests --- dlp/src/test/java/com/example/dlp/DeIdentificationIT.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index ec796c60bbb..9ec1d44ef6b 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -63,6 +63,7 @@ public void testDeidStringMasksCharacters() throws Exception { DeIdentification.main( new String[] { "-m", text, + "-infoTypes", "US_SOCIAL_SECURITY_NUMBER", "-maskingCharacter", "x", "-numberToMask", "5" }); @@ -79,6 +80,8 @@ public void testDeidReidFpe() throws Exception { new String[] { "-f", "\"" + text + "\"", + "-infoTypes", + "US_SOCIAL_SECURITY_NUMBER", "-wrappedKey", wrappedKey, "-keyName", From c04868ac7c1604f9fed73235a6b7debd7d288784 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Wed, 13 Jun 2018 09:16:26 -0700 Subject: [PATCH 13/13] Add info types to DeID with FPE test --- .../main/java/com/example/dlp/DeIdentification.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 80789f0e096..7415cc79593 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -57,6 +57,7 @@ import java.time.format.DateTimeParseException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import org.apache.commons.cli.CommandLine; @@ -157,6 +158,7 @@ private static void deIdentifyWithMask( */ private static void deIdentifyWithFpe( String string, + List infoTypes, FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey, @@ -198,6 +200,11 @@ private static void deIdentifyWithFpe( .addTransformations(infoTypeTransformationObject) .build(); + InspectConfig inspectConfig = + InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .build(); + // Create the deidentification request object DeidentifyConfig deidentifyConfig = DeidentifyConfig.newBuilder() @@ -207,6 +214,7 @@ private static void deIdentifyWithFpe( DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() .setParent(ProjectName.of(projectId).toString()) + .setInspectConfig(inspectConfig) .setDeidentifyConfig(deidentifyConfig) .setItem(contentItem) .build(); @@ -603,7 +611,7 @@ public static void main(String[] args) throws Exception { int numberToMask = Integer.parseInt(cmd.getOptionValue(numberToMaskOption.getOpt(), "0")); char maskingCharacter = cmd.getOptionValue(maskingCharacterOption.getOpt(), "*").charAt(0); String val = cmd.getOptionValue(deidentifyMaskingOption.getOpt()); - deIdentifyWithMask(val, infoTypes, maskingCharacter, numberToMask, projectId); + deIdentifyWithMask(val, infoTypesList, maskingCharacter, numberToMask, projectId); } else if (cmd.hasOption("f")) { // deidentification with FPE String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); @@ -614,7 +622,8 @@ public static void main(String[] args) throws Exception { FfxCommonNativeAlphabet.valueOf( cmd.getOptionValue( alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); - deIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId, surrogateType); + deIdentifyWithFpe( + val, infoTypesList, alphabet, keyName, wrappedKey, projectId, surrogateType); } else if (cmd.hasOption("d")) { //deidentify with date shift String inputCsv = cmd.getOptionValue(inputCsvPathOption.getOpt());