diff --git a/.github/workflows/dlp.yaml b/.github/workflows/dlp.yaml new file mode 100644 index 0000000000..ce39378e56 --- /dev/null +++ b/.github/workflows/dlp.yaml @@ -0,0 +1,85 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: dlp +on: + push: + branches: + - main + paths: + - 'dlp/**' + - '.github/workflows/dlp.yaml' + pull_request: + paths: + - 'dlp/**' + - '.github/workflows/dlp.yaml' + pull_request_target: + types: [labeled] + paths: + - 'dlp/**' + - '.github/workflows/dlp.yaml' + schedule: + - cron: '0 0 * * 0' +jobs: + test: + if: ${{ github.event.action != 'labeled' || github.event.label.name == 'actions:force-run' }} + runs-on: ubuntu-latest + timeout-minutes: 60 + permissions: + contents: 'write' + pull-requests: 'write' + id-token: 'write' + steps: + - uses: actions/checkout@v3.1.0 + with: + ref: ${{github.event.pull_request.head.sha}} + - uses: 'google-github-actions/auth@v1.0.0' + with: + workload_identity_provider: 'projects/1046198160504/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider' + service_account: 'kokoro-system-test@long-door-651.iam.gserviceaccount.com' + create_credentials_file: 'true' + access_token_lifetime: 600s + - uses: actions/setup-node@v3.5.1 + with: + node-version: 16 + - run: npm install + working-directory: dlp + - run: npm test + working-directory: dlp + env: + MOCHA_REPORTER_SUITENAME: dlp + MOCHA_REPORTER_OUTPUT: dlp_sponge_log.xml + 
MOCHA_REPORTER: xunit + - if: ${{ github.event.action == 'labeled' && github.event.label.name == 'actions:force-run' }} + uses: actions/github-script@v6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + try { + await github.rest.issues.removeLabel({ + name: 'actions:force-run', + owner: 'GoogleCloudPlatform', + repo: 'nodejs-docs-samples', + issue_number: context.payload.pull_request.number + }); + } catch (e) { + if (!e.message.includes('Label does not exist')) { + throw e; + } + } + - if: ${{ github.event_name == 'schedule' && always() }} + run: | + curl https://github.com/googleapis/repo-automation-bots/releases/download/flakybot-1.1.0/flakybot -o flakybot -s -L + chmod +x ./flakybot + ./flakybot --repo GoogleCloudPlatform/nodejs-docs-samples --commit_hash ${{github.sha}} --build_url https://github.com/${{github.repository}}/actions/runs/${{github.run_id}} diff --git a/.github/workflows/workflows.json b/.github/workflows/workflows.json index d5aa3308c5..cf42f958b4 100644 --- a/.github/workflows/workflows.json +++ b/.github/workflows/workflows.json @@ -42,6 +42,7 @@ "datastore/functions", "dialogflow", "dialogflow-cx", + "dlp", "document-ai", "endpoints/getting-started", "endpoints/getting-started-grpc", diff --git a/CODEOWNERS b/CODEOWNERS index 829e779fa2..ece349ab0f 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -33,6 +33,7 @@ secret-manager @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-sample security-center @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers service-directory @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers webrisk @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers +dlp @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers # DEE Platform Ops (DEEPO) container @GoogleCloudPlatform/dee-platform-ops @GoogleCloudPlatform/nodejs-samples-reviewers diff --git a/dlp/.eslintrc.yml b/dlp/.eslintrc.yml new file mode 100644 
index 0000000000..b9e0cca8b0 --- /dev/null +++ b/dlp/.eslintrc.yml @@ -0,0 +1,5 @@ +--- +rules: + no-console: off + no-warning-comments: off + node/no-missing-require: off diff --git a/dlp/.gitignore b/dlp/.gitignore new file mode 100644 index 0000000000..df1525596c --- /dev/null +++ b/dlp/.gitignore @@ -0,0 +1,3 @@ +# Test outputs +*.actual.png +*.actual.csv diff --git a/dlp/categoricalRiskAnalysis.js b/dlp/categoricalRiskAnalysis.js new file mode 100644 index 0000000000..01243ca1c8 --- /dev/null +++ b/dlp/categoricalRiskAnalysis.js @@ -0,0 +1,160 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Categorical Risk Analysis +// description: Computes risk metrics of a column of data in a Google BigQuery table. 
+// usage: node categoricalRiskAnalysis.js my-project bigquery-public-data nhtsa_traffic_fatalities accident_2015 state_name my-topic my-subscription
'firstName' + // const columnName = 'firstName'; + async function categoricalRiskAnalysis() { + const sourceTable = { + projectId: tableProjectId, + datasetId: datasetId, + tableId: tableId, + }; + + // Construct request for creating a risk analysis job + const request = { + parent: `projects/${projectId}/locations/global`, + riskJob: { + privacyMetric: { + categoricalStatsConfig: { + field: { + name: columnName, + }, + }, + }, + sourceTable: sourceTable, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + + // Create helper function for unpacking values + const getValue = obj => obj[Object.keys(obj)[0]]; + + // Run risk analysis job + const [topicResponse] = await pubsub.topic(topicId).get(); + const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + setTimeout(() => { + console.log(' Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + const histogramBuckets = + job.riskDetails.categoricalStatsResult.valueFrequencyHistogramBuckets; + histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { + console.log(`Bucket ${histogramBucketIdx}:`); + + // Print bucket stats + console.log( + ` Most 
common value occurs ${histogramBucket.valueFrequencyUpperBound} time(s)` + ); + console.log( + ` Least common value occurs ${histogramBucket.valueFrequencyLowerBound} time(s)` + ); + + // Print bucket values + console.log(`${histogramBucket.bucketSize} unique values total.`); + histogramBucket.bucketValues.forEach(valueBucket => { + console.log( + ` Value ${getValue(valueBucket.value)} occurs ${ + valueBucket.count + } time(s).` + ); + }); + }); + } + + categoricalRiskAnalysis(); + // [END dlp_categorical_stats] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/createInspectTemplate.js b/dlp/createInspectTemplate.js new file mode 100644 index 0000000000..5e13e694c3 --- /dev/null +++ b/dlp/createInspectTemplate.js @@ -0,0 +1,102 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Inspect Templates +// description: Create a new DLP inspection configuration template. 
+// usage: node createInspectTemplate.js my-project my-template-id my-display-name PERSON_NAME true VERY_LIKELY 5
+ // const templateId = 'my-template'; + + // (Optional) The human-readable name to give the template + // const displayName = 'My template'; + + async function createInspectTemplate() { + // Construct the inspection configuration for the template + const inspectConfig = { + infoTypes: infoTypes, + minLikelihood: minLikelihood, + includeQuote: includeQuote, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }; + + // Construct template-creation request + const request = { + parent: `projects/${projectId}/locations/global`, + inspectTemplate: { + inspectConfig: inspectConfig, + displayName: displayName, + }, + templateId: templateId, + }; + + const [response] = await dlp.createInspectTemplate(request); + const templateName = response.name; + console.log(`Successfully created template ${templateName}.`); + } + createInspectTemplate(); + // [END dlp_create_inspect_template] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + return infoTypes; +} diff --git a/dlp/createTrigger.js b/dlp/createTrigger.js new file mode 100644 index 0000000000..f4f338d4f4 --- /dev/null +++ b/dlp/createTrigger.js @@ -0,0 +1,138 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +'use strict'; + +// sample-metadata: +// title: Job Triggers +// description: Create a Data Loss Prevention API job trigger. +// usage: node createTrigger.js my-project triggerId displayName description bucketName autoPopulateTimespan scanPeriod infoTypes minLikelihood maxFindings + +function main( + projectId, + triggerId, + displayName, + description, + bucketName, + autoPopulateTimespan, + scanPeriod, + infoTypes, + minLikelihood, + maxFindings +) { + infoTypes = transformCLI(infoTypes); + // [START dlp_create_trigger] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // (Optional) The name of the trigger to be created. + // const triggerId = 'my-trigger'; + + // (Optional) A display name for the trigger to be created + // const displayName = 'My Trigger'; + + // (Optional) A description for the trigger to be created + // const description = "This is a sample trigger."; + + // The name of the bucket to scan. + // const bucketName = 'YOUR-BUCKET'; + + // Limit scan to new content only. 
+ // const autoPopulateTimespan = true; + + // How often to wait between scans, in days (minimum = 1 day) + // const scanPeriod = 1; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + async function createTrigger() { + // Get reference to the bucket to be inspected + const storageItem = { + cloudStorageOptions: { + fileSet: {url: `gs://${bucketName}/*`}, + }, + timeSpanConfig: { + enableAutoPopulationOfTimespanConfig: autoPopulateTimespan, + }, + }; + + // Construct job to be triggered + const job = { + inspectConfig: { + infoTypes: infoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + storageConfig: storageItem, + }; + + // Construct trigger creation request + const request = { + parent: `projects/${projectId}/locations/global`, + jobTrigger: { + inspectJob: job, + displayName: displayName, + description: description, + triggers: [ + { + schedule: { + recurrencePeriodDuration: { + seconds: scanPeriod * 60 * 60 * 24, // Trigger the scan daily + }, + }, + }, + ], + status: 'HEALTHY', + }, + triggerId: triggerId, + }; + + // Run trigger creation request + const [trigger] = await dlp.createJobTrigger(request); + console.log(`Successfully created trigger ${trigger.name}.`); + } + + createTrigger(); + // [END dlp_create_trigger] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes) { + infoTypes = infoTypes + ? 
infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + return infoTypes; +} diff --git a/dlp/deidentifyWithDateShift.js b/dlp/deidentifyWithDateShift.js new file mode 100644 index 0000000000..e13d96425d --- /dev/null +++ b/dlp/deidentifyWithDateShift.js @@ -0,0 +1,191 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Deidentify with Date Shift +// description: Deidentify dates in a CSV file by pseudorandomly shifting them. 
+// usage: node deidentifyWithDateShift.js my-project dates.csv dates-shifted.csv birth_date,register_date 30 30 [user_id wrappedKey projects/my-project/locations/global/keyRings/my-keyring/cryptoKeys/my-key]
AES-256 key to use when shifting dates + // This key should be encrypted using the Cloud KMS key specified above + // If this is specified, then 'keyName' and 'contextFieldId' must also be set + // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' + + // Helper function for converting CSV rows to Protobuf types + const rowToProto = row => { + const values = row.split(','); + const convertedValues = values.map(value => { + if (Date.parse(value)) { + const date = new Date(value); + return { + dateValue: { + year: date.getFullYear(), + month: date.getMonth() + 1, + day: date.getDate(), + }, + }; + } else { + // Convert all non-date values to strings + return {stringValue: value.toString()}; + } + }); + return {values: convertedValues}; + }; + + async function deidentifyWithDateShift() { + // Read and parse a CSV file + const csvLines = fs + .readFileSync(inputCsvFile) + .toString() + .split('\n') + .filter(line => line.includes(',')); + const csvHeaders = csvLines[0].split(','); + const csvRows = csvLines.slice(1); + + // Construct the table object + const tableItem = { + table: { + headers: csvHeaders.map(header => { + return {name: header}; + }), + rows: csvRows.map(row => rowToProto(row)), + }, + }; + + // Construct DateShiftConfig + const dateShiftConfig = { + lowerBoundDays: lowerBoundDays, + upperBoundDays: upperBoundDays, + }; + + if (contextFieldId && keyName && wrappedKey) { + dateShiftConfig.context = {name: contextFieldId}; + dateShiftConfig.cryptoKey = { + kmsWrapped: { + wrappedKey: wrappedKey, + cryptoKeyName: keyName, + }, + }; + } else if (contextFieldId || keyName || wrappedKey) { + throw new Error( + 'You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!' 
+ ); + } + + // Construct deidentification request + const request = { + parent: `projects/${projectId}/locations/global`, + deidentifyConfig: { + recordTransformations: { + fieldTransformations: [ + { + fields: dateFields, + primitiveTransformation: { + dateShiftConfig: dateShiftConfig, + }, + }, + ], + }, + }, + item: tableItem, + }; + + // Run deidentification request + const [response] = await dlp.deidentifyContent(request); + const tableRows = response.item.table.rows; + + // Write results to a CSV file + tableRows.forEach((row, rowIndex) => { + const rowValues = row.values.map( + value => + value.stringValue || + `${value.dateValue.month}/${value.dateValue.day}/${value.dateValue.year}` + ); + csvLines[rowIndex + 1] = rowValues.join(','); + }); + csvLines.push(''); + fs.writeFileSync(outputCsvFile, csvLines.join('\n')); + + // Print status + console.log(`Successfully saved date-shift output to ${outputCsvFile}`); + } + + deidentifyWithDateShift(); + // [END dlp_deidentify_date_shift] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(dateFields) { + return (dateFields = dateFields.split(',').map(type => { + return {name: type}; + })); +} diff --git a/dlp/deidentifyWithFpe.js b/dlp/deidentifyWithFpe.js new file mode 100644 index 0000000000..015b24d7ad --- /dev/null +++ b/dlp/deidentifyWithFpe.js @@ -0,0 +1,101 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Deidentify with FPE +// description: Deidentify sensitive data in a string using Format Preserving Encryption (FPE). +// usage: node deidentifyWithFpe.js my-project "My SSN is 372819127" projects/my-project/locations/global/keyrings/my-keyring SSN_TOKEN + +function main(projectId, string, alphabet, keyName, wrappedKey, surrogateType) { + // [START dlp_deidentify_fpe] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The string to deidentify + // const string = 'My SSN is 372819127'; + + // The set of characters to replace sensitive ones with + // For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#ffxcommonnativealphabet + // const alphabet = 'ALPHA_NUMERIC'; + + // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key + // const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; + + // The encrypted ('wrapped') AES-256 key to use + // This key should be encrypted using the Cloud KMS key specified above + // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' + + // (Optional) The name of the surrogate custom info type to use + // Only necessary if you want to reverse the deidentification process + // Can be essentially any arbitrary string, as long as it doesn't appear + // in your dataset otherwise. 
+ // const surrogateType = 'SOME_INFO_TYPE_DEID'; + + async function deidentifyWithFpe() { + // Construct FPE config + const cryptoReplaceFfxFpeConfig = { + cryptoKey: { + kmsWrapped: { + wrappedKey: wrappedKey, + cryptoKeyName: keyName, + }, + }, + commonAlphabet: alphabet, + }; + if (surrogateType) { + cryptoReplaceFfxFpeConfig.surrogateInfoType = { + name: surrogateType, + }; + } + + // Construct deidentification request + const item = {value: string}; + const request = { + parent: `projects/${projectId}/locations/global`, + deidentifyConfig: { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + cryptoReplaceFfxFpeConfig: cryptoReplaceFfxFpeConfig, + }, + }, + ], + }, + }, + item: item, + }; + + // Run deidentification request + const [response] = await dlp.deidentifyContent(request); + const deidentifiedItem = response.item; + console.log(deidentifiedItem.value); + } + deidentifyWithFpe(); + // [END dlp_deidentify_fpe] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/deidentifyWithMask.js b/dlp/deidentifyWithMask.js new file mode 100644 index 0000000000..6e6df7e7ef --- /dev/null +++ b/dlp/deidentifyWithMask.js @@ -0,0 +1,80 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +'use strict'; + +// sample-metadata: +// title: Deidentify with Mask +// description: Deidentify sensitive data in a string by masking it with a character. +// usage: node deidentifyWithMask.js my-project string maskingCharacter numberToMask + +function main(projectId, string, maskingCharacter, numberToMask) { + // [START dlp_deidentify_masking] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project-id'; + + // The string to deidentify + // const string = 'My SSN is 372819127'; + + // (Optional) The maximum number of sensitive characters to mask in a match + // If omitted from the request or set to 0, the API will mask any matching characters + // const numberToMask = 5; + + // (Optional) The character to mask matching sensitive data with + // const maskingCharacter = 'x'; + + // Construct deidentification request + const item = {value: string}; + + async function deidentifyWithMask() { + const request = { + parent: `projects/${projectId}/locations/global`, + deidentifyConfig: { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + characterMaskConfig: { + maskingCharacter: maskingCharacter, + numberToMask: numberToMask, + }, + }, + }, + ], + }, + }, + item: item, + }; + + // Run deidentification request + const [response] = await dlp.deidentifyContent(request); + const deidentifiedItem = response.item; + console.log(deidentifiedItem.value); + } + + deidentifyWithMask(); + // [END dlp_deidentify_masking] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/deidentifyWithReplacement.js b/dlp/deidentifyWithReplacement.js new file mode 100644 index 0000000000..84ead95f20 --- /dev/null +++ b/dlp/deidentifyWithReplacement.js @@ -0,0 
+1,76 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Deidentify with Replacement +// description: Deidentify sensitive data in a string by replacing it with a given replacement string. +// usage: node deidentifyWithMask.js my-project string replacement + +function main(projectId, string, replacement) { + // [START dlp_deidentify_replace] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The string to deidentify + // const string = 'My SSN is 372819127'; + + // The string to replace sensitive information with + // const replacement = "[REDACTED]" + + async function deidentifyWithReplacement() { + // Construct deidentification request + const item = {value: string}; + const request = { + parent: `projects/${projectId}/locations/global`, + deidentifyConfig: { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + replaceConfig: { + newValue: { + stringValue: replacement, + }, + }, + }, + }, + ], + }, + }, + item: item, + }; + + // Run deidentification request + const [response] = await dlp.deidentifyContent(request); + const deidentifiedItem = response.item; + console.log(deidentifiedItem.value); + } + + deidentifyWithReplacement(); + 
// [END dlp_deidentify_replace] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/deleteInspectTemplate.js b/dlp/deleteInspectTemplate.js new file mode 100644 index 0000000000..61a8021151 --- /dev/null +++ b/dlp/deleteInspectTemplate.js @@ -0,0 +1,55 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Delete Inspect Templates +// description: Delete the DLP inspection configuration template with the specified name. 
+// usage: node deleteInspectTemplate.js my-project projects/my-project/inspectTemplates/#####
+
+function main(projectId, templateName) {
+  // [START dlp_delete_inspect_template]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // The project ID to run the API call under
+  // const projectId = 'my-project';
+
+  // The name of the template to delete
+  // Parent project ID is automatically extracted from this parameter
+  // const templateName = 'projects/YOUR_PROJECT_ID/inspectTemplates/#####'
+  async function deleteInspectTemplate() {
+    // Construct template-deletion request
+    const request = {
+      name: templateName,
+    };
+
+    // Run template-deletion request
+    await dlp.deleteInspectTemplate(request);
+    console.log(`Successfully deleted template ${templateName}.`);
+  }
+
+  deleteInspectTemplate();
+  // [END dlp_delete_inspect_template]
+}
+
+main(...process.argv.slice(2));
+process.on('unhandledRejection', err => {
+  console.error(err.message);
+  process.exitCode = 1;
+});
diff --git a/dlp/deleteJob.js b/dlp/deleteJob.js
new file mode 100644
index 0000000000..78202468e2
--- /dev/null
+++ b/dlp/deleteJob.js
@@ -0,0 +1,59 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// sample-metadata:
+// title: Delete Job
+// description: Delete results of a Data Loss Prevention API job.
+// usage: node deleteJob.js my-project projects/YOUR_GCLOUD_PROJECT/dlpJobs/X-#####
+
+function main(projectId, jobName) {
+  // [START dlp_delete_job]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // The project ID to run the API call under
+  // const projectId = 'my-project';
+
+  // The name of the job whose results should be deleted
+  // Parent project ID is automatically extracted from this parameter
+  // const jobName = 'projects/my-project/dlpJobs/X-#####'
+
+  function deleteJob() {
+    // Construct job deletion request
+    const request = {
+      name: jobName,
+    };
+
+    // Run job deletion request
+    dlp
+      .deleteDlpJob(request)
+      .then(() => {
+        console.log(`Successfully deleted job ${jobName}.`);
+      })
+      .catch(err => {
+        console.log(`Error in deleteJob: ${err.message || err}`);
+      });
+  }
+
+  deleteJob();
+  // [END dlp_delete_job]
+}
+main(...process.argv.slice(2));
+process.on('unhandledRejection', err => {
+  console.error(err.message);
+  process.exitCode = 1;
+});
diff --git a/dlp/deleteTrigger.js b/dlp/deleteTrigger.js
new file mode 100644
index 0000000000..9fca52f798
--- /dev/null
+++ b/dlp/deleteTrigger.js
@@ -0,0 +1,54 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// sample-metadata:
+// title: Delete Trigger
+// description: Delete a Data Loss Prevention API job trigger.
+// usage: node deleteTrigger.js my-project projects/my-project/jobTriggers/my-trigger
+
+function main(projectId, triggerId) {
+  // [START dlp_delete_trigger]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // The project ID to run the API call under
+  // const projectId = 'my-project'
+
+  // The name of the trigger to be deleted
+  // Parent project ID is automatically extracted from this parameter
+  // const triggerId = 'projects/my-project/jobTriggers/my-trigger';
+
+  async function deleteTrigger() {
+    // Construct trigger deletion request
+    const request = {
+      name: triggerId,
+    };
+
+    // Run trigger deletion request
+    await dlp.deleteJobTrigger(request);
+    console.log(`Successfully deleted trigger ${triggerId}.`);
+  }
+
+  deleteTrigger();
+  // [END dlp_delete_trigger]
+}
+
+main(...process.argv.slice(2));
+process.on('unhandledRejection', err => {
+  console.error(err.message);
+  process.exitCode = 1;
+});
diff --git a/dlp/inspectBigQuery.js b/dlp/inspectBigQuery.js
new file mode 100644
index 0000000000..32d2950af4
--- /dev/null
+++ b/dlp/inspectBigQuery.js
@@ -0,0 +1,195 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// sample-metadata:
+// title: Inspect Bigquery
+// description: Inspects a BigQuery table using the Data Loss Prevention API using Pub/Sub for job notifications.
+// usage: node inspectBigQuery.js my-project dataProjectId datasetId tableId topicId subscriptionId minLikelihood maxFindings infoTypes customInfoTypes + +function main( + projectId, + dataProjectId, + datasetId, + tableId, + topicId, + subscriptionId, + minLikelihood, + maxFindings, + infoTypes, + customInfoTypes +) { + [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes); + + // [START dlp_inspect_bigquery] + // Import the Google Cloud client libraries + const DLP = require('@google-cloud/dlp'); + const {PubSub} = require('@google-cloud/pubsub'); + + // Instantiates clients + const dlp = new DLP.DlpServiceClient(); + const pubsub = new PubSub(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The project ID the table is stored under + // This may or (for public datasets) may not equal the calling project ID + // const dataProjectId = 'my-project'; + + // The ID of the dataset to inspect, e.g. 'my_dataset' + // const datasetId = 'my_dataset'; + + // The ID of the table to inspect, e.g. 
'my_table' + // const tableId = 'my_table'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: {pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}'}}]; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + async function inspectBigquery() { + // Construct item to be inspected + const storageItem = { + bigQueryOptions: { + tableReference: { + projectId: dataProjectId, + datasetId: datasetId, + tableId: tableId, + }, + }, + }; + + // Construct request for creating an inspect job + const request = { + parent: `projects/${projectId}/locations/global`, + inspectJob: { + inspectConfig: { + infoTypes: infoTypes, + customInfoTypes: customInfoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + storageConfig: storageItem, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + + // Run inspect-job creation request + const [topicResponse] = await pubsub.topic(topicId).get(); + // Verify the Pub/Sub topic and listen for job notifications via an + // existing subscription. 
+ const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + // Wait for DLP job to fully complete + setTimeout(() => { + console.log('Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + console.log(`Job ${job.name} status: ${job.state}`); + + const infoTypeStats = job.inspectDetails.result.infoTypeStats; + if (infoTypeStats.length > 0) { + infoTypeStats.forEach(infoTypeStat => { + console.log( + ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` + ); + }); + } else { + console.log('No findings.'); + } + } + + inspectBigquery(); + // [END dlp_inspect_bigquery] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes, customInfoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + + if (customInfoTypes) { + customInfoTypes = customInfoTypes.includes(',') + ? 
customInfoTypes.split(',').map((dict, idx) => { + return { + infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, + dictionary: {wordList: {words: dict.split(',')}}, + }; + }) + : customInfoTypes.split(',').map((rgx, idx) => { + return { + infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, + regex: {pattern: rgx}, + }; + }); + } + + return [infoTypes, customInfoTypes]; +} diff --git a/dlp/inspectDatastore.js b/dlp/inspectDatastore.js new file mode 100644 index 0000000000..e6674332d6 --- /dev/null +++ b/dlp/inspectDatastore.js @@ -0,0 +1,198 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Inspect Datastore +// description: Inspect a Datastore instance using the Data Loss Prevention API using Pub/Sub for job notifications. 
+// usage: node inspectDatastore.js my-project dataProjectId namespaceId kind topicId subscriptionId minLikelihood maxFindings infoTypes customInfoTypes + +function main( + projectId, + dataProjectId, + namespaceId, + kind, + topicId, + subscriptionId, + minLikelihood, + maxFindings, + infoTypes, + customInfoTypes +) { + [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes); + + // [START dlp_inspect_datastore] + // Import the Google Cloud client libraries + const DLP = require('@google-cloud/dlp'); + const {PubSub} = require('@google-cloud/pubsub'); + + // Instantiates clients + const dlp = new DLP.DlpServiceClient(); + const pubsub = new PubSub(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The project ID the target Datastore is stored under + // This may or may not equal the calling project ID + // const dataProjectId = 'my-project'; + + // (Optional) The ID namespace of the Datastore document to inspect. + // To ignore Datastore namespaces, set this to an empty string ('') + // const namespaceId = ''; + + // The kind of the Datastore entity to inspect. 
+ // const kind = 'Person'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: {pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}'}}]; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + async function inspectDatastore() { + // Construct items to be inspected + const storageItems = { + datastoreOptions: { + partitionId: { + projectId: dataProjectId, + namespaceId: namespaceId, + }, + kind: { + name: kind, + }, + }, + }; + + // Construct request for creating an inspect job + const request = { + parent: `projects/${projectId}/locations/global`, + inspectJob: { + inspectConfig: { + infoTypes: infoTypes, + customInfoTypes: customInfoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + storageConfig: storageItems, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + // Run inspect-job creation request + const [topicResponse] = await pubsub.topic(topicId).get(); + // Verify the Pub/Sub topic and listen for job notifications via an + // existing subscription. 
+ const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + // Wait for DLP job to fully complete + setTimeout(() => { + console.log('Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + console.log(`Job ${job.name} status: ${job.state}`); + + const infoTypeStats = job.inspectDetails.result.infoTypeStats; + if (infoTypeStats.length > 0) { + infoTypeStats.forEach(infoTypeStat => { + console.log( + ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` + ); + }); + } else { + console.log('No findings.'); + } + } + inspectDatastore(); + // [END dlp_inspect_datastore] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes, customInfoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + + if (customInfoTypes) { + customInfoTypes = customInfoTypes.includes(',') + ? 
customInfoTypes.split(',').map((dict, idx) => { + return { + infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, + dictionary: {wordList: {words: dict.split(',')}}, + }; + }) + : customInfoTypes.split(',').map((rgx, idx) => { + return { + infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, + regex: {pattern: rgx}, + }; + }); + } + + return [infoTypes, customInfoTypes]; +} diff --git a/dlp/inspectFile.js b/dlp/inspectFile.js new file mode 100644 index 0000000000..26da96e5a7 --- /dev/null +++ b/dlp/inspectFile.js @@ -0,0 +1,143 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: Inspect File +// description: Inspects a local text, PNG, or JPEG file using the Data Loss Prevention API. 
+// usage: node inspectFile.js my-project filepath minLikelihood maxFindings infoTypes customInfoTypes includeQuote + +function main( + projectId, + filepath, + minLikelihood, + maxFindings, + infoTypes, + customInfoTypes, + includeQuote +) { + [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes); + + // [START dlp_inspect_file] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Import other required libraries + const fs = require('fs'); + const mime = require('mime'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The path to a local file to inspect. Can be a text, JPG, or PNG file. + // const filepath = 'path/to/image.png'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: {pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}'}}]; + + // Whether to include the matching string + // const includeQuote = true; + + async function inspectFile() { + // Construct file data to inspect + const fileTypeConstant = + ['image/jpeg', 'image/bmp', 'image/png', 'image/svg'].indexOf( + mime.getType(filepath) + ) + 1; + const fileBytes = Buffer.from(fs.readFileSync(filepath)).toString('base64'); + const item = { + byteItem: { + type: fileTypeConstant, + data: fileBytes, + }, + }; + + // Construct request + const request = { + parent: 
`projects/${projectId}/locations/global`, + inspectConfig: { + infoTypes: infoTypes, + customInfoTypes: customInfoTypes, + minLikelihood: minLikelihood, + includeQuote: includeQuote, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + item: item, + }; + + // Run request + const [response] = await dlp.inspectContent(request); + const findings = response.result.findings; + if (findings.length > 0) { + console.log('Findings:'); + findings.forEach(finding => { + if (includeQuote) { + console.log(`\tQuote: ${finding.quote}`); + } + console.log(`\tInfo type: ${finding.infoType.name}`); + console.log(`\tLikelihood: ${finding.likelihood}`); + }); + } else { + console.log('No findings.'); + } + } + // [END dlp_inspect_file] + inspectFile(); +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes, customInfoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + + if (customInfoTypes) { + customInfoTypes = customInfoTypes.includes(',') + ? customInfoTypes.split(',').map((dict, idx) => { + return { + infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, + dictionary: {wordList: {words: dict.split(',')}}, + }; + }) + : customInfoTypes.split(',').map((rgx, idx) => { + return { + infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, + regex: {pattern: rgx}, + }; + }); + } + + return [infoTypes, customInfoTypes]; +} diff --git a/dlp/inspectGCSFile.js b/dlp/inspectGCSFile.js new file mode 100644 index 0000000000..19e6eff170 --- /dev/null +++ b/dlp/inspectGCSFile.js @@ -0,0 +1,187 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// sample-metadata:
+// title: Inspect GCS File
+// description: Inspects a text file stored on Google Cloud Storage with the Data Loss Prevention API, using Pub/Sub for job notifications.
+// usage: node inspectGCSFile.js my-project bucketName fileName topicId subscriptionId minLikelihood maxFindings infoTypes customInfoTypes
+
+function main(
+  projectId,
+  bucketName,
+  fileName,
+  topicId,
+  subscriptionId,
+  minLikelihood,
+  maxFindings,
+  infoTypes,
+  customInfoTypes
+) {
+  [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes);
+
+  // [START dlp_inspect_gcs]
+  // Import the Google Cloud client libraries
+  const DLP = require('@google-cloud/dlp');
+  const {PubSub} = require('@google-cloud/pubsub');
+
+  // Instantiates clients
+  const dlp = new DLP.DlpServiceClient();
+  const pubsub = new PubSub();
+
+  // The project ID to run the API call under
+  // const projectId = 'my-project';
+
+  // The name of the bucket where the file resides.
+  // const bucketName = 'YOUR-BUCKET';
+
+  // The path to the file within the bucket to inspect.
+  // Can contain wildcards, e.g.
"my-image.*" + // const fileName = 'my-image.png'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: {pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}'}}]; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + async function inspectGCSFile() { + // Get reference to the file to be inspected + const storageItem = { + cloudStorageOptions: { + fileSet: {url: `gs://${bucketName}/${fileName}`}, + }, + }; + + // Construct request for creating an inspect job + const request = { + parent: `projects/${projectId}/locations/global`, + inspectJob: { + inspectConfig: { + infoTypes: infoTypes, + customInfoTypes: customInfoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + storageConfig: storageItem, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + + // Create a GCS File inspection job and wait for it to complete + const [topicResponse] = await pubsub.topic(topicId).get(); + // Verify the Pub/Sub topic and listen for job notifications via an + // existing subscription. 
+ const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + // Get the job's ID + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + + setTimeout(() => { + console.log('Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + console.log(`Job ${job.name} status: ${job.state}`); + + const infoTypeStats = job.inspectDetails.result.infoTypeStats; + if (infoTypeStats.length > 0) { + infoTypeStats.forEach(infoTypeStat => { + console.log( + ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` + ); + }); + } else { + console.log('No findings.'); + } + } + inspectGCSFile(); + // [END dlp_inspect_gcs] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes, customInfoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + + if (customInfoTypes) { + customInfoTypes = customInfoTypes.includes(',') + ? 
customInfoTypes.split(',').map((dict, idx) => { + return { + infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, + dictionary: {wordList: {words: dict.split(',')}}, + }; + }) + : customInfoTypes.split(',').map((rgx, idx) => { + return { + infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, + regex: {pattern: rgx}, + }; + }); + } + + return [infoTypes, customInfoTypes]; +} diff --git a/dlp/inspectString.js b/dlp/inspectString.js new file mode 100644 index 0000000000..cfeda45c5c --- /dev/null +++ b/dlp/inspectString.js @@ -0,0 +1,131 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Inspects strings +// description: Inspect a string using the Data Loss Prevention API. 
+// usage: node inspectString.js my-project string minLikelihood maxFindings infoTypes customInfoTypes includeQuote + +function main( + projectId, + string, + minLikelihood, + maxFindings, + infoTypes, + customInfoTypes, + includeQuote +) { + [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes); + + // [START dlp_inspect_string] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The string to inspect + // const string = 'My name is Gary and my email is gary@example.com'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: {pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}'}}]; + + // Whether to include the matching string + // const includeQuote = true; + + async function inspectString() { + // Construct item to inspect + const item = {value: string}; + + // Construct request + const request = { + parent: `projects/${projectId}/locations/global`, + inspectConfig: { + infoTypes: infoTypes, + customInfoTypes: customInfoTypes, + minLikelihood: minLikelihood, + includeQuote: includeQuote, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + item: item, + }; + + // Run request + const [response] = await dlp.inspectContent(request); + const findings = response.result.findings; + if (findings.length > 0) 
{ + console.log('Findings:'); + findings.forEach(finding => { + if (includeQuote) { + console.log(`\tQuote: ${finding.quote}`); + } + console.log(`\tInfo type: ${finding.infoType.name}`); + console.log(`\tLikelihood: ${finding.likelihood}`); + }); + } else { + console.log('No findings.'); + } + } + inspectString(); + // [END dlp_inspect_string] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes, customInfoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + + if (customInfoTypes) { + customInfoTypes = customInfoTypes.includes(',') + ? customInfoTypes.split(',').map((dict, idx) => { + return { + infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, + dictionary: {wordList: {words: dict.split(',')}}, + }; + }) + : customInfoTypes.split(',').map((rgx, idx) => { + return { + infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, + regex: {pattern: rgx}, + }; + }); + } + + return [infoTypes, customInfoTypes]; +} diff --git a/dlp/kAnonymityAnalysis.js b/dlp/kAnonymityAnalysis.js new file mode 100644 index 0000000000..b604c04991 --- /dev/null +++ b/dlp/kAnonymityAnalysis.js @@ -0,0 +1,165 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

// sample-metadata:
//  title: kAnonymity Analysis
//  description: Computes the k-anonymity of a column set in a Google BigQuery table
//  usage: node kAnonymityAnalysis.js my-project tableProjectId datasetId tableId topicId subscriptionId quasiIds

function main(
  projectId,
  tableProjectId,
  datasetId,
  tableId,
  topicId,
  subscriptionId,
  quasiIds
) {
  quasiIds = transformCLI(quasiIds);

  // [START dlp_k_anonymity]
  // Import the Google Cloud client libraries
  const DLP = require('@google-cloud/dlp');
  const {PubSub} = require('@google-cloud/pubsub');

  // Instantiates clients
  const dlp = new DLP.DlpServiceClient();
  const pubsub = new PubSub();

  // The project ID to run the API call under
  // const projectId = 'my-project';

  // The project ID the table is stored under
  // This may or (for public datasets) may not equal the calling project ID
  // const tableProjectId = 'my-project';

  // The ID of the dataset to inspect, e.g. 'my_dataset'
  // const datasetId = 'my_dataset';

  // The ID of the table to inspect, e.g. 'my_table'
  // const tableId = 'my_table';

  // The name of the Pub/Sub topic to notify once the job completes
  // TODO(developer): create a Pub/Sub topic to use for this
  // const topicId = 'MY-PUBSUB-TOPIC'

  // The name of the Pub/Sub subscription to use when listening for job
  // completion notifications
  // TODO(developer): create a Pub/Sub subscription to use for this
  // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION'

  // A set of columns that form a composite key ('quasi-identifiers')
  // const quasiIds = [{ name: 'age' }, { name: 'city' }];

  // Creates a k-anonymity risk analysis job, waits for its completion
  // notification on Pub/Sub, then prints the resulting histogram.
  async function kAnonymityAnalysis() {
    // BigQuery table to analyze
    const sourceTable = {
      projectId: tableProjectId,
      datasetId: datasetId,
      tableId: tableId,
    };

    // Construct request for creating a risk analysis job
    const request = {
      parent: `projects/${projectId}/locations/global`,
      riskJob: {
        privacyMetric: {
          kAnonymityConfig: {
            quasiIds: quasiIds,
          },
        },
        sourceTable: sourceTable,
        actions: [
          {
            // Publish a completion notification so we can await the job
            pubSub: {
              topic: `projects/${projectId}/topics/${topicId}`,
            },
          },
        ],
      },
    };

    // Create helper function for unpacking protobuf Value objects
    // (the payload lives under a single type-specific key)
    const getValue = obj => obj[Object.keys(obj)[0]];

    // Run risk analysis job
    const [topicResponse] = await pubsub.topic(topicId).get();
    const subscription = await topicResponse.subscription(subscriptionId);
    const [jobsResponse] = await dlp.createDlpJob(request);
    const jobName = jobsResponse.name;

    // Watch the Pub/Sub topic until the DLP job finishes
    await new Promise((resolve, reject) => {
      // Resolve only on the message for our job; nack unrelated messages
      const messageHandler = message => {
        if (message.attributes && message.attributes.DlpJobName === jobName) {
          message.ack();
          subscription.removeListener('message', messageHandler);
          subscription.removeListener('error', errorHandler);
          resolve(jobName);
        } else {
          message.nack();
        }
      };

      const errorHandler = err => {
        subscription.removeListener('message', messageHandler);
        subscription.removeListener('error', errorHandler);
        reject(err);
      };

      subscription.on('message', messageHandler);
      subscription.on('error', errorHandler);
    });

    // NOTE(review): this schedules a log message 500 ms later; it does NOT
    // delay the getDlpJob call below.
    setTimeout(() => {
      console.log(' Waiting for DLP job to fully complete');
    }, 500);

    // Fetch the finished job and print its equivalence-class histogram
    const [job] = await dlp.getDlpJob({name: jobName});
    const histogramBuckets =
      job.riskDetails.kAnonymityResult.equivalenceClassHistogramBuckets;

    histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => {
      console.log(`Bucket ${histogramBucketIdx}:`);
      console.log(
        `  Bucket size range: [${histogramBucket.equivalenceClassSizeLowerBound}, ${histogramBucket.equivalenceClassSizeUpperBound}]`
      );

      histogramBucket.bucketValues.forEach(valueBucket => {
        const quasiIdValues = valueBucket.quasiIdsValues
          .map(getValue)
          .join(', ');
        console.log(`  Quasi-ID values: {${quasiIdValues}}`);
        console.log(`  Class size: ${valueBucket.equivalenceClassSize}`);
      });
    });
  }
  kAnonymityAnalysis();
  // [END dlp_k_anonymity]
}

main(...process.argv.slice(2));
process.on('unhandledRejection', err => {
  console.error(err.message);
  process.exitCode = 1;
});

// Parse the comma-delimited quasi-identifier column names into the plain
// FieldId objects ({name: <column>}) that kAnonymityConfig.quasiIds expects
// (see the example in the parameter comment above).
// Fix: the previous version attached an extra `infoType` whose name was the
// numeric array index; FieldId carries only `name`, and infoType names must
// be strings anyway.
function transformCLI(quasiIds) {
  return quasiIds
    ? quasiIds.split(',').map(name => {
        return {name: name};
      })
    : undefined;
}

// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// sample-metadata:
//  title: kMap Estimation Analysis
//  description: Computes the k-map risk estimation of a column set in a Google BigQuery table.
//  usage: node kMapEstimationAnalysis.js my-project tableProjectId datasetId tableId topicId subscriptionId regionCode quasiIds infoTypes

function main(
  projectId,
  tableProjectId,
  datasetId,
  tableId,
  topicId,
  subscriptionId,
  regionCode,
  quasiIds,
  infoTypes
) {
  quasiIds = transformCLI(quasiIds, infoTypes);

  // [START dlp_k_map]
  // Import the Google Cloud client libraries
  const DLP = require('@google-cloud/dlp');
  const {PubSub} = require('@google-cloud/pubsub');

  // Instantiates clients
  const dlp = new DLP.DlpServiceClient();
  const pubsub = new PubSub();

  // The project ID to run the API call under
  // const projectId = 'my-project';

  // The project ID the table is stored under
  // This may or (for public datasets) may not equal the calling project ID
  // const tableProjectId = 'my-project';

  // The ID of the dataset to inspect, e.g. 'my_dataset'
  // const datasetId = 'my_dataset';

  // The ID of the table to inspect, e.g. 'my_table'
  // const tableId = 'my_table';

  // The name of the Pub/Sub topic to notify once the job completes
  // TODO(developer): create a Pub/Sub topic to use for this
  // const topicId = 'MY-PUBSUB-TOPIC'

  // The name of the Pub/Sub subscription to use when listening for job
  // completion notifications
  // TODO(developer): create a Pub/Sub subscription to use for this
  // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION'

  // The ISO 3166-1 region code that the data is representative of
  // Can be omitted if using a region-specific infoType (such as US_ZIP_5)
  // const regionCode = 'USA';

  // A set of columns that form a composite key ('quasi-identifiers'), and
  // optionally their reidentification distributions
  // const quasiIds = [{ field: { name: 'age' }, infoType: { name: 'AGE' }}];

  // Creates a k-map risk analysis job, waits for its completion notification
  // on Pub/Sub, then prints the resulting anonymity histogram.
  async function kMapEstimationAnalysis() {
    // BigQuery table to analyze
    const sourceTable = {
      projectId: tableProjectId,
      datasetId: datasetId,
      tableId: tableId,
    };

    // Construct request for creating a risk analysis job
    const request = {
      parent: `projects/${projectId}/locations/global`,
      riskJob: {
        privacyMetric: {
          kMapEstimationConfig: {
            quasiIds: quasiIds,
            regionCode: regionCode,
          },
        },
        sourceTable: sourceTable,
        actions: [
          {
            // Publish a completion notification so we can await the job
            pubSub: {
              topic: `projects/${projectId}/topics/${topicId}`,
            },
          },
        ],
      },
    };

    // Create helper function for unpacking protobuf Value objects
    // (the payload lives under a single type-specific key)
    const getValue = obj => obj[Object.keys(obj)[0]];

    // Run risk analysis job
    const [topicResponse] = await pubsub.topic(topicId).get();
    const subscription = await topicResponse.subscription(subscriptionId);
    const [jobsResponse] = await dlp.createDlpJob(request);
    const jobName = jobsResponse.name;

    // Watch the Pub/Sub topic until the DLP job finishes
    await new Promise((resolve, reject) => {
      // Resolve only on the message for our job; nack unrelated messages
      const messageHandler = message => {
        if (message.attributes && message.attributes.DlpJobName === jobName) {
          message.ack();
          subscription.removeListener('message', messageHandler);
          subscription.removeListener('error', errorHandler);
          resolve(jobName);
        } else {
          message.nack();
        }
      };

      const errorHandler = err => {
        subscription.removeListener('message', messageHandler);
        subscription.removeListener('error', errorHandler);
        reject(err);
      };

      subscription.on('message', messageHandler);
      subscription.on('error', errorHandler);
    });

    // NOTE(review): this schedules a log message 500 ms later; it does NOT
    // delay the getDlpJob call below.
    setTimeout(() => {
      console.log(' Waiting for DLP job to fully complete');
    }, 500);

    // Fetch the finished job and print its k-map estimation histogram
    const [job] = await dlp.getDlpJob({name: jobName});

    const histogramBuckets =
      job.riskDetails.kMapEstimationResult.kMapEstimationHistogram;

    histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => {
      console.log(`Bucket ${histogramBucketIdx}:`);
      console.log(
        `  Anonymity range: [${histogramBucket.minAnonymity}, ${histogramBucket.maxAnonymity}]`
      );
      console.log(`  Size: ${histogramBucket.bucketSize}`);
      histogramBucket.bucketValues.forEach(valueBucket => {
        const values = valueBucket.quasiIdsValues.map(value => getValue(value));
        console.log(`    Values: ${values.join(' ')}`);
        console.log(
          `    Estimated k-map anonymity: ${valueBucket.estimatedAnonymity}`
        );
      });
    });
  }

  kMapEstimationAnalysis();
  // [END dlp_k_map]
}
main(...process.argv.slice(2));
process.on('unhandledRejection', err => {
  console.error(err.message);
  process.exitCode = 1;
});

// Parse the parallel comma-delimited `quasiIds` and `infoTypes` CLI arguments
// into the TaggedField objects kMapEstimationConfig.quasiIds expects.
// Fix: the previous version indexed into `null` when the infoTypes argument
// was omitted (TypeError); now a field without a matching infoType simply
// carries no infoType tag.
function transformCLI(quasiIds, infoTypes) {
  const infoTypeNames = infoTypes ? infoTypes.split(',') : [];

  return quasiIds
    ? quasiIds.split(',').map((name, index) => {
        return {
          field: {
            name: name,
          },
          infoType: infoTypeNames[index]
            ? {name: infoTypeNames[index]}
            : undefined,
        };
      })
    : undefined;
}
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// sample-metadata:
//  title: l Diversity Analysis
//  description: Computes the l-diversity of a column set in a Google BigQuery table.
//  usage: node lDiversityAnalysis.js my-project tableProjectId datasetId tableId topicId subscriptionId sensitiveAttribute quasiIds

function main(
  projectId,
  tableProjectId,
  datasetId,
  tableId,
  topicId,
  subscriptionId,
  sensitiveAttribute,
  quasiIds
) {
  quasiIds = transformCLI(quasiIds);
  // [START dlp_l_diversity]
  // Import the Google Cloud client libraries
  const DLP = require('@google-cloud/dlp');
  const {PubSub} = require('@google-cloud/pubsub');

  // Instantiates clients
  const dlp = new DLP.DlpServiceClient();
  const pubsub = new PubSub();

  // The project ID to run the API call under
  // const projectId = 'my-project';

  // The project ID the table is stored under
  // This may or (for public datasets) may not equal the calling project ID
  // const tableProjectId = 'my-project';

  // The ID of the dataset to inspect, e.g. 'my_dataset'
  // const datasetId = 'my_dataset';

  // The ID of the table to inspect, e.g. 'my_table'
  // const tableId = 'my_table';

  // The name of the Pub/Sub topic to notify once the job completes
  // TODO(developer): create a Pub/Sub topic to use for this
  // const topicId = 'MY-PUBSUB-TOPIC'

  // The name of the Pub/Sub subscription to use when listening for job
  // completion notifications
  // TODO(developer): create a Pub/Sub subscription to use for this
  // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION'

  // The column to measure l-diversity relative to, e.g. 'firstName'
  // const sensitiveAttribute = 'name';

  // A set of columns that form a composite key ('quasi-identifiers')
  // const quasiIds = [{ name: 'age' }, { name: 'city' }];

  // Creates an l-diversity risk analysis job, waits for its completion
  // notification on Pub/Sub, then prints the resulting histogram.
  async function lDiversityAnalysis() {
    // BigQuery table to analyze
    const sourceTable = {
      projectId: tableProjectId,
      datasetId: datasetId,
      tableId: tableId,
    };

    // Construct request for creating a risk analysis job
    const request = {
      parent: `projects/${projectId}/locations/global`,
      riskJob: {
        privacyMetric: {
          lDiversityConfig: {
            quasiIds: quasiIds,
            sensitiveAttribute: {
              name: sensitiveAttribute,
            },
          },
        },
        sourceTable: sourceTable,
        actions: [
          {
            // Publish a completion notification so we can await the job
            pubSub: {
              topic: `projects/${projectId}/topics/${topicId}`,
            },
          },
        ],
      },
    };

    // Create helper function for unpacking protobuf Value objects
    // (the payload lives under a single type-specific key)
    const getValue = obj => obj[Object.keys(obj)[0]];

    // Run risk analysis job
    const [topicResponse] = await pubsub.topic(topicId).get();
    const subscription = await topicResponse.subscription(subscriptionId);
    const [jobsResponse] = await dlp.createDlpJob(request);
    const jobName = jobsResponse.name;

    // Watch the Pub/Sub topic until the DLP job finishes
    await new Promise((resolve, reject) => {
      // Resolve only on the message for our job; nack unrelated messages
      const messageHandler = message => {
        if (message.attributes && message.attributes.DlpJobName === jobName) {
          message.ack();
          subscription.removeListener('message', messageHandler);
          subscription.removeListener('error', errorHandler);
          resolve(jobName);
        } else {
          message.nack();
        }
      };

      const errorHandler = err => {
        subscription.removeListener('message', messageHandler);
        subscription.removeListener('error', errorHandler);
        reject(err);
      };

      subscription.on('message', messageHandler);
      subscription.on('error', errorHandler);
    });

    // NOTE(review): this schedules a log message 500 ms later; it does NOT
    // delay the getDlpJob call below.
    setTimeout(() => {
      console.log(' Waiting for DLP job to fully complete');
    }, 500);

    // Fetch the finished job and print its sensitive-value histogram
    const [job] = await dlp.getDlpJob({name: jobName});
    const histogramBuckets =
      job.riskDetails.lDiversityResult.sensitiveValueFrequencyHistogramBuckets;

    histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => {
      console.log(`Bucket ${histogramBucketIdx}:`);

      console.log(
        `Bucket size range: [${histogramBucket.sensitiveValueFrequencyLowerBound}, ${histogramBucket.sensitiveValueFrequencyUpperBound}]`
      );
      histogramBucket.bucketValues.forEach(valueBucket => {
        const quasiIdValues = valueBucket.quasiIdsValues
          .map(getValue)
          .join(', ');
        console.log(`  Quasi-ID values: {${quasiIdValues}}`);
        console.log(`  Class size: ${valueBucket.equivalenceClassSize}`);
        valueBucket.topSensitiveValues.forEach(valueObj => {
          console.log(
            `    Sensitive value ${getValue(valueObj.value)} occurs ${
              valueObj.count
            } time(s).`
          );
        });
      });
    });
  }

  lDiversityAnalysis();
  // [END dlp_l_diversity]
}

main(...process.argv.slice(2));
process.on('unhandledRejection', err => {
  console.error(err.message);
  process.exitCode = 1;
});

// Parse the comma-delimited quasi-identifier column names into the plain
// FieldId objects ({name: <column>}) that lDiversityConfig.quasiIds expects
// (see the example in the parameter comment above).
// Fix: the previous version attached an extra `infoType` whose name was the
// numeric array index; FieldId carries only `name`, and infoType names must
// be strings anyway.
function transformCLI(quasiIds) {
  return quasiIds
    ? quasiIds.split(',').map(name => {
        return {name: name};
      })
    : undefined;
}

// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// sample-metadata:
//  title: List Inspect Templates
//  description: List DLP inspection configuration templates.
//  usage: node listInspectTemplates.js my-project

function main(projectId) {
  // [START dlp_list_inspect_templates]
  // Imports the Google Cloud Data Loss Prevention library
  const DLP = require('@google-cloud/dlp');

  // Instantiates a client
  const dlp = new DLP.DlpServiceClient();

  // The project ID to run the API call under
  // const projectId = 'my-project';

  // Helper function to pretty-print protobuf Timestamps
  const formatDate = date => {
    const msSinceEpoch = parseInt(date.seconds, 10) * 1000;
    return new Date(msSinceEpoch).toLocaleString('en-US');
  };

  // Lists the project's inspect templates and prints each one.
  async function listInspectTemplates() {
    // Construct template-listing request
    const request = {
      parent: `projects/${projectId}/locations/global`,
    };

    // Run template-listing request
    const [templates] = await dlp.listInspectTemplates(request);

    for (const template of templates) {
      console.log(`Template ${template.name}`);
      if (template.displayName) {
        console.log(`  Display name: ${template.displayName}`);
      }

      console.log(`  Created: ${formatDate(template.createTime)}`);
      console.log(`  Updated: ${formatDate(template.updateTime)}`);

      // Summarize the template's inspection configuration
      const inspectConfig = template.inspectConfig;
      const infoTypes = inspectConfig.infoTypes.map(x => x.name);
      console.log(' InfoTypes:', infoTypes.join(' '));
      console.log(' Minimum likelihood:', inspectConfig.minLikelihood);
      console.log(' Include quotes:', inspectConfig.includeQuote);

      const limits = inspectConfig.limits;
      console.log(' Max findings per request:', limits.maxFindingsPerRequest);
    }
  }

  listInspectTemplates();
  // [END dlp_list_inspect_templates]
}

main(...process.argv.slice(2));
process.on('unhandledRejection', err => {
  console.error(err.message);
  process.exitCode = 1;
});

// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

// sample-metadata:
//  title: List jobs
//  description: List Data Loss Prevention API jobs corresponding to a given filter.
//  usage: node listJobs.js my-project filter jobType

function main(projectId, filter, jobType) {
  // [START dlp_list_jobs]
  // Imports the Google Cloud Data Loss Prevention library
  const DLP = require('@google-cloud/dlp');

  // Instantiates a client
  const dlp = new DLP.DlpServiceClient();

  // The project ID to run the API call under
  // const projectId = 'my-project';

  // The filter expression to use
  // For more information and filter syntax, see https://cloud.google.com/dlp/docs/reference/rest/v2/projects.dlpJobs/list
  // const filter = `state=DONE`;

  // The type of job to list (either 'INSPECT_JOB' or 'RISK_ANALYSIS_JOB')
  // const jobType = 'INSPECT_JOB';

  // Lists DLP jobs matching the filter and prints each job's state.
  async function listJobs() {
    // Construct request for listing DLP scan jobs
    const listRequest = {
      parent: `projects/${projectId}/locations/global`,
      filter: filter,
      type: jobType,
    };

    // Run job-listing request
    const [jobs] = await dlp.listDlpJobs(listRequest);
    for (const job of jobs) {
      console.log(`Job ${job.name} status: ${job.state}`);
    }
  }

  listJobs();
  // [END dlp_list_jobs]
}

main(...process.argv.slice(2));
process.on('unhandledRejection', err => {
  console.error(err.message);
  process.exitCode = 1;
});
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

// sample-metadata:
//  title: List Triggers
//  description: List Data Loss Prevention API job triggers.
//  usage: node listTriggers.js my-project

function main(projectId) {
  // [START dlp_list_triggers]
  // Imports the Google Cloud Data Loss Prevention library
  const DLP = require('@google-cloud/dlp');

  // Instantiates a client
  const dlp = new DLP.DlpServiceClient();

  // The project ID to run the API call under
  // const projectId = 'my-project'

  // Lists the project's job triggers and prints each one.
  async function listTriggers() {
    // Construct trigger listing request
    const request = {
      parent: `projects/${projectId}/locations/global`,
    };

    // Helper function to pretty-print protobuf Timestamps
    const formatDate = date => {
      const msSinceEpoch = parseInt(date.seconds, 10) * 1000;
      return new Date(msSinceEpoch).toLocaleString('en-US');
    };

    // Run trigger listing request
    const [triggers] = await dlp.listJobTriggers(request);
    for (const trigger of triggers) {
      // Log trigger details
      console.log(`Trigger ${trigger.name}:`);
      console.log(`  Created: ${formatDate(trigger.createTime)}`);
      console.log(`  Updated: ${formatDate(trigger.updateTime)}`);
      if (trigger.displayName) {
        console.log(`  Display Name: ${trigger.displayName}`);
      }
      if (trigger.description) {
        console.log(`  Description: ${trigger.description}`);
      }
      console.log(`  Status: ${trigger.status}`);
      console.log(`  Error count: ${trigger.errors.length}`);
    }
  }

  listTriggers();
  // [END dlp_list_triggers]
}

main(...process.argv.slice(2));
process.on('unhandledRejection', err => {
  console.error(err.message);
  process.exitCode = 1;
});
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';
// sample-metadata:
//  title: Metadata
//  description: List the types of sensitive information the DLP API supports
//  usage: node metadata.js my-project languageCode filter

function main(projectId, languageCode, filter) {
  // [START dlp_list_info_types]
  // Imports the Google Cloud Data Loss Prevention library
  const DLP = require('@google-cloud/dlp');

  // Instantiates a client
  const dlp = new DLP.DlpServiceClient();

  // The project ID to run the API call under
  // const projectId = 'my-project';

  // The BCP-47 language code to use, e.g. 'en-US'
  // const languageCode = 'en-US';

  // The filter to use
  // const filter = 'supported_by=INSPECT'

  // Lists the supported infoTypes and prints each name/display name.
  async function listInfoTypes() {
    const [response] = await dlp.listInfoTypes({
      languageCode: languageCode,
      filter: filter,
    });
    const {infoTypes} = response;
    console.log('Info types:');
    for (const infoType of infoTypes) {
      console.log(`\t${infoType.name} (${infoType.displayName})`);
    }
  }

  listInfoTypes();
  // [END dlp_list_info_types]
}

module.exports.main = main;

main(...process.argv.slice(2));
process.on('unhandledRejection', err => {
  console.error(err.message);
  process.exitCode = 1;
});

// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

// sample-metadata:
//  title: Numerical Risk Analysis
//  description: Computes risk metrics of a column of numbers in a Google BigQuery table.
//  usage: node numericalRiskAnalysis.js my-project tableProjectId datasetId tableId columnName topicId subscriptionId

function main(
  projectId,
  tableProjectId,
  datasetId,
  tableId,
  columnName,
  topicId,
  subscriptionId
) {
  // [START dlp_numerical_stats]
  // Import the Google Cloud client libraries
  const DLP = require('@google-cloud/dlp');
  const {PubSub} = require('@google-cloud/pubsub');

  // Instantiates clients
  const dlp = new DLP.DlpServiceClient();
  const pubsub = new PubSub();

  // The project ID to run the API call under
  // const projectId = 'my-project';

  // The project ID the table is stored under
  // This may or (for public datasets) may not equal the calling project ID
  // const tableProjectId = 'my-project';

  // The ID of the dataset to inspect, e.g. 'my_dataset'
  // const datasetId = 'my_dataset';

  // The ID of the table to inspect, e.g. 'my_table'
  // const tableId = 'my_table';

  // The name of the column to compute risk metrics for, e.g. 'age'
  // Note that this column must be a numeric data type
  // const columnName = 'age';

  // The name of the Pub/Sub topic to notify once the job completes
  // TODO(developer): create a Pub/Sub topic to use for this
  // const topicId = 'MY-PUBSUB-TOPIC'

  // The name of the Pub/Sub subscription to use when listening for job
  // completion notifications
  // TODO(developer): create a Pub/Sub subscription to use for this
  // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION'

  // Creates a numerical-stats risk analysis job and waits for its completion
  // notification on Pub/Sub before reading the results.
  async function numericalRiskAnalysis() {
    // BigQuery table to analyze
    const sourceTable = {
      projectId: tableProjectId,
      datasetId: datasetId,
      tableId: tableId,
    };

    // Construct request for creating a risk analysis job
    const request = {
      parent: `projects/${projectId}/locations/global`,
      riskJob: {
        privacyMetric: {
          numericalStatsConfig: {
            field: {
              name: columnName,
            },
          },
        },
        sourceTable: sourceTable,
        actions: [
          {
            // Publish a completion notification so we can await the job
            pubSub: {
              topic: `projects/${projectId}/topics/${topicId}`,
            },
          },
        ],
      },
    };

    // Create helper function for unpacking values
    // (protobuf Values keep the payload under a single type-specific key)
    const getValue = obj => obj[Object.keys(obj)[0]];

    // Run risk analysis job
    const [topicResponse] = await pubsub.topic(topicId).get();
    const subscription = await topicResponse.subscription(subscriptionId);
    const [jobsResponse] = await dlp.createDlpJob(request);
    const jobName = jobsResponse.name;
    // Watch the Pub/Sub topic until the DLP job finishes
    await new Promise((resolve, reject) => {
      // Resolve only on the message for our job; nack unrelated messages
      const messageHandler = message => {
        if (message.attributes && message.attributes.DlpJobName === jobName) {
          message.ack();
          subscription.removeListener('message', messageHandler);
          subscription.removeListener('error', errorHandler);
          resolve(jobName);
        } else {
          message.nack();
        }
      };

      // Propagate subscription errors after detaching both listeners
      const errorHandler = err => {
        subscription.removeListener('message', messageHandler);
        subscription.removeListener('error', errorHandler);
        reject(err);
      };

      subscription.on('message', messageHandler);
subscription.on('error', errorHandler); + }); + setTimeout(() => { + console.log(' Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + const results = job.riskDetails.numericalStatsResult; + + console.log( + `Value Range: [${getValue(results.minValue)}, ${getValue( + results.maxValue + )}]` + ); + + // Print unique quantile values + let tempValue = null; + results.quantileValues.forEach((result, percent) => { + const value = getValue(result); + + // Only print new values + if ( + tempValue !== value && + !(tempValue && tempValue.equals && tempValue.equals(value)) + ) { + console.log(`Value at ${percent}% quantile: ${value}`); + tempValue = value; + } + }); + } + + numericalRiskAnalysis(); + // [END dlp_numerical_stats] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/package.json b/dlp/package.json new file mode 100644 index 0000000000..c0dc1f0975 --- /dev/null +++ b/dlp/package.json @@ -0,0 +1,30 @@ +{ + "name": "dlp-samples", + "description": "Code samples for Google Cloud Platform's Data Loss Prevention API", + "private": true, + "license": "Apache-2.0", + "author": "Google Inc.", + "repository": "googleapis/nodejs-dlp", + "files": [ + "*.js" + ], + "engines": { + "node": ">=12.0.0" + }, + "scripts": { + "test": "mocha system-test/*.test.js --timeout=600000" + }, + "dependencies": { + "@google-cloud/dlp": "^4.3.0", + "@google-cloud/pubsub": "^3.0.0", + "mime": "^3.0.0", + "yargs": "^16.0.0" + }, + "devDependencies": { + "chai": "^4.2.0", + "mocha": "^8.0.0", + "pixelmatch": "^5.0.0", + "pngjs": "^6.0.0", + "uuid": "^9.0.0" + } +} diff --git a/dlp/quickstart.js b/dlp/quickstart.js new file mode 100644 index 0000000000..0d60c0e433 --- /dev/null +++ b/dlp/quickstart.js @@ -0,0 +1,91 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not 
use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Quickstart +// description: Inspects and assesses a string. +// usage: node quickstart.js my-project + +function main(projectId) { + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // [START dlp_quickstart] + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The string to inspect + const string = 'Robert Frost'; + + // The project ID to run the API call under + // const projectId = 'my-project'; + + async function quickStart() { + // The minimum likelihood required before returning a match + const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report (0 = server maximum) + const maxFindings = 0; + + // The infoTypes of information to match + const infoTypes = [{name: 'PERSON_NAME'}, {name: 'US_STATE'}]; + + // Whether to include the matching string + const includeQuote = true; + + // Construct item to inspect + const item = {value: string}; + + // Construct request + const request = { + parent: `projects/${projectId}/locations/global`, + inspectConfig: { + infoTypes: infoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + includeQuote: includeQuote, + }, + item: item, + }; + + // Run request + const [response] = await dlp.inspectContent(request); + const findings = response.result.findings; + if (findings.length > 0) { + console.log('Findings:'); + findings.forEach(finding => { + if 
(includeQuote) { + console.log(`\tQuote: ${finding.quote}`); + } + console.log(`\tInfo type: ${finding.infoType.name}`); + console.log(`\tLikelihood: ${finding.likelihood}`); + }); + } else { + console.log('No findings.'); + } + } + quickStart(); + // [END dlp_quickstart] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/redactImage.js b/dlp/redactImage.js new file mode 100644 index 0000000000..da893c2e85 --- /dev/null +++ b/dlp/redactImage.js @@ -0,0 +1,96 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: Redact Image +// description: Redact sensitive data from an image using the Data Loss Prevention API. +// usage: node redactImage.js my-project filepath minLikelihood infoTypes outputPath + +function main(projectId, filepath, minLikelihood, infoTypes, outputPath) { + infoTypes = transformCLI(infoTypes); + // [START dlp_redact_image] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Imports required Node.js libraries + const mime = require('mime'); + const fs = require('fs'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The path to a local file to inspect. Can be a JPG or PNG image file. 
+ // const filepath = 'path/to/image.png'; + + // The minimum likelihood required before redacting a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The infoTypes of information to redact + // const infoTypes = [{ name: 'EMAIL_ADDRESS' }, { name: 'PHONE_NUMBER' }]; + + // The local path to save the resulting image to. + // const outputPath = 'result.png'; + async function redactImage() { + const imageRedactionConfigs = infoTypes.map(infoType => { + return {infoType: infoType}; + }); + + // Load image + const fileTypeConstant = + ['image/jpeg', 'image/bmp', 'image/png', 'image/svg'].indexOf( + mime.getType(filepath) + ) + 1; + const fileBytes = Buffer.from(fs.readFileSync(filepath)).toString('base64'); + + // Construct image redaction request + const request = { + parent: `projects/${projectId}/locations/global`, + byteItem: { + type: fileTypeConstant, + data: fileBytes, + }, + inspectConfig: { + minLikelihood: minLikelihood, + infoTypes: infoTypes, + }, + imageRedactionConfigs: imageRedactionConfigs, + }; + + // Run image redaction request + const [response] = await dlp.redactImage(request); + const image = response.redactedImage; + fs.writeFileSync(outputPath, image); + console.log(`Saved image redaction results to path: ${outputPath}`); + } + redactImage(); + // [END dlp_redact_image] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + return infoTypes; +} diff --git a/dlp/reidentifyWithFpe.js b/dlp/reidentifyWithFpe.js new file mode 100644 index 0000000000..0fe8e14133 --- /dev/null +++ b/dlp/reidentifyWithFpe.js @@ -0,0 +1,103 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Reidentify with FPE +// description: Reidentify sensitive data in a string using Format Preserving Encryption (FPE). +// usage: node reidentifyWithFpe.js my-project string alphabet surrogateType keyName wrappedKey + +function main(projectId, string, alphabet, surrogateType, keyName, wrappedKey) { + // [START dlp_reidentify_fpe] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The string to reidentify + // const string = 'My SSN is PHONE_TOKEN(9):#########'; + + // The set of characters to replace sensitive ones with + // For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#ffxcommonnativealphabet + // const alphabet = 'ALPHA_NUMERIC'; + + // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key + // const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; + + // The encrypted ('wrapped') AES-256 key to use + // This key should be encrypted using the Cloud KMS key specified above + // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' + + // The name of the surrogate custom info type to use when reidentifying data + // const surrogateType = 'SOME_INFO_TYPE_DEID'; + + async function reidentifyWithFpe() { + // Construct deidentification request + 
const item = {value: string}; + const request = { + parent: `projects/${projectId}/locations/global`, + reidentifyConfig: { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + cryptoReplaceFfxFpeConfig: { + cryptoKey: { + kmsWrapped: { + wrappedKey: wrappedKey, + cryptoKeyName: keyName, + }, + }, + commonAlphabet: alphabet, + surrogateInfoType: { + name: surrogateType, + }, + }, + }, + }, + ], + }, + }, + inspectConfig: { + customInfoTypes: [ + { + infoType: { + name: surrogateType, + }, + surrogateType: {}, + }, + ], + }, + item: item, + }; + + // Run reidentification request + const [response] = await dlp.reidentifyContent(request); + const reidentifiedItem = response.item; + console.log(reidentifiedItem.value); + } + reidentifyWithFpe(); + // [END dlp_reidentify_fpe] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/resources/accounts.txt b/dlp/resources/accounts.txt new file mode 100644 index 0000000000..2763cd0ab8 --- /dev/null +++ b/dlp/resources/accounts.txt @@ -0,0 +1 @@ +My credit card number is 1234 5678 9012 3456, and my CVV is 789. \ No newline at end of file diff --git a/dlp/resources/dates.csv b/dlp/resources/dates.csv new file mode 100644 index 0000000000..6a80d40a49 --- /dev/null +++ b/dlp/resources/dates.csv @@ -0,0 +1,5 @@ +name,birth_date,register_date,credit_card +Ann,01/01/1980,07/21/1996,4532908762519852 +James,03/06/1988,04/09/2001,4301261899725540 +Dan,08/14/1945,11/15/2011,4620761856015295 +Laura,11/03/1992,01/04/2017,4564981067258901 diff --git a/dlp/resources/harmless.txt b/dlp/resources/harmless.txt new file mode 100644 index 0000000000..5666de37ab --- /dev/null +++ b/dlp/resources/harmless.txt @@ -0,0 +1 @@ +This file is mostly harmless. 
diff --git a/dlp/resources/test.png b/dlp/resources/test.png new file mode 100644 index 0000000000..137e14cd2c Binary files /dev/null and b/dlp/resources/test.png differ diff --git a/dlp/resources/test.txt b/dlp/resources/test.txt new file mode 100644 index 0000000000..c2ee3815bc --- /dev/null +++ b/dlp/resources/test.txt @@ -0,0 +1 @@ +My phone number is (223) 456-7890 and my email address is gary@somedomain.com. \ No newline at end of file diff --git a/dlp/system-test/deid.test.js b/dlp/system-test/deid.test.js new file mode 100644 index 0000000000..c145ef5c7c --- /dev/null +++ b/dlp/system-test/deid.test.js @@ -0,0 +1,109 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +'use strict'; + +const path = require('path'); +const {assert} = require('chai'); +const {describe, it, before} = require('mocha'); +const fs = require('fs'); +const cp = require('child_process'); +const DLP = require('@google-cloud/dlp'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const harmfulString = 'My SSN is 372819127'; +const harmlessString = 'My favorite color is blue'; +const surrogateType = 'SSN_TOKEN'; +const csvFile = 'resources/dates.csv'; +const tempOutputFile = path.join(__dirname, 'temp.result.csv'); +const dateShiftAmount = 30; +const dateFields = 'birth_date,register_date'; + +const client = new DLP.DlpServiceClient(); +describe('deid', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); + // deidentify_masking + it('should mask sensitive data in a string', () => { + const output = execSync( + `node deidentifyWithMask.js ${projectId} "${harmfulString}" x 5` + ); + assert.include(output, 'My SSN is xxxxx9127'); + }); + + it('should ignore insensitive data when masking a string', () => { + const output = execSync( + `node deidentifyWithMask.js ${projectId} "${harmlessString}"` + ); + assert.include(output, harmlessString); + }); + + // deidentify_fpe + it('should handle FPE encryption errors', () => { + let output; + try { + output = execSync( + `node deidentifyWithFpe.js ${projectId} "${harmfulString}" '[0-9A-Za-z]' 'BAD_KEY_NAME' 'BAD_KEY_NAME'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'invalid encoding'); + }); + + // reidentify_fpe + it('should handle FPE decryption errors', () => { + let output; + try { + output = execSync( + `node reidentifyWithFpe.js ${projectId} "${harmfulString}" '[0-9A-Za-z]' ${surrogateType} 'BAD_KEY_NAME' 'BAD_KEY_NAME NUMERIC'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'invalid encoding'); + }); + + // deidentify_date_shift + it('should date-shift a CSV file', () => { + const 
outputCsvFile = 'dates.actual.csv'; + const output = execSync( + `node deidentifyWithDateShift.js ${projectId} "${csvFile}" "${outputCsvFile}" ${dateFields} ${dateShiftAmount} ${dateShiftAmount}` + ); + assert.include( + output, + `Successfully saved date-shift output to ${outputCsvFile}` + ); + assert.notInclude( + fs.readFileSync(outputCsvFile).toString(), + fs.readFileSync(csvFile).toString() + ); + }); + + it('should handle date-shift errors', () => { + let output; + try { + output = execSync( + `node deidentifyWithDateShift.js ${projectId} "${csvFile}" "${tempOutputFile}" ${dateShiftAmount} ${dateShiftAmount}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); + }); +}); diff --git a/dlp/system-test/inspect.test.js b/dlp/system-test/inspect.test.js new file mode 100644 index 0000000000..7704f8036a --- /dev/null +++ b/dlp/system-test/inspect.test.js @@ -0,0 +1,282 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +'use strict'; + +const {assert} = require('chai'); +const {describe, it, before, after} = require('mocha'); +const cp = require('child_process'); +const {PubSub} = require('@google-cloud/pubsub'); +const pubsub = new PubSub(); +const uuid = require('uuid'); +const DLP = require('@google-cloud/dlp'); + +const bucket = 'nodejs-docs-samples-dlp'; +const dataProject = 'nodejs-docs-samples'; + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const client = new DLP.DlpServiceClient(); +describe('inspect', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); + let topic, subscription; + const topicName = `dlp-inspect-topic-${uuid.v4()}`; + const subscriptionName = `dlp-inspect-subscription-${uuid.v4()}`; + before(async () => { + [topic] = await pubsub.createTopic(topicName); + [subscription] = await topic.createSubscription(subscriptionName); + }); + + // Delete custom topic/subscription + after(async () => { + await subscription.delete(); + await topic.delete(); + }); + + // inspect_string + it('should inspect a string', () => { + const output = execSync( + `node inspectString.js ${projectId} "I'm Gary and my email is gary@example.com"` + ); + assert.match(output, /Info type: EMAIL_ADDRESS/); + }); + + it('should inspect a string with custom dictionary', () => { + const output = execSync( + `node inspectString.js ${projectId} "I'm Gary and my email is gary@example.com" 'LIKELIHOOD_UNSPECIFIED' '0' 'PHONE_NUMBER' "Gary,email"` + ); + assert.match(output, /Info type: CUSTOM_DICT_0/); + }); + + it('should inspect a string with custom regex', () => { + const output = execSync( + `node inspectString.js ${projectId} "I'm Gary and my email is gary@example.com" 'LIKELIHOOD_UNSPECIFIED' '0' 'PHONE_NUMBER' "gary@example\\.com"` + ); + assert.match(output, /Info type: CUSTOM_REGEX_0/); + }); + + it('should handle a string with no sensitive data', () => { + const output = execSync(`node inspectString.js ${projectId} 
string "foo"`); + assert.include(output, 'No findings.'); + }); + + it('should report string inspection handling errors', () => { + let output; + try { + output = execSync( + `node inspectString.js ${projectId} "I'm Gary and my email is gary@example.com" 'LIKELIHOOD_UNSPECIFIED' '0' BAD_TYPE` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'BAD_TYPE'); + }); + + // inspect_file + it('should inspect a local text file', () => { + const output = execSync( + `node inspectFile.js ${projectId} resources/test.txt` + ); + assert.match(output, /Info type: PHONE_NUMBER/); + assert.match(output, /Info type: EMAIL_ADDRESS/); + }); + + it('should inspect a local text file with custom dictionary', () => { + const output = execSync( + `node inspectFile.js ${projectId} resources/test.txt 'LIKELIHOOD_UNSPECIFIED' '0' 'PHONE_NUMBER' "Gary,email"` + ); + assert.match(output, /Info type: CUSTOM_DICT_0/); + }); + + it('should inspect a local text file with custom regex', () => { + const output = execSync( + `node inspectFile.js ${projectId} resources/test.txt 'LIKELIHOOD_UNSPECIFIED' '0' 'PHONE_NUMBER' "\\(\\d{3}\\) \\d{3}-\\d{4}"` + ); + assert.match(output, /Info type: CUSTOM_REGEX_0/); + }); + + it('should inspect a local image file', () => { + const output = execSync( + `node inspectFile.js ${projectId} resources/test.png` + ); + assert.match(output, /Info type: EMAIL_ADDRESS/); + }); + + it('should handle a local file with no sensitive data', () => { + const output = execSync( + `node inspectFile.js ${projectId} resources/harmless.txt` + ); + assert.match(output, /No findings/); + }); + + it('should report local file handling errors', () => { + let output; + try { + output = execSync( + `node inspectFile.js ${projectId} resources/harmless.txt 'LIKELIHOOD_UNSPECIFIED' '0' 'BAD_TYPE'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); + }); + + // inspect_gcs_file_promise + it.skip('should inspect a GCS text 
file', () => { + const output = execSync( + `node inspectGCSFile.js ${projectId} ${bucket} test.txt ${topicName} ${subscriptionName}` + ); + assert.match(output, /Found \d instance\(s\) of infoType PHONE_NUMBER/); + assert.match(output, /Found \d instance\(s\) of infoType EMAIL_ADDRESS/); + }); + + it.skip('should inspect multiple GCS text files', () => { + const output = execSync( + `node inspectGCSFile.js ${projectId} ${bucket} "*.txt" ${topicName} ${subscriptionName}` + ); + assert.match(output, /Found \d instance\(s\) of infoType PHONE_NUMBER/); + assert.match(output, /Found \d instance\(s\) of infoType EMAIL_ADDRESS/); + }); + + it.skip('should handle a GCS file with no sensitive data', () => { + const output = execSync( + `node inspectGCSFile.js ${projectId} ${bucket} harmless.txt ${topicName} ${subscriptionName}` + ); + assert.match(output, /No findings/); + }); + + it('should report GCS file handling errors', () => { + let output; + try { + output = execSync( + `node inspectGCSFile.js ${projectId} ${bucket} harmless.txt ${topicName} ${subscriptionName} 'LIKELIHOOD_UNSPECIFIED' '0' 'BAD_TYPE'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); + }); + + // inspect_datastore + it.skip('should inspect Datastore', () => { + const output = execSync( + `node inspectDatastore.js ${projectId} Person ${topicName} ${subscriptionName} --namespaceId DLP -p ${dataProject}` + ); + assert.match(output, /Found \d instance\(s\) of infoType EMAIL_ADDRESS/); + }); + + it.skip('should handle Datastore with no sensitive data', () => { + const output = execSync( + `node inspectDatastore.js ${projectId} Harmless ${topicName} ${subscriptionName} --namespaceId DLP -p ${dataProject}` + ); + assert.match(output, /No findings/); + }); + + it('should report Datastore errors', () => { + let output; + try { + output = execSync( + `node inspectDatastore.js ${projectId} ${projectId} 'DLP' 'Person' ${topicName} ${subscriptionName} 
'LIKELIHOOD_UNSPECIFIED' '0' 'BAD_TYPE'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); + }); + + // inspect_bigquery + it.skip('should inspect a Bigquery table', () => { + const output = execSync( + `node inspectBigQuery.js ${projectId} integration_tests_dlp harmful ${topicName} ${subscriptionName} -p ${dataProject}` + ); + assert.match(output, /Found \d instance\(s\) of infoType PHONE_NUMBER/); + }); + + it.skip('should handle a Bigquery table with no sensitive data', () => { + const output = execSync( + `node inspectBigQuery.js ${projectId} integration_tests_dlp harmless ${topicName} ${subscriptionName} -p ${dataProject}` + ); + assert.match(output, /No findings/); + }); + + it('should report Bigquery table handling errors', () => { + let output; + try { + output = execSync( + `node inspectBigQuery.js ${projectId} ${dataProject} integration_tests_dlp harmless ${topicName} ${subscriptionName} 'LIKELIHOOD_UNSPECIFIED' '0' 'BAD_TYPE'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); + }); + + // CLI options + // This test is potentially flaky, possibly because of model changes. + it('should have a minLikelihood option', () => { + const outputA = execSync( + `node inspectString.js ${projectId} "My phone number is (123) 456-7890." VERY_LIKELY` + ); + const outputB = execSync( + `node inspectString.js ${projectId} "My phone number is (123) 456-7890." UNLIKELY` + ); + assert.ok(outputA); + assert.notMatch(outputA, /PHONE_NUMBER/); + assert.match(outputB, /PHONE_NUMBER/); + }); + + it('should have a maxFindings option', () => { + const outputA = execSync( + `node inspectString.js ${projectId} "My email is gary@example.com and my phone number is (223) 456-7890." LIKELIHOOD_UNSPECIFIED 1` + ); + const outputB = execSync( + `node inspectString.js ${projectId} "My email is gary@example.com and my phone number is (223) 456-7890." 
LIKELIHOOD_UNSPECIFIED 2` + ); + assert.notStrictEqual( + outputA.includes('PHONE_NUMBER'), + outputA.includes('EMAIL_ADDRESS') + ); // Exactly one of these should be included + assert.match(outputB, /PHONE_NUMBER/); + assert.match(outputB, /EMAIL_ADDRESS/); + }); + + it('should have an option to include quotes', () => { + const outputA = execSync( + `node inspectString.js ${projectId} "My phone number is (223) 456-7890." '' '' '' '' false` + ); + const outputB = execSync( + `node inspectString.js ${projectId} "My phone number is (223) 456-7890." '' '' '' '' ` + ); + assert.ok(outputA); + assert.notMatch(outputB, /\(223\) 456-7890/); + assert.match(outputA, /\(223\) 456-7890/); + }); + + it('should have an option to filter results by infoType', () => { + const outputA = execSync( + `node inspectString.js ${projectId} "My email is gary@example.com and my phone number is (223) 456-7890."` + ); + const outputB = execSync( + `node inspectString.js ${projectId} "My email is gary@example.com and my phone number is (223) 456-7890." LIKELIHOOD_UNSPECIFIED 0 PHONE_NUMBER` + ); + assert.match(outputA, /EMAIL_ADDRESS/); + assert.match(outputA, /PHONE_NUMBER/); + assert.notMatch(outputB, /EMAIL_ADDRESS/); + assert.match(outputB, /PHONE_NUMBER/); + }); +}); diff --git a/dlp/system-test/jobs.test.js b/dlp/system-test/jobs.test.js new file mode 100644 index 0000000000..8258ad7f04 --- /dev/null +++ b/dlp/system-test/jobs.test.js @@ -0,0 +1,159 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +const {assert} = require('chai'); +const {describe, it, before} = require('mocha'); +const cp = require('child_process'); +const DLP = require('@google-cloud/dlp'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const badJobName = 'projects/not-a-project/dlpJobs/i-123456789'; + +const testTableProjectId = 'bigquery-public-data'; +const testDatasetId = 'san_francisco'; +const testTableId = 'bikeshare_trips'; +const testColumnName = 'zip_code'; + +const client = new DLP.DlpServiceClient(); + +// createTestJob needs time to finish creating a DLP job, before listing +// tests will succeed. +const delay = async test => { + const retries = test.currentRetry(); + if (retries === 0) return; // no retry on the first failure. + // see: https://cloud.google.com/storage/docs/exponential-backoff: + const ms = Math.pow(2, retries) * 1000 + Math.random() * 2000; + return new Promise(done => { + console.info(`retrying "${test.title}" in ${ms}ms`); + setTimeout(done, ms); + }); +}; + +describe('test', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); + // Helper function for creating test jobs + const createTestJob = async () => { + // Initialize client library + const DLP = require('@google-cloud/dlp').v2; + const dlp = new DLP.DlpServiceClient(); + + // Construct job request + const request = { + parent: `projects/${projectId}/locations/global`, + riskJob: { + privacyMetric: { + categoricalStatsConfig: { + field: { + name: testColumnName, + }, + }, + }, + sourceTable: { + projectId: testTableProjectId, + datasetId: testDatasetId, + tableId: testTableId, + }, + }, + }; + + // Create job + const [response] = await dlp.createDlpJob(request); + return response.name; + }; + + // Create a test job + let testJobName; + before(async () => { + testJobName = await createTestJob(); + await 
deleteStaleJobs(); + }); + + async function deleteStaleJobs() { + const dlp = new DLP.DlpServiceClient(); + const request = { + parent: `projects/${projectId}/locations/global`, + filter: 'state=DONE', + type: 'RISK_ANALYSIS_JOB', + }; + const [jobs] = await dlp.listDlpJobs(request); + for (const job of jobs) { + const TEN_HOURS_MS = 1000 * 60 * 60 * 10; + const created = Number(job.createTime.seconds) * 1000; + const now = Date.now(); + if (now - created > TEN_HOURS_MS) { + console.info(`delete ${job.name}`); + await dlp.deleteDlpJob({name: job.name}); + } + } + } + + // dlp_list_jobs + it('should list jobs', async function () { + this.retries(5); + await delay(this.test); + const output = execSync( + `node listJobs.js ${projectId} 'state=DONE' RISK_ANALYSIS_JOB` + ); + assert.match( + output, + /Job projects\/(\w|-)+\/locations\/global\/dlpJobs\/\w-\d+ status: DONE/ + ); + }); + + it('should list jobs of a given type', async function () { + this.retries(7); + await delay(this.test); + const output = execSync( + `node listJobs.js ${projectId} 'state=DONE' RISK_ANALYSIS_JOB` + ); + assert.match( + output, + /Job projects\/(\w|-)+\/locations\/global\/dlpJobs\/r-\d+ status: DONE/ + ); + }); + + it('should handle job listing errors', () => { + let output; + try { + output = execSync(`node listJobs.js ${projectId} 'state=NOPE'`); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); + }); + + // dlp_delete_job + it('should delete job', () => { + const output = execSync(`node deleteJob.js ${projectId} ${testJobName}`); + assert.include(output, `Successfully deleted job ${testJobName}.`); + }); + + it('should handle job deletion errors', () => { + let output; + try { + output = execSync(`node deleteJob.js ${projectId} ${badJobName}`); + } catch (err) { + output = err.message; + } + console.log(output); + assert.match(output, /Error in deleteJob/); + }); +}); diff --git a/dlp/system-test/metadata.test.js 
b/dlp/system-test/metadata.test.js new file mode 100644 index 0000000000..c8ec161ea3 --- /dev/null +++ b/dlp/system-test/metadata.test.js @@ -0,0 +1,42 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +const {assert} = require('chai'); +const {describe, it, before} = require('mocha'); +const cp = require('child_process'); +const DLP = require('@google-cloud/dlp'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const client = new DLP.DlpServiceClient(); +describe('metadata', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); + it('should list info types', () => { + const output = execSync(`node metadata.js ${projectId} infoTypes`); + assert.match(output, /US_DRIVERS_LICENSE_NUMBER/); + }); + + it('should filter listed info types', () => { + const output = execSync( + `node metadata.js ${projectId} infoTypes "supported_by=RISK_ANALYSIS"` + ); + assert.notMatch(output, /US_DRIVERS_LICENSE_NUMBER/); + }); +}); diff --git a/dlp/system-test/quickstart.test.js b/dlp/system-test/quickstart.test.js new file mode 100644 index 0000000000..2e000674b7 --- /dev/null +++ b/dlp/system-test/quickstart.test.js @@ -0,0 +1,35 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +const {assert} = require('chai'); +const {describe, it, before} = require('mocha'); +const cp = require('child_process'); +const DLP = require('@google-cloud/dlp'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const client = new DLP.DlpServiceClient(); +describe('quickstart', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); + it('should run', () => { + const output = execSync(`node quickstart.js ${projectId}`); + assert.match(output, /Info type: PERSON_NAME/); + }); +}); diff --git a/dlp/system-test/resources/date-shift-context.expected.csv b/dlp/system-test/resources/date-shift-context.expected.csv new file mode 100644 index 0000000000..2329cb63ce --- /dev/null +++ b/dlp/system-test/resources/date-shift-context.expected.csv @@ -0,0 +1,5 @@ +name,birth_date,register_date,credit_card +Ann,1/31/1980,8/20/1996,4532908762519852 +James,4/5/1988,5/9/2001,4301261899725540 +Dan,9/13/1945,12/15/2011,4620761856015295 +Laura,12/3/1992,2/3/2017,4564981067258901 diff --git a/dlp/system-test/risk.test.js b/dlp/system-test/risk.test.js new file mode 100644 index 0000000000..b62d937e07 --- /dev/null +++ b/dlp/system-test/risk.test.js @@ -0,0 +1,208 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +const {assert} = require('chai'); +const {describe, it, before, after} = require('mocha'); +const uuid = require('uuid'); +const {PubSub} = require('@google-cloud/pubsub'); +const cp = require('child_process'); +const DLP = require('@google-cloud/dlp'); + +const execSync = cmd => { + return cp.execSync(cmd, { + encoding: 'utf-8', + stdio: [null, null, null], + }); +}; + +const dataset = 'integration_tests_dlp'; +const uniqueField = 'Name'; +const numericField = 'Age'; +const pubsub = new PubSub(); +const client = new DLP.DlpServiceClient(); + +/* + * The tests in this file rely on a table in BigQuery entitled + * "integration_tests_dlp.harmful" with the following fields: + * + * Age NUMERIC NULLABLE + * Name STRING NULLABLE + * + * Insert into this table a few rows of Age/Name pairs. 
+ */ +describe('risk', () => { + let projectId; + // Create new custom topic/subscription + let topic, subscription, topicName, subscriptionName; + + before(async () => { + topicName = `dlp-risk-topic-${uuid.v4()}-${Date.now()}`; + subscriptionName = `dlp-risk-subscription-${uuid.v4()}-${Date.now()}`; + projectId = await client.getProjectId(); + [topic] = await pubsub.createTopic(topicName); + [subscription] = await topic.createSubscription(subscriptionName); + await deleteOldTopics(); + }); + + async function deleteOldTopics() { + const [topics] = await pubsub.getTopics(); + const now = Date.now(); + const TEN_HOURS_MS = 1000 * 60 * 60 * 10; + for (const topic of topics) { + const created = Number(topic.name.split('-').pop()); + if ( + topic.name.includes('dlp-risk-topic') && + now - created > TEN_HOURS_MS + ) { + const [subscriptions] = await topic.getSubscriptions(); + for (const subscription of subscriptions) { + console.info(`deleting ${subscription.name}`); + await subscription.delete(); + } + console.info(`deleting ${topic.name}`); + await topic.delete(); + } + } + } + + // Delete custom topic/subscription + after(async () => { + await subscription.delete(); + await topic.delete(); + }); + + // numericalRiskAnalysis + it('should perform numerical risk analysis', () => { + const output = execSync( + `node numericalRiskAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${numericField} ${topicName} ${subscriptionName}` + ); + assert.match(output, /Value at 0% quantile:/); + assert.match(output, /Value at \d+% quantile:/); + }); + + it('should handle numerical risk analysis errors', () => { + let output; + try { + output = execSync( + `node numericalRiskAnalysis.js ${projectId} ${projectId} ${dataset} nonexistent ${numericField} ${topicName} ${subscriptionName}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'NOT_FOUND'); + }); + + // categoricalRiskAnalysis + it('should perform categorical risk analysis on a string field', () 
=> { + const output = execSync( + `node categoricalRiskAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${uniqueField} ${topicName} ${subscriptionName}` + ); + assert.match(output, /Most common value occurs \d time\(s\)/); + }); + + it('should perform categorical risk analysis on a number field', () => { + const output = execSync( + `node categoricalRiskAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${numericField} ${topicName} ${subscriptionName}` + ); + assert.match(output, /Most common value occurs \d time\(s\)/); + }); + + it('should handle categorical risk analysis errors', () => { + let output; + try { + output = execSync( + `node categoricalRiskAnalysis.js ${projectId} ${projectId} ${dataset} nonexistent ${uniqueField} ${topicName} ${subscriptionName}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); + }); + + // kAnonymityAnalysis + it('should perform k-anonymity analysis on a single field', () => { + const output = execSync( + `node kAnonymityAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${topicName} ${subscriptionName} ${numericField}` + ); + console.log(output); + assert.include(output, 'Quasi-ID values:'); + assert.include(output, 'Class size:'); + }); + + it('should handle k-anonymity analysis errors', () => { + let output; + try { + output = execSync( + `node kAnonymityAnalysis.js ${projectId} ${projectId} ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); + }); + + // kMapAnalysis + it('should perform k-map analysis on a single field', () => { + const output = execSync( + `node kMapEstimationAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${topicName} ${subscriptionName} 'US' ${numericField} AGE` + ); + assert.match(output, /Anonymity range: \[\d+, \d+\]/); + assert.match(output, /Size: \d/); + assert.match(output, /Values: \d{2}/); + }); + + it('should handle k-map 
analysis errors', () => { + let output; + try { + output = execSync( + `node kMapEstimationAnalysis.js ${projectId} ${projectId} ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField} AGE` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); + }); + + it('should check that numbers of quasi-ids and info types are equal', () => { + assert.throws(() => { + execSync( + `node kMapEstimationAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${topicName} ${subscriptionName} 'US' 'Age,Gender' AGE` + ); + }, /3 INVALID_ARGUMENT: InfoType name cannot be empty of a TaggedField/); + }); + + // lDiversityAnalysis + it('should perform l-diversity analysis on a single field', () => { + const output = execSync( + `node lDiversityAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${topicName} ${subscriptionName} ${uniqueField} ${numericField}` + ); + assert.match(output, /Quasi-ID values:/); + assert.match(output, /Class size: \d/); + assert.match(output, /Sensitive value/); + }); + + it('should handle l-diversity analysis errors', () => { + let output; + try { + output = execSync( + `node lDiversityAnalysis.js ${projectId} ${projectId} ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); + }); +}); diff --git a/dlp/system-test/temp.result.csv b/dlp/system-test/temp.result.csv new file mode 100644 index 0000000000..2329cb63ce --- /dev/null +++ b/dlp/system-test/temp.result.csv @@ -0,0 +1,5 @@ +name,birth_date,register_date,credit_card +Ann,1/31/1980,8/20/1996,4532908762519852 +James,4/5/1988,5/9/2001,4301261899725540 +Dan,9/13/1945,12/15/2011,4620761856015295 +Laura,12/3/1992,2/3/2017,4564981067258901 diff --git a/dlp/system-test/templates.test.js b/dlp/system-test/templates.test.js new file mode 100644 index 0000000000..16b330d9d6 --- /dev/null +++ b/dlp/system-test/templates.test.js @@ -0,0 +1,103 @@ +// Copyright 
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

const {assert} = require('chai');
const {describe, it, before} = require('mocha');
const cp = require('child_process');
const uuid = require('uuid');
const DLP = require('@google-cloud/dlp');

// Run a sample as a child process and capture its stdout as UTF-8 text.
const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'});

const client = new DLP.DlpServiceClient();

// System tests for the inspect-template samples (create / list / delete).
// Mocha runs these in declaration order, so list/delete operate on the
// template created by the first test.
describe('templates', () => {
  let projectId;
  let fullTemplateName;
  const INFO_TYPE = 'PERSON_NAME';
  const MIN_LIKELIHOOD = 'VERY_LIKELY';
  const MAX_FINDINGS = 5;
  const INCLUDE_QUOTE = false;
  const DISPLAY_NAME = `My Template ${uuid.v4()}`;
  const TEMPLATE_NAME = `my-template-${uuid.v4()}`;

  before(async () => {
    projectId = await client.getProjectId();
    fullTemplateName = `projects/${projectId}/locations/global/inspectTemplates/${TEMPLATE_NAME}`;
  });

  // create_inspect_template
  it('should create template', () => {
    const output = execSync(
      `node createInspectTemplate.js ${projectId} "${TEMPLATE_NAME}" "${DISPLAY_NAME}" ${INFO_TYPE} ${INCLUDE_QUOTE} ${MIN_LIKELIHOOD} ${MAX_FINDINGS}`
    );
    console.log(output);
    assert.include(output, `Successfully created template ${fullTemplateName}`);
  });

  it('should handle template creation errors', () => {
    let output;
    try {
      output = execSync(
        `node createInspectTemplate.js ${projectId} invalid_template#id`
      );
    } catch (err) {
      output = err.message;
    }
    assert.include(output, 'INVALID_ARGUMENT');
  });

  // list_inspect_templates
  it('should list templates', () => {
    const output = execSync(`node listInspectTemplates.js ${projectId}`);
    // FIX: this previously asserted `Template ${templateName}` against a
    // module-level `const templateName = ''`, which only checked for the
    // literal prefix "Template " and could never detect a missing template.
    // Assert on the template created above instead (the sibling triggers
    // suite uses the same `Trigger ${fullTriggerName}` pattern).
    assert.include(output, `Template ${fullTemplateName}`);
    assert.match(output, /Created: \d{1,2}\/\d{1,2}\/\d{4}/);
    assert.match(output, /Updated: \d{1,2}\/\d{1,2}\/\d{4}/);
  });

  it('should pass creation settings to template', () => {
    const output = execSync(`node listInspectTemplates.js ${projectId}`);
    assert.include(output, fullTemplateName);
    assert.include(output, DISPLAY_NAME);
    assert.include(output, INFO_TYPE);
    assert.include(output, MIN_LIKELIHOOD);
    // FIX: chai 4's assert.include rejects a number needle against a string
    // target (invalid string/number combination), so the original
    // `assert.include(output, MAX_FINDINGS)` would throw regardless of the
    // sample's output. Compare the stringified value instead.
    assert.include(output, String(MAX_FINDINGS));
  });

  // delete_inspect_template
  it('should delete template', () => {
    const output = execSync(
      `node deleteInspectTemplate.js ${projectId} ${fullTemplateName}`
    );
    assert.include(
      output,
      `Successfully deleted template ${fullTemplateName}.`
    );
  });

  it('should handle template deletion errors', () => {
    let output;
    try {
      output = execSync(
        `node deleteInspectTemplate.js ${projectId} BAD_TEMPLATE`
      );
    } catch (err) {
      output = err.message;
    }
    assert.include(output, 'INVALID_ARGUMENT');
  });
});
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

const {assert} = require('chai');
const {describe, it, before} = require('mocha');
const cp = require('child_process');
const uuid = require('uuid');
const DLP = require('@google-cloud/dlp');

// Shell out to a sample script and capture its stdout.
function execSync(cmd) {
  return cp.execSync(cmd, {encoding: 'utf-8'});
}

const client = new DLP.DlpServiceClient();

// System tests for the job-trigger samples (create / list / delete).
describe('triggers', () => {
  let projectId;
  let fullTriggerName;

  // Per-run unique trigger identity and user-visible metadata.
  const triggerName = `my-trigger-${uuid.v4()}`;
  const triggerDisplayName = `My Trigger Display Name: ${uuid.v4()}`;
  const triggerDescription = `My Trigger Description: ${uuid.v4()}`;

  // Inspection settings forwarded to createTrigger.js.
  const infoType = 'PERSON_NAME';
  const minLikelihood = 'VERY_LIKELY';
  const maxFindings = 5;
  const bucketName = process.env.BUCKET_NAME;

  before(async () => {
    projectId = await client.getProjectId();
    fullTriggerName = `projects/${projectId}/locations/global/jobTriggers/${triggerName}`;
  });

  // Runs a command; on a non-zero exit the error message stands in for the
  // output so the error-path tests below can assert on its text.
  const attempt = cmd => {
    try {
      return execSync(cmd);
    } catch (err) {
      return err.message;
    }
  };

  it('should create a trigger', () => {
    const output = execSync(
      `node createTrigger.js ${projectId} ${triggerName} "${triggerDisplayName}" "${triggerDescription}" ${bucketName} true '1' ${infoType} ${minLikelihood} ${maxFindings}`
    );
    assert.include(output, `Successfully created trigger ${fullTriggerName}`);
  });

  it('should list triggers', () => {
    const output = execSync(`node listTriggers.js ${projectId}`);
    assert.include(output, `Trigger ${fullTriggerName}`);
    assert.include(output, `Display Name: ${triggerDisplayName}`);
    assert.include(output, `Description: ${triggerDescription}`);
    assert.match(output, /Created: \d{1,2}\/\d{1,2}\/\d{4}/);
    assert.match(output, /Updated: \d{1,2}\/\d{1,2}\/\d{4}/);
    assert.match(output, /Status: HEALTHY/);
    assert.match(output, /Error count: 0/);
  });

  it('should delete a trigger', () => {
    const output = execSync(
      `node deleteTrigger.js ${projectId} ${fullTriggerName}`
    );
    assert.include(output, `Successfully deleted trigger ${fullTriggerName}.`);
  });

  it('should handle trigger creation errors', () => {
    const output = attempt(
      `node createTrigger.js ${projectId} 'name' "${triggerDisplayName}" ${bucketName} true 1 "@@@@@" ${minLikelihood} ${maxFindings}`
    );
    assert.include(output, 'fail');
  });

  it('should handle trigger deletion errors', () => {
    const output = attempt(
      `node deleteTrigger.js ${projectId} 'bad-trigger-path'`
    );
    assert.include(output, 'fail');
  });
});