Skip to content

Commit

Permalink
Share logic for building custom info types
Browse files Browse the repository at this point in the history
  • Loading branch information
mwdaub authored Jun 14, 2018
1 parent 5258658 commit 47fc04f
Showing 1 changed file with 28 additions and 75 deletions.
103 changes: 28 additions & 75 deletions dlp/inspect_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,21 +53,8 @@ def inspect_string(project, content_string, info_types,

# Prepare custom_info_types by parsing the dictionary word lists and
# regex patterns.
if custom_dictionaries is None:
custom_dictionaries = []
dictionaries = [{
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
'dictionary': {
'word_list': {'words': custom_dictionaries[i].split(',')}
}
} for i in range(len(custom_dictionaries))]
if custom_regexes is None:
custom_regexes = []
regexes = [{
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
'regex': {'pattern': custom_regexes[i]}
} for i in range(len(custom_regexes))]
custom_info_types = dictionaries + regexes
custom_info_types = build_custom_info_types(custom_dictionaries,
custom_regexes)

# Construct the configuration dictionary. Keys which are None may
# optionally be omitted entirely.
Expand Down Expand Up @@ -141,21 +128,8 @@ def inspect_file(project, filename, info_types, min_likelihood=None,

# Prepare custom_info_types by parsing the dictionary word lists and
# regex patterns.
if custom_dictionaries is None:
custom_dictionaries = []
dictionaries = [{
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
'dictionary': {
'word_list': {'words': custom_dictionaries[i].split(',')}
}
} for i in range(len(custom_dictionaries))]
if custom_regexes is None:
custom_regexes = []
regexes = [{
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
'regex': {'pattern': custom_regexes[i]}
} for i in range(len(custom_regexes))]
custom_info_types = dictionaries + regexes
custom_info_types = build_custom_info_types(custom_dictionaries,
custom_regexes)

# Construct the configuration dictionary. Keys which are None may
# optionally be omitted entirely.
Expand Down Expand Up @@ -254,21 +228,8 @@ def inspect_gcs_file(project, bucket, filename, topic_id, subscription_id,

# Prepare custom_info_types by parsing the dictionary word lists and
# regex patterns.
if custom_dictionaries is None:
custom_dictionaries = []
dictionaries = [{
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
'dictionary': {
'word_list': {'words': custom_dictionaries[i].split(',')}
}
} for i in range(len(custom_dictionaries))]
if custom_regexes is None:
custom_regexes = []
regexes = [{
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
'regex': {'pattern': custom_regexes[i]}
} for i in range(len(custom_regexes))]
custom_info_types = dictionaries + regexes
custom_info_types = build_custom_info_types(custom_dictionaries,
custom_regexes)

# Construct the configuration dictionary. Keys which are None may
# optionally be omitted entirely.
Expand Down Expand Up @@ -400,21 +361,8 @@ def inspect_datastore(project, datastore_project, kind,

# Prepare custom_info_types by parsing the dictionary word lists and
# regex patterns.
if custom_dictionaries is None:
custom_dictionaries = []
dictionaries = [{
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
'dictionary': {
'word_list': {'words': custom_dictionaries[i].split(',')}
}
} for i in range(len(custom_dictionaries))]
if custom_regexes is None:
custom_regexes = []
regexes = [{
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
'regex': {'pattern': custom_regexes[i]}
} for i in range(len(custom_regexes))]
custom_info_types = dictionaries + regexes
custom_info_types = build_custom_info_types(custom_dictionaries,
custom_regexes)

# Construct the configuration dictionary. Keys which are None may
# optionally be omitted entirely.
Expand Down Expand Up @@ -551,21 +499,8 @@ def inspect_bigquery(project, bigquery_project, dataset_id, table_id,

# Prepare custom_info_types by parsing the dictionary word lists and
# regex patterns.
if custom_dictionaries is None:
custom_dictionaries = []
dictionaries = [{
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
'dictionary': {
'word_list': {'words': custom_dictionaries[i].split(',')}
}
} for i in range(len(custom_dictionaries))]
if custom_regexes is None:
custom_regexes = []
regexes = [{
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
'regex': {'pattern': custom_regexes[i]}
} for i in range(len(custom_regexes))]
custom_info_types = dictionaries + regexes
custom_info_types = build_custom_info_types(custom_dictionaries,
custom_regexes)

# Construct the configuration dictionary. Keys which are None may
# optionally be omitted entirely.
Expand Down Expand Up @@ -651,6 +586,24 @@ def callback(message):
# [END dlp_inspect_bigquery]


def build_custom_info_types(custom_dictionaries, custom_regexes):
    """Build the custom info type entries shared by the inspect_* samples.

    Args:
        custom_dictionaries: Iterable of comma-separated word-list strings,
            or None. Each string becomes one dictionary info type named
            'CUSTOM_DICTIONARY_<index>', with its words obtained by
            splitting the string on ','.
        custom_regexes: Iterable of regex pattern strings, or None. Each
            pattern becomes one regex info type named
            'CUSTOM_REGEX_<index>'.

    Returns:
        A list of dicts: all dictionary-based entries first, followed by
        all regex-based entries. Empty when both arguments are None or
        empty.
    """
    # None means "no custom entries of this kind".
    if custom_dictionaries is None:
        custom_dictionaries = []
    if custom_regexes is None:
        custom_regexes = []

    # enumerate() supplies the positional suffix for each name and works
    # with any iterable, not just sequences that support len()/indexing.
    dictionaries = [{
        'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
        'dictionary': {
            'word_list': {'words': word_list.split(',')}
        }
    } for i, word_list in enumerate(custom_dictionaries)]
    regexes = [{
        'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
        'regex': {'pattern': pattern}
    } for i, pattern in enumerate(custom_regexes)]
    return dictionaries + regexes


if __name__ == '__main__':
default_project = os.environ.get('GCLOUD_PROJECT')

Expand Down

0 comments on commit 47fc04f

Please sign in to comment.