Feature: Map Extracted Files to Artifact Definitions in image_export.py #4949

Open · wants to merge 4 commits into main
73 changes: 46 additions & 27 deletions plaso/cli/image_export_tool.py
@@ -48,14 +48,6 @@ class ImageExportTool(storage_media_tool.StorageMediaTool):

_COPY_BUFFER_SIZE = 32768

_DIRTY_CHARACTERS = frozenset([
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
os.path.sep, '!', '$', '%', '&', '*', '+', ':', ';', '<', '>',
'?', '@', '|', '~', '\x7f'])

_HASHES_FILENAME = 'hashes.json'

_READ_BUFFER_SIZE = 4096
@@ -99,6 +91,8 @@ def __init__(self, input_reader=None, output_writer=None):

self.has_filters = False
self.list_signature_identifiers = False
self._enable_artifacts_map = False
self._artifacts_paths_map = collections.defaultdict(list)

def _CalculateDigestHash(self, file_entry, data_stream_name):
"""Calculates a SHA-256 digest of the contents of the file entry.
@@ -128,8 +122,11 @@ def _CalculateDigestHash(self, file_entry, data_stream_name):
return hasher_object.GetStringDigest()

def _CreateSanitizedDestination(
self, source_file_entry, file_system_path_spec, source_data_stream_name,
destination_path):
self,
source_file_entry,
file_system_path_spec,
source_data_stream_name,
destination_path):
"""Creates a sanitized path of both destination directory and filename.

This function replaces non-printable and other characters defined in
@@ -151,11 +148,7 @@ def _CreateSanitizedDestination(
path = getattr(file_system_path_spec, 'location', None)
path_segments = file_system.SplitPath(path)

# Sanitize each path segment.
for index, path_segment in enumerate(path_segments):
path_segments[index] = ''.join([
character if character not in self._DIRTY_CHARACTERS else '_'
for character in path_segment])
path_segments = path_helper.PathHelper.SanitizePathSegments(path_segments)

target_filename = path_segments.pop()

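The removed _DIRTY_CHARACTERS set and the in-line sanitization loop appear to have been consolidated into path_helper.PathHelper.SanitizePathSegments. A minimal sketch of what such a helper could look like, reconstructed from the removed logic; the real implementation lives in the path helper module and is not part of this diff.

import os

_DIRTY_CHARACTERS = frozenset(
    [chr(value) for value in range(0x20)] + [
        os.path.sep, '!', '$', '%', '&', '*', '+', ':', ';', '<', '>',
        '?', '@', '|', '~', '\x7f'])

def SanitizePathSegments(path_segments):
  """Replaces non-printable and reserved characters in each segment with '_'."""
  return [
      ''.join([
          character if character not in _DIRTY_CHARACTERS else '_'
          for character in path_segment])
      for path_segment in path_segments]
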
@@ -213,17 +206,9 @@ def _ExtractDataStream(

target_directory, target_filename = self._CreateSanitizedDestination(
file_entry, file_entry.path_spec, data_stream_name, destination_path)

# If it does not exist, append a path separator for consistent behaviour.
if not destination_path.endswith(os.path.sep):
destination_path = destination_path + os.path.sep

# TODO: refactor
path = None

path = path_helper.PathHelper.GetRelativePath(
target_directory, target_filename, destination_path)
target_path = os.path.join(target_directory, target_filename)
if target_path.startswith(destination_path):
path = target_path[len(destination_path):]

self._paths_by_hash[digest].append(path)

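The relative-path computation that used to live in-line here (join the target directory and filename, then strip the destination prefix) is now delegated to path_helper.PathHelper.GetRelativePath. A sketch of the equivalent behaviour, assuming the helper mirrors the removed code; the actual helper may handle more cases.

import os

def GetRelativePath(target_directory, target_filename, destination_path):
  """Returns the target path relative to the destination path, or None."""
  # Append a path separator so prefix matching behaves consistently.
  if not destination_path.endswith(os.path.sep):
    destination_path = destination_path + os.path.sep

  target_path = os.path.join(target_directory, target_filename)
  if target_path.startswith(destination_path):
    return target_path[len(destination_path):]
  return None
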
@@ -247,6 +232,13 @@ def _ExtractDataStream(
f'exists.'))
return

# Generate a map between artifacts and extracted paths
if self._enable_artifacts_map:
for artifact_name in self._filter_collection.GetMatchingArtifacts(
path, os.sep):
self._artifacts_paths_map.setdefault(
artifact_name, []).append(path)

try:
self._WriteFileEntry(file_entry, data_stream_name, target_path)
except (IOError, dfvfs_errors.BackEndError) as exception:
@@ -348,13 +340,19 @@ def _Extract(

try:
extraction_engine.BuildCollectionFilters(
environment_variables, user_accounts,
environment_variables,
user_accounts,
artifact_filter_names=artifact_filters,
filter_file_path=filter_file)
filter_file_path=filter_file,
enable_artifacts_map=self._enable_artifacts_map)
except errors.InvalidFilter as exception:
raise errors.BadConfigOption(
f'Unable to build collection filters with error: {exception!s}')

if self._enable_artifacts_map:
self._filter_collection.SetArtifactsTrie(
extraction_engine.GetArtifactsTrie())

excluded_find_specs = extraction_engine.GetCollectionExcludedFindSpecs()
included_find_specs = extraction_engine.GetCollectionIncludedFindSpecs()

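GetArtifactsTrie, SetArtifactsTrie and GetMatchingArtifacts are new interfaces introduced by this change; their implementations are not shown in this diff. A hypothetical sketch of the underlying idea, a trie keyed on path segments so that an extracted path can be mapped back to the artifact definitions that cover it (glob patterns and recursion markers in real artifact definitions are ignored here):

class ArtifactsPathTrie(object):
  """Hypothetical trie mapping path segments to artifact definition names."""

  _ARTIFACTS_KEY = '__artifact_names__'

  def __init__(self):
    self._root = {}

  def AddPath(self, artifact_name, path, path_separator):
    """Registers a path that belongs to an artifact definition."""
    node = self._root
    for segment in path.strip(path_separator).split(path_separator):
      node = node.setdefault(segment, {})
    node.setdefault(self._ARTIFACTS_KEY, set()).add(artifact_name)

  def GetMatchingArtifacts(self, path, path_separator):
    """Returns the names of artifact definitions that match the path."""
    node = self._root
    for segment in path.strip(path_separator).split(path_separator):
      if segment not in node:
        return set()
      node = node[segment]
    return node.get(self._ARTIFACTS_KEY, set())
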
@@ -654,6 +652,12 @@ def ParseArguments(self, arguments):

self.AddFilterOptions(argument_parser)

argument_parser.add_argument(
'--enable_artifacts_map', dest='enable_artifacts_map',
action='store_true', default=False, help=(
'Output a JSON file mapping extracted files/directories to '
'artifact definitions.'))

argument_parser.add_argument(
'-w', '--write', action='store', dest='path', type=str,
metavar='PATH', default='export', help=(
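
An illustrative invocation of the new --enable_artifacts_map option (the artifact filter name, image path and output directory below are placeholders, not taken from this change):

image_export.py --artifact_filters WindowsSystemRegistryFiles --enable_artifacts_map -w export image.raw

With the option set, the mapping is written to artifacts_map.json inside the export directory, as shown in the ProcessSource changes below.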
@@ -785,6 +789,9 @@ def ParseOptions(self, options):

self._EnforceProcessMemoryLimit(self._process_memory_limit)

self._enable_artifacts_map = getattr(
options, 'enable_artifacts_map', False)

def PrintFilterCollection(self):
"""Prints the filter collection."""
self._filter_collection.Print(self._output_writer)
@@ -799,6 +806,12 @@ def ProcessSource(self):
"""
try:
self.ScanSource(self._source_path)
if self._source_type not in self._SOURCE_TYPES_TO_PREPROCESS:
self._output_writer.Write((
f'Input must be one of {list(self._SOURCE_TYPES_TO_PREPROCESS)}; '
f'the "{self._source_type}" source type is not supported.\n'))
return
except dfvfs_errors.UserAbort as exception:
raise errors.UserAbort(exception)

@@ -823,5 +836,11 @@ def ProcessSource(self):
json_data.append({'sha256': sha256, 'paths': paths})
json.dump(json_data, file_object)

if self._enable_artifacts_map:
artifacts_map_file = os.path.join(
self._destination_path, 'artifacts_map.json')
with open(artifacts_map_file, 'w', encoding='utf-8') as file_object:
json.dump(self._artifacts_paths_map, file_object)

self._output_writer.Write('Export completed.\n')
self._output_writer.Write('\n')
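
When the option is enabled, artifacts_map.json in the destination directory contains a JSON object keyed by artifact definition name, with the list of extracted paths for each. A hypothetical example of its contents (artifact names and paths are illustrative only):

{
  "WindowsSystemRegistryFiles": ["Windows/System32/config/SYSTEM"],
  "WindowsEventLogSystem": ["Windows/System32/winevt/Logs/System.evtx"]
}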