From 08b20753cc882ad438cc1254632841bcdefd0fcd Mon Sep 17 00:00:00 2001
From: daschwanden
Date: Tue, 10 Sep 2024 23:49:34 +0200
Subject: [PATCH] Adds functionality to autocreate signed URLs for large file upload (#1116)

---
 grr/core/grr_response_core/config/server.py   | 24 ++++++
 .../flows/general/large_file.py               | 74 +++++++++++++++++++
 2 files changed, 98 insertions(+)

diff --git a/grr/core/grr_response_core/config/server.py b/grr/core/grr_response_core/config/server.py
index db0c5e25c..dfed4118c 100644
--- a/grr/core/grr_response_core/config/server.py
+++ b/grr/core/grr_response_core/config/server.py
@@ -382,3 +382,27 @@
         "`/etc/passwd.cache` file."
     ),
 )
+
+config_lib.DEFINE_string(
+    "Server.signed_url_service_account_email",
+    default=None,
+    help=(
+        "The email of the Service Account to use for signing the URL (https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable)."
+    ),
+)
+
+config_lib.DEFINE_string(
+    "Server.signed_url_gcs_bucket_name",
+    default=None,
+    help=(
+        "The GCS bucket name to include in the signed URL (https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable)."
+    ),
+)
+
+config_lib.DEFINE_integer(
+    "Server.signed_url_expire_hours",
+    default=12,
+    help=(
+        "The number of hours until the signed URL expires (https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable)."
+    ),
+)
diff --git a/grr/server/grr_response_server/flows/general/large_file.py b/grr/server/grr_response_server/flows/general/large_file.py
index eacf71534..733c08751 100644
--- a/grr/server/grr_response_server/flows/general/large_file.py
+++ b/grr/server/grr_response_server/flows/general/large_file.py
@@ -1,6 +1,15 @@
 #!/usr/bin/env python
 """A module with the implementation of the large file collection flow."""
+import logging
 import os
+from datetime import datetime, timedelta, timezone
+
+import google.auth
+from google.auth import compute_engine
+from google.auth.transport import requests
+from google.cloud import storage
+
+from grr_response_core import config
 
 from grr_response_core.lib.rdfvalues import large_file as rdf_large_file
 from grr_response_core.lib.rdfvalues import paths as rdf_paths
@@ -64,11 +73,32 @@ def Start(self) -> None:
     # analyst decrypt the file later.
     self.state.encryption_key = os.urandom(16)
 
+    sa_email = config.CONFIG["Server.signed_url_service_account_email"]
+    bucket_name = config.CONFIG["Server.signed_url_gcs_bucket_name"]
+    expires_hours = config.CONFIG["Server.signed_url_expire_hours"]
+
     args = rdf_large_file.CollectLargeFileArgs()
     args.path_spec = self.args.path_spec
     args.signed_url = self.args.signed_url
     args.encryption_key = self.state.encryption_key
 
+    if not args.signed_url:
+      if not sa_email:
+        # Log that no Service Account Email config has been provided
+        self.Log("To autocreate a signed URL you need to provide a Service Account Email: https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable")
+      elif not bucket_name:
+        # Log that no GCS config has been provided
+        self.Log("To autocreate a signed URL you need to provide a GCS Bucket: https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable")
+      else:
+        file_name = os.path.basename(args.path_spec.path)
+        blob_name = f"{self.rdf_flow.client_id}-{self.rdf_flow.flow_id}-{file_name}"
+        self.Log("Signed URL Service Account email: %s", sa_email)
+        self.Log("Signed URL GCS Bucket Name: %s", bucket_name)
+        self.Log("Signed URL expires in %s hours", expires_hours)
+        self.Log("GCS blob_name: %s", blob_name)
+        args.signed_url = self.generate_resumable_upload_url(bucket_name, blob_name, sa_email, expires_hours)
+        self.Log("Signed URL: %s", args.signed_url)
+
     self.CallClient(
         server_stubs.CollectLargeFile,
         args,
@@ -102,3 +132,47 @@ def Callback(self, responses: _Responses) -> None:
 
     self.state.session_uri = response.session_uri
     self.state.progress.session_uri = response.session_uri
+
+  def generate_resumable_upload_url(
+      self,
+      bucket_name: str,
+      blob_name: str,
+      sa_email: str,
+      expires_hours: int,
+  ) -> str:
+    """Generates a v4 signed URL for starting a resumable upload of a blob.
+
+    Args:
+      bucket_name: Name of the GCS bucket the blob will be uploaded to.
+      blob_name: Name of the blob (object) to create in the bucket.
+      sa_email: Email of the service account used for signing the URL.
+      expires_hours: Number of hours after which the signed URL expires.
+
+    Returns:
+      A signed URL accepting the HTTP POST request that initiates a resumable
+      upload.
+    """
+    auth_request = requests.Request()
+    credentials, project = google.auth.default()
+    storage_client = storage.Client(project, credentials)
+    # Build the references locally: `lookup_bucket` costs an API round trip and
+    # returns `None` for a missing bucket, which would crash on `.blob` below.
+    blob = storage_client.bucket(bucket_name).blob(blob_name)
+    # Naive datetimes are interpreted as UTC by `generate_signed_url`, so the
+    # expiration has to be timezone-aware to be right on non-UTC servers.
+    expires_at = datetime.now(tz=timezone.utc) + timedelta(hours=expires_hours)
+    signing_credentials = compute_engine.IDTokenCredentials(
+        auth_request, "", service_account_email=sa_email
+    )
+
+    return blob.generate_signed_url(
+        version="v4",
+        expiration=expires_at,
+        method="POST",
+        content_type="application/octet-stream",
+        headers={
+            "X-Goog-Resumable": "start",
+            "Content-Type": "application/octet-stream",
+        },
+        credentials=signing_credentials,
+    )