Skip to content

Option to Raise FILESYSTEM and WEBSCREENSHOT with base64 blobs #1438

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 48 additions & 2 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import io
import re
import json
import base64
import asyncio
import logging
import tarfile
import ipaddress
import traceback

from copy import copy
from typing import Optional
from datetime import datetime
from contextlib import suppress
from urllib.parse import urljoin
from pydantic import BaseModel, field_validator
from pathlib import Path

from .helpers import *
from bbot.core.errors import *
Expand Down Expand Up @@ -769,11 +774,48 @@ def _host(self):
if isinstance(self.data, dict) and "host" in self.data:
return make_ip_type(self.data["host"])
else:
parsed = getattr(self, "parsed")
parsed = getattr(self, "parsed", None)
if parsed is not None:
return make_ip_type(parsed.hostname)


class DictPathEvent(DictEvent):
    """Dict-based event whose data may reference a file or folder on disk.

    Every key listed in ``_path_keywords`` that is present in the event data
    is treated as a filesystem path. The event is tagged ``file`` or
    ``folder`` accordingly and, when the scan enables it, the contents are
    attached to the data as a base64 string under the ``blob`` key.
    """

    # Data keys that are interpreted as filesystem paths
    _path_keywords = ["path", "filename"]

    def sanitize_data(self, data):
        """Return a copy of ``data``, optionally enriched with a base64 ``blob``.

        Files are read verbatim when the scan's ``_file_blobs`` attribute is
        truthy; directories are packed into a tar.gz archive when the scan's
        ``_folder_blobs`` attribute is truthy. Both default to ``False`` when
        the scan object does not define them. Paths that are neither a file
        nor a directory are left untouched and untagged.
        """
        sanitized = dict(data)
        include_files = getattr(self.scan, "_file_blobs", False)
        include_folders = getattr(self.scan, "_folder_blobs", False)

        for keyword in self._path_keywords:
            try:
                location = Path(data[keyword])
            except KeyError:
                # This keyword is not part of the event data
                continue

            raw = None
            if location.is_file():
                self.add_tag("file")
                if include_files:
                    with open(location, "rb") as handle:
                        raw = handle.read()
            elif location.is_dir():
                self.add_tag("folder")
                if include_folders:
                    raw = self._tar_directory(location)

            # Empty content (or blobs disabled) leaves the data unchanged
            if raw:
                sanitized["blob"] = base64.b64encode(raw).decode("utf-8")

        return sanitized

    def _tar_directory(self, dir_path):
        """Return the bytes of an in-memory tar.gz archive of ``dir_path``.

        The directory is stored under its own name (``dir_path.name``) rather
        than its full path.
        """
        with io.BytesIO() as archive_bytes:
            with tarfile.open(fileobj=archive_bytes, mode="w:gz") as archive:
                archive.add(dir_path, arcname=dir_path.name)
            # The archive must be closed before its bytes are read back
            return archive_bytes.getvalue()


class ASN(DictEvent):
_always_emit = True
_quick_emit = True
Expand Down Expand Up @@ -1177,7 +1219,7 @@ class SOCIAL(DictHostEvent):
_scope_distance_increment_same_host = True


class WEBSCREENSHOT(DictHostEvent):
class WEBSCREENSHOT(DictPathEvent, DictHostEvent):
_always_emit = True
_quick_emit = True

Expand All @@ -1203,6 +1245,10 @@ def _pretty_string(self):
return self.data["waf"]


class FILESYSTEM(DictPathEvent):
    """Event type for filesystem paths; all behavior is inherited from DictPathEvent."""


def make_event(
data,
event_type=None,
Expand Down
4 changes: 4 additions & 0 deletions bbot/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ status_frequency: 15
http_proxy:
# Web user-agent
user_agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.2151.97
# Include the raw data of files (e.g. PDFs, web screenshots) as base64 in the event
file_blobs: false
# Include the raw data of directories (e.g. git repos) as tar.gz base64 in the event
folder_blobs: false

### WEB SPIDER ###

Expand Down
5 changes: 3 additions & 2 deletions bbot/modules/docker_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ async def handle_event(self, event):
tags=["docker", "tarball"],
source=event,
)
codebase_event.scope_distance = event.scope_distance
await self.emit_event(codebase_event)
if codebase_event:
codebase_event.scope_distance = event.scope_distance
await self.emit_event(codebase_event)

def get_registry_and_repository(self, repository_url):
"""Function to get the registry and repository from a html repository URL."""
Expand Down
2 changes: 2 additions & 0 deletions bbot/modules/filedownload.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,12 @@ class filedownload(BaseModule):
"yaml", # YAML Ain't Markup Language
],
"max_filesize": "10MB",
"base_64_encoded_file": "false",
}
options_desc = {
"extensions": "File extensions to download",
"max_filesize": "Cancel download if filesize is greater than this size",
"base_64_encoded_file": "Stream the bytes of a file and encode them in base 64 for event data.",
}

scope_distance_modifier = 3
Expand Down
4 changes: 1 addition & 3 deletions bbot/modules/git_clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ async def handle_event(self, event):
repo_path = await self.clone_git_repository(repo_url)
if repo_path:
self.verbose(f"Cloned {repo_url} to {repo_path}")
codebase_event = self.make_event(
{"path": str(repo_path)}, "FILESYSTEM", tags=["git", "folder"], source=event
)
codebase_event = self.make_event({"path": str(repo_path)}, "FILESYSTEM", tags=["git"], source=event)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The removal of the folder tag here will stop the unstructured module from crawling the folder

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tag will still be there, only now it's added automatically during the creation of the event.

codebase_event.scope_distance = event.scope_distance
await self.emit_event(codebase_event)

Expand Down
4 changes: 2 additions & 2 deletions bbot/modules/gowitness.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ async def handle_batch(self, *events):
for filename, screenshot in self.new_screenshots.items():
url = screenshot["url"]
final_url = screenshot["final_url"]
filename = screenshot["filename"]
webscreenshot_data = {"filename": filename, "url": final_url}
filename = self.screenshot_path / screenshot["filename"]
webscreenshot_data = {"filename": str(filename), "url": final_url}
source_event = event_dict[url]
await self.emit_event(webscreenshot_data, "WEBSCREENSHOT", source=source_event)

Expand Down
4 changes: 4 additions & 0 deletions bbot/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,10 @@ def __init__(
self.url_extension_blacklist = set(e.lower() for e in self.config.get("url_extension_blacklist", []))
self.url_extension_httpx_only = set(e.lower() for e in self.config.get("url_extension_httpx_only", []))

# blob inclusion
self._file_blobs = self.config.get("file_blobs", False)
self._folder_blobs = self.config.get("folder_blobs", False)

# custom HTTP headers warning
self.custom_http_headers = self.config.get("http_headers", {})
if self.custom_http_headers:
Expand Down
26 changes: 24 additions & 2 deletions bbot/test/test_step_2/module_tests/test_module_git_clone.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import io
import base64
import shutil
import tarfile
import subprocess
from pathlib import Path

Expand Down Expand Up @@ -156,6 +160,7 @@ async def setup_before_prep(self, module_test):

async def setup_after_prep(self, module_test):
temp_path = Path("/tmp/.bbot_test")
shutil.rmtree(temp_path / "test_keys", ignore_errors=True)
subprocess.run(["git", "init", "test_keys"], cwd=temp_path)
temp_repo_path = temp_path / "test_keys"
with open(temp_repo_path / "keys.txt", "w") as f:
Expand All @@ -176,15 +181,15 @@ async def setup_after_prep(self, module_test):
cwd=temp_repo_path,
)

old_filter_event = module_test.module.filter_event
old_filter_event = module_test.scan.modules["git_clone"].filter_event

def new_filter_event(event):
event.data["url"] = event.data["url"].replace(
"https://github.com/blacklanternsecurity", f"file://{temp_path}"
)
return old_filter_event(event)

module_test.monkeypatch.setattr(module_test.module, "filter_event", new_filter_event)
module_test.monkeypatch.setattr(module_test.scan.modules["git_clone"], "filter_event", new_filter_event)

def check(self, module_test, events):
filesystem_events = [
Expand All @@ -196,9 +201,26 @@ def check(self, module_test, events):
and e.scope_distance == 1
]
assert 1 == len(filesystem_events), "Failed to git clone CODE_REPOSITORY"
# make sure the binary blob isn't here
assert not any(["blob" in e.data for e in [e for e in events if e.type == "FILESYSTEM"]])
filesystem_event = filesystem_events[0]
folder = Path(filesystem_event.data["path"])
assert folder.is_dir(), "Destination folder doesn't exist"
with open(folder / "keys.txt") as f:
content = f.read()
assert content == self.file_content, "File content doesn't match"


class TestGit_CloneWithBlob(TestGit_Clone):
    """Variant of the git_clone test that runs with ``folder_blobs`` enabled."""

    config_overrides = {"folder_blobs": True}

    def check(self, module_test, events):
        """Verify the single FILESYSTEM event carries a tar.gz blob of the repo."""
        fs_events = [event for event in events if event.type == "FILESYSTEM"]
        assert len(fs_events) == 1
        assert all("blob" in event.data for event in fs_events)

        # Decode the base64 payload and open it as a gzip-compressed tarball
        encoded = fs_events[0].data["blob"]
        archive = io.BytesIO(base64.b64decode(encoded))
        with tarfile.open(fileobj=archive, mode="r:gz") as tar:
            member_names = tar.getnames()
            assert "test_keys/keys.txt" in member_names
17 changes: 17 additions & 0 deletions bbot/test/test_step_2/module_tests/test_module_gowitness.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ async def new_emit_event(event):
module_test.monkeypatch.setattr(module_test.scan.modules["social"], "emit_event", new_emit_event)

def check(self, module_test, events):
webscreenshots = [e for e in events if e.type == "WEBSCREENSHOT"]
assert webscreenshots, "failed to raise WEBSCREENSHOT events"
assert not any(
["blob" in e.data for e in webscreenshots]
), "blob was included in WEBSCREENSHOT data when it shouldn't have been"

screenshots_path = self.home_dir / "scans" / module_test.scan.name / "gowitness" / "screenshots"
screenshots = list(screenshots_path.glob("*.png"))
assert (
Expand Down Expand Up @@ -69,3 +75,14 @@ def check(self, module_test, events):
and e.source.type == "SOCIAL"
]
)


class TestGoWitnessWithBlob(TestGowitness):
    """Variant of the gowitness test that runs with ``file_blobs`` enabled."""

    config_overrides = {"file_blobs": True}

    def check(self, module_test, events):
        """Verify every WEBSCREENSHOT event carries a non-empty blob."""
        screenshot_events = [event for event in events if event.type == "WEBSCREENSHOT"]
        assert screenshot_events, "failed to raise WEBSCREENSHOT events"
        assert all(
            "blob" in event.data and event.data["blob"] for event in screenshot_events
        ), "blob not found in WEBSCREENSHOT data"
Loading