Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .venv/lib64
20 changes: 19 additions & 1 deletion sherlock_project/resources/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,6 @@
"urlMain": "https://bsky.app/",
"username_claimed": "mcuban"
},

"BongaCams": {
"errorType": "status_code",
"isNSFW": true,
Expand Down Expand Up @@ -2941,5 +2940,24 @@
"urlMain": "https://platzi.com/",
"username_claimed": "freddier",
"request_method": "GET"
},
"Modrinth": {
"urlMain": "https://modrinth.com",
"url": "https://modrinth.com/user/{}",
"errorType": ["status_code", "message"],
"errorCode": [404],
"errorMsg": ["Not Found"],
"username_claimed": "modrinth",
"headers": { "User-Agent": "Mozilla/5.0" }
},

"StackBlitz": {
"urlMain": "https://stackblitz.com",
"url": "https://stackblitz.com/@{}",
"errorType": ["message", "status_code"],
"errorMsg": ["Not Found"],
"errorCode": [404],
"username_claimed": "stackblitz",
"headers": { "User-Agent": "Mozilla/5.0" }
}
}
188 changes: 153 additions & 35 deletions sherlock_project/resources/data.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,101 +4,189 @@
"description": "Social media targets to probe for the existence of known usernames",
"type": "object",
"properties": {
"$schema": { "type": "string" }
"$schema": {
"type": "string"
}
},
"patternProperties": {
"^(?!\\$).*?$": {
"type": "object",
"description": "Target name and associated information (key should be human readable name)",
"required": ["url", "urlMain", "errorType", "username_claimed"],
"required": [
"url",
"urlMain",
"errorType",
"username_claimed"
],
"properties": {
"url": { "type": "string" },
"urlMain": { "type": "string" },
"urlProbe": { "type": "string" },
"username_claimed": { "type": "string" },
"regexCheck": { "type": "string" },
"isNSFW": { "type": "boolean" },
"headers": { "type": "object" },
"request_payload": { "type": "object" },
"url": {
"type": "string"
},
"urlMain": {
"type": "string"
},
"urlProbe": {
"type": "string"
},
"username_claimed": {
"type": "string"
},
"username_unclaimed": {
"type": "string"
},
"regexCheck": {
"type": "string"
},
"isNSFW": {
"type": "boolean"
},
"headers": {
"type": "object"
},
"request_payload": {
"type": "object"
},
"__comment__": {
"type": "string",
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
},
"tags": {
"oneOf": [
{ "$ref": "#/$defs/tag" },
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
{
"$ref": "#/$defs/tag"
},
{
"type": "array",
"items": {
"$ref": "#/$defs/tag"
}
}
]
},
"request_method": {
"type": "string",
"enum": ["GET", "POST", "HEAD", "PUT"]
"enum": [
"GET",
"POST",
"HEAD",
"PUT"
]
},
"errorType": {
"oneOf": [
{
"type": "string",
"enum": ["message", "response_url", "status_code"]
"enum": [
"message",
"response_url",
"status_code"
]
},
{
"type": "array",
"items": {
"type": "string",
"enum": ["message", "response_url", "status_code"]
"enum": [
"message",
"response_url",
"status_code"
]
}
}
]
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
{
"type": "integer"
},
{
"type": "array",
"items": {
"type": "integer"
}
}
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
"errorUrl": {
"type": "string"
},
"response_url": {
"type": "string"
}
},
"dependencies": {
"errorMsg": {
"oneOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"const": "message"
}
}
},
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "message" }
"contains": {
"const": "message"
}
}
}
}
]
},
"errorUrl": {
"oneOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"const": "response_url"
}
}
},
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
"contains": {
"const": "response_url"
}
}
}
}
]
},
"errorCode": {
"oneOf": [
{ "properties": { "errorType": { "const": "status_code" } } },
{
"properties": {
"errorType": {
"const": "status_code"
}
}
},
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "status_code" }
"contains": {
"const": "status_code"
}
}
}
}
Expand All @@ -109,41 +197,71 @@
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"const": "message"
}
}
},
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "message" }
"contains": {
"const": "message"
}
}
}
}
]
},
"then": { "required": ["errorMsg"] }
"then": {
"required": [
"errorMsg"
]
}
},
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"const": "response_url"
}
}
},
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
"contains": {
"const": "response_url"
}
}
}
}
]
},
"then": { "required": ["errorUrl"] }
"then": {
"required": [
"errorUrl"
]
}
}
],
"additionalProperties": false
}
},
"additionalProperties": false,
"$defs": {
"tag": { "type": "string", "enum": ["adult", "gaming"] }
"tag": {
"type": "string",
"enum": [
"adult",
"gaming"
]
}
}
}
}
25 changes: 22 additions & 3 deletions sherlock_project/sherlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,10 +392,29 @@ def sherlock(
if error_text is not None:
error_context = error_text

elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
query_status = QueryStatus.WAF

else:
# Use regex matching for WAF fingerprints (the WAFHitMsgs entries
# are intended as regex patterns). Using substring membership here
# produced false positives on some targets. Try a regex search and
# ignore any bad patterns silently.
try:
if any(re.search(hitMsg, r.text) for hitMsg in WAFHitMsgs):
query_status = QueryStatus.WAF
except re.error:
# If any of the WAF patterns are invalid regexes, ignore them
# and continue with normal detection rather than failing.
pass











if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
error_context = f"Unknown error type '{error_type}' for {social_network}"
query_status = QueryStatus.UNKNOWN
Expand Down
19 changes: 19 additions & 0 deletions tests/test_new_sites_presence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json
from pathlib import Path

# Keys every site entry must define. Kept in step with the "required" list of
# sherlock_project/resources/data.schema.json (url, urlMain, errorType,
# username_claimed) so this test checks the same contract as the schema.
REQUIRED_FIELDS = {"urlMain", "url", "errorType", "username_claimed"}

def load_data(data_path=None):
    """Load a site manifest and return the decoded JSON document.

    Args:
        data_path: Optional path (``str`` or ``Path``) of the JSON file to
            read. When omitted, ``sherlock_project/resources/data.json`` is
            resolved from the parent of the directory containing this file.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    if data_path is None:
        # Default location: <repo>/sherlock_project/resources/data.json,
        # where <repo> is two levels above this file.
        data_path = Path(__file__).parent.parent / "sherlock_project" / "resources" / "data.json"
    with open(data_path, "r", encoding="utf-8") as f:
        return json.load(f)

def test_sites_present_with_required_fields():
    """Assert each listed site is in data.json and defines every required key."""
    manifest = load_data()
    expected_sites = ("Ko-fi", "StackBlitz", "Modrinth")
    for site in expected_sites:
        # Presence first: the key lookup below would otherwise raise KeyError
        # instead of producing a readable assertion message.
        assert site in manifest, f"{site} entry missing in data.json"
        missing = REQUIRED_FIELDS.difference(manifest[site].keys())
        assert not missing, f"{site} missing fields: {missing}"

# NOTE(review): nothing visible here ties this message to a test outcome. If
# the statement sits at module level it runs whenever the file is imported,
# regardless of whether the assertions pass — confirm the intended placement.
print("All tests passed.")