Skip to content

Commit

Permalink
Merge pull request #1380 from yogeshojha/1358-feat-support-for-regex-…
Browse files Browse the repository at this point in the history
…in-out-of-scope-subdomain-scan-param

feat: Enhanced Out of Scope Subdomain Checking, Support for regex in out of scope scan parameter #1358
  • Loading branch information
yogeshojha authored Aug 21, 2024
2 parents dcfe8b5 + ab11e52 commit 2340324
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 17 deletions.
6 changes: 4 additions & 2 deletions web/reNgine/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,7 @@ def subdomain_discovery(
custom_subdomain_tools = [tool.name.lower() for tool in InstalledExternalTool.objects.filter(is_default=False).filter(is_subdomain_gathering=True)]
send_subdomain_changes, send_interesting = False, False
notif = Notification.objects.first()
subdomain_scope_checker = SubdomainScopeChecker(self.out_of_scope_subdomains)
if notif:
send_subdomain_changes = notif.send_subdomain_changes_notif
send_interesting = notif.send_interesting_notif
Expand Down Expand Up @@ -565,7 +566,7 @@ def subdomain_discovery(
if valid_url:
subdomain_name = urlparse(subdomain_name).netloc

if subdomain_name in self.out_of_scope_subdomains:
if subdomain_scope_checker.is_out_of_scope(subdomain_name):
logger.error(f'Subdomain {subdomain_name} is out of scope. Skipping.')
continue

Expand Down Expand Up @@ -4421,6 +4422,7 @@ def save_subdomain(subdomain_name, ctx={}):
scan_id = ctx.get('scan_history_id')
subscan_id = ctx.get('subscan_id')
out_of_scope_subdomains = ctx.get('out_of_scope_subdomains', [])
subdomain_checker = SubdomainScopeChecker(out_of_scope_subdomains)
valid_domain = (
validators.domain(subdomain_name) or
validators.ipv4(subdomain_name) or
Expand All @@ -4430,7 +4432,7 @@ def save_subdomain(subdomain_name, ctx={}):
logger.error(f'{subdomain_name} is not an invalid domain. Skipping.')
return None, False

if subdomain_name in out_of_scope_subdomains:
if subdomain_checker.is_out_of_scope(subdomain_name):
logger.error(f'{subdomain_name} is out-of-scope. Skipping.')
return None, False

Expand Down
47 changes: 46 additions & 1 deletion web/reNgine/utilities.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import os
import validators

Expand Down Expand Up @@ -113,4 +114,48 @@ def is_valid_url(url, validate_only_http_scheme=True):
if validate_only_http_scheme:
return url.startswith('http://') or url.startswith('https://')
return True
return False
return False


class SubdomainScopeChecker:
    r"""
    Decide whether a subdomain matches a user-supplied out-of-scope list.

    Every pattern is first compiled as a case-insensitive regular expression.
    Patterns that are not valid regular expressions are kept as lowercase
    plain strings and compared for exact equality instead.

    Note: a plain hostname such as ``admin.example.com`` is itself a valid
    regular expression, so it is matched with ``re.search``: ``.`` matches any
    character and the match may occur anywhere inside the subdomain. Anchor
    and escape the pattern (``^admin\.example\.com$``) when only an exact
    match should be excluded.
    """

    def __init__(self, patterns):
        """
        Build a checker from the given out-of-scope patterns.
        Args:
            patterns (list | str | None): Patterns to load, see load_patterns.
        """
        self.regex_patterns = set()
        self.plain_patterns = set()
        self.load_patterns(patterns)

    def load_patterns(self, patterns):
        """
        Load patterns into the checker.
        Args:
            patterns (list | str | None): Iterable of patterns to load. A
                single string is treated as newline-separated patterns and
                None is treated as an empty list.
        Returns:
            None
        """
        if patterns is None:
            return
        if isinstance(patterns, str):
            # Iterating a bare string would register every character as its
            # own pattern; treat it as the raw newline-separated list instead.
            patterns = patterns.splitlines()

        for pattern in patterns:
            if isinstance(pattern, str):
                # Entries usually come from a textarea; a trailing "\r" or
                # space would otherwise become part of the regex and prevent
                # it from ever matching a hostname.
                pattern = pattern.strip()
            # skip empty patterns
            if not pattern:
                continue
            try:
                self.regex_patterns.add(re.compile(pattern, re.IGNORECASE))
            except re.error:
                # Not a valid regex: fall back to exact, case-insensitive
                # string comparison.
                self.plain_patterns.add(pattern.lower())

    def is_out_of_scope(self, subdomain):
        """
        Check if a subdomain is out of scope.
        Args:
            subdomain (str): The subdomain to check.
        Returns:
            bool: True if the subdomain is out of scope, False otherwise.
                Non-string input (e.g. None) is never out of scope.
        """
        if not isinstance(subdomain, str):
            return False
        # Normalise the same way the plain patterns were normalised on load.
        subdomain = subdomain.strip().lower()
        if subdomain in self.plain_patterns:
            return True
        return any(pattern.search(subdomain) for pattern in self.regex_patterns)
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,15 @@ <h4 class="text-info">Import Subdomains(Optional)</h4>
<textarea class="form-control" id="importSubdomainFormControlTextarea" name="importSubdomainTextArea" rows="6" spellcheck="false"></textarea>
</div>
<div class="mb-3">
<h4 class="text-warning">Out of Scope Subdomains(Optional)</h4>
<span class="">You can import subdomains for <b>{{domain.name}}</b> using your private recon tools.</span>
<br>
<div class="alert bg-soft-warning border-0 mt-1 mb-1" role="alert">
<span class="">Separate the out of scope subdomains/keywords using new line.(No regex currently supported.)</span>
<h4 class="text-warning">Out of Scope Subdomains (Optional)</h4>
<p>Specify subdomains of <strong>{{domain.name}}</strong> to exclude from scanning. These subdomains will be omitted from all subsequent scans, including URL discovery and vulnerability assessments.</p>
<p>Enter one subdomain or pattern per line. Both plain text and regex patterns are supported. <span class="badge bg-soft-primary text-primary ms-2">New</span></p>
<ul class="mt-2 mb-2">
<li>For plain text: <code>admin.example.com</code></li>
<li>For regex: <code>^.*outofscope.*\.com$</code>, <code>admin.*</code> etc</li>
</ul>
<label for="outOfScopeSubdomainTextarea" class="form-label mt-1">Out of Scope Subdomains List</label>
<textarea class="form-control" id="outOfScopeSubdomainTextarea" name="outOfScopeSubdomainTextarea" rows="6" spellcheck="false"></textarea>
<textarea class="form-control" id="outOfScopeSubdomainTextarea" name="outOfScopeSubdomainTextarea" rows="6" spellcheck="false" placeholder="Enter subdomains or patterns, one per line"></textarea>
</div>
</div>
<h3>URL Scope and Exclusions</h3>
Expand Down
17 changes: 9 additions & 8 deletions web/startScan/templates/startScan/_items/start_scan_wizard.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,24 @@ <h4>Import/Ignore Subdomains</h4>
<div class="">
<div class="mb-3">
<h4 class="text-info">Import Subdomains (Optional)</h4>
<span class="">You can import subdomains for <b>{{domain.name}}</b> using your private recon tools.</span>
<p>You can import subdomains for <strong>{{domain.name}}</strong> discovered through your private reconnaissance tools.</p>
<br>
<div class="alert bg-soft-primary border-0 mt-1 mb-1" role="alert">
<span class="">Separate the subdomains using <b>new line</b>. If the subdomain does not belong to <b>{{domain.name}}</b> it will be skipped.</span>
<span>Enter one subdomain per line. Subdomains not belonging to <strong>{{domain.name}}</strong> will be automatically skipped.</span>
</div>
<label for="importSubdomainFormControlTextarea" class="form-label mt-1">Subdomains List</label>
<textarea class="form-control" id="importSubdomainFormControlTextarea" name="importSubdomainTextArea" rows="6" spellcheck="false"></textarea>
</div>
<div class="mb-3">
<h4 class="text-warning">Out of Scope Subdomains(Optional)</h4>
<span class="">You can import subdomains for <b>{{domain.name}}</b> using your private recon tools.</span>
<br>
<div class="alert bg-soft-warning border-0 mt-1 mb-1" role="alert">
<span class="">Separate the out of scope subdomains/keywords using new line.(No regex currently supported.)</span>
<h4 class="text-warning">Out of Scope Subdomains (Optional)</h4>
<p>Specify subdomains of <strong>{{domain.name}}</strong> to exclude from scanning. These subdomains will be omitted from all subsequent scans, including URL discovery and vulnerability assessments.</p>
<p>Enter one subdomain or pattern per line. Both plain text and regex patterns are supported. <span class="badge bg-soft-primary text-primary ms-2">New</span></p>
<ul class="mt-2 mb-2">
<li>For plain text: <code>admin.example.com</code></li>
<li>For regex: <code>^.*outofscope.*\.com$</code>, <code>admin.*</code> etc</li>
</ul>
<label for="outOfScopeSubdomainTextarea" class="form-label mt-1">Out of Scope Subdomains List</label>
<textarea class="form-control" id="outOfScopeSubdomainTextarea" name="outOfScopeSubdomainTextarea" rows="6" spellcheck="false"></textarea>
<textarea class="form-control" id="outOfScopeSubdomainTextarea" name="outOfScopeSubdomainTextarea" rows="6" spellcheck="false" placeholder="Enter subdomains or patterns, one per line"></textarea>
</div>
</div>

Expand Down

0 comments on commit 2340324

Please sign in to comment.