Skip to content

Commit

Permalink
filter exclude path 🚀
Browse files Browse the repository at this point in the history
  • Loading branch information
yogeshojha committed Aug 2, 2024
1 parent 79a266e commit fa761a7
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 5 deletions.
47 changes: 46 additions & 1 deletion web/reNgine/common_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,4 +1158,49 @@ def update_or_create_port(port_number, service_name=None, description=None):
)
created = True
finally:
return port, created
return port, created


def exclude_urls_by_patterns(exclude_paths, urls):
    """Filter out URLs that match any user-supplied exclusion pattern.

    Each entry of ``exclude_paths`` is first tried as a regular expression and
    matched anywhere in the URL (``re.search``). If the entry is not a valid
    regex, it falls back to a plain substring check.

    Args:
        exclude_paths (list of str): Patterns to exclude. Each can be a plain
            path or a regex. Empty, whitespace-only and non-string entries
            are ignored.
        urls (list of str): URLs to filter.

    Returns:
        list of str: URLs that do not match any exclusion pattern. When there
        is no usable pattern, ``urls`` is returned unchanged.
    """
    if not exclude_paths:
        # Nothing to exclude: hand the URLs back as they are.
        return urls

    matchers = []
    for path in exclude_paths:
        # A blank pattern compiles to a regex that matches every URL and would
        # silently drop the whole list; a non-string cannot be compiled at all.
        if not isinstance(path, str) or not path.strip():
            continue
        try:
            matchers.append(re.compile(path).search)
        except re.error:
            # Not a valid regex: treat it as a literal substring instead.
            matchers.append(lambda url, needle=path: needle in url)

    if not matchers:
        return urls

    return [url for url in urls if not any(match(url) for match in matchers)]
19 changes: 17 additions & 2 deletions web/reNgine/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def initiate_scan(
'domain_id': domain.id,
'results_dir': scan.results_dir,
'starting_point_url': starting_point_url,
'excluded_paths': excluded_paths,
'yaml_configuration': config,
'out_of_scope_subdomains': out_of_scope_subdomains
}
Expand Down Expand Up @@ -227,7 +228,9 @@ def initiate_subscan(
engine_id=None,
scan_type=None,
results_dir=RENGINE_RESULTS,
starting_point_url=''):
starting_point_url='',
excluded_paths=[],
):
"""Initiate a new subscan.
Args:
Expand All @@ -237,6 +240,7 @@ def initiate_subscan(
scan_type (int): Scan type (periodic, live).
results_dir (str): Results directory.
starting_point_url (str): URL path. Default: ''
excluded_paths (list): URL paths or regex patterns to exclude from the scan. Default: [].
"""

# Get Subdomain, Domain and ScanHistory
Expand Down Expand Up @@ -294,7 +298,8 @@ def initiate_subscan(
'subdomain_id': subdomain.id,
'yaml_configuration': config,
'results_dir': results_dir,
'starting_point_url': starting_point_url
'starting_point_url': starting_point_url,
'excluded_paths': excluded_paths,
}

# Create initial endpoints in DB: find domain HTTP endpoint so that HTTP
Expand Down Expand Up @@ -1936,6 +1941,10 @@ def fetch_url(self, urls=[], ctx={}, description=None):
if self.starting_point_url:
all_urls = [url for url in all_urls if self.starting_point_url in url]

# if excluded_paths is set, remove URLs matching any of those paths/patterns
if self.excluded_paths:
all_urls = exclude_urls_by_patterns(self.excluded_paths, all_urls)

# Write result to output path
with open(self.output_path, 'w') as f:
f.write('\n'.join(all_urls))
Expand Down Expand Up @@ -2830,8 +2839,14 @@ def http_crawl(
input_path = f'{self.results_dir}/httpx_input.txt'
history_file = f'{self.results_dir}/commands.txt'
if urls: # direct passing URLs to check
print(vars(self).items())
if self.starting_point_url:
urls = [u for u in urls if self.starting_point_url in u]

# exclude urls by pattern
if self.excluded_paths:
urls = exclude_urls_by_patterns(self.excluded_paths, urls)

with open(input_path, 'w') as f:
f.write('\n'.join(urls))
else:
Expand Down
4 changes: 2 additions & 2 deletions web/startScan/templates/startScan/start_scan_ui.html
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ <h4>URL Scope and Exclusions</h4>
<div class="mb-4">
<div class="mb-3">
<h4 class="text-info">Starting Point URL (Optional)</h4>
<input type="email" class="form-control" id="startingPointUrl" placeholder="e.g. /home">
<input type="email" class="form-control" id="startingPointUrl" placeholder="e.g. /home" name="startingPointUrl">
<small class="form-text text-muted">
Defines where the scan should begin. Leave blank to scan from the root (/) and include all subdomains.
</br>
Expand All @@ -80,7 +80,7 @@ <h4 class="text-info">Starting Point URL (Optional)</h4>
</div>
<div class="mb-3">
<h4 class="text-warning">Excluded Paths (Optional)</h4>
<input type="text" id="excludedPaths" value="{{excluded_paths}}">
<input type="text" id="excludedPaths" value="{{excluded_paths}}" name="excludedPaths">
<small class="form-text text-muted">
Enter paths or regex patterns to exclude from the scan. Type a path or pattern and press Enter to add it.
Supports both exact path matching and regex patterns. Examples:<br>
Expand Down

0 comments on commit fa761a7

Please sign in to comment.