-
-
Notifications
You must be signed in to change notification settings - Fork 546
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
536 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
class Bcolors:
    """Container for the ANSI escape sequences used to colour terminal output.

    Each instance exposes the sequences as plain string attributes
    (``HEADER``, ``OKGREEN``, ``ENDC`` ...); ``ENDC`` resets the styling and
    the ``On_*`` entries select a background colour.
    """

    def __init__(self):
        # Populate the instance namespace in one step; attribute names and
        # their order match what callers already rely on.
        vars(self).update(
            HEADER='\033[95m',
            OKBLUE='\033[94m',
            OKGREEN='\033[92m',
            WARNING='\033[93m',
            FAIL='\033[91m',
            ENDC='\033[0m',
            BOLD='\033[1m',
            UNDERLINE='\033[4m',
            WHITE='\033[97m',
            On_Black='\033[40m',
            On_Red='\033[41m',
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from modules.bcolors import Bcolors | ||
from bs4 import BeautifulSoup | ||
|
||
|
||
def getMails(soup):
    """Collect the e-mail addresses referenced by ``mailto:`` links.

    Every ``<a>`` tag of the document is inspected. When its ``href`` uses
    the ``mailto`` scheme, the address part of the link (without any
    ``?subject=...`` style query) is appended to the result. A short summary
    with the number of addresses found is printed before returning.

    Args:
        soup: BeautifulSoup instance that will be used for parsing.

    Returns:
        emails: list of e-mail addresses, in document order.

    Raises:
        TypeError: if ``soup`` is not a BeautifulSoup instance.
    """
    # The previous guard compared type(soup) against `type`, which holds for
    # every object, so it never rejected anything; test the instance itself.
    if not isinstance(soup, BeautifulSoup):
        # A bare string cannot be raised; use a real exception type.
        raise TypeError('Method parameter is not of instance BeautifulSoup')

    b_colors = Bcolors()
    emails = []
    for ref in soup.find_all('a'):
        url = ref.get('href')
        if not url:
            continue
        # Only links whose *scheme* is mailto count; a plain substring test
        # would also pick up e.g. "http://host/mailto".
        scheme, _, target = url.strip().partition(':')
        if scheme.lower() != 'mailto':
            continue
        # Drop optional header fields such as "?subject=Hello".
        address = target.split('?', 1)[0]
        if address:
            emails.append(address)

    # Pretty print output as below
    print('')
    print(b_colors.OKGREEN+'Mails Found - '+b_colors.ENDC+str(len(emails)))
    print('-------------------------------')

    return emails
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import re | ||
import requests | ||
import tldextract | ||
|
||
from bs4 import BeautifulSoup | ||
from modules.bcolors import Bcolors | ||
from requests.exceptions import ConnectionError, HTTPError | ||
|
||
|
||
def valid_url(url, extensions=False):
    """Check whether ``url`` looks like an http(s) URL.

    The URL must start with ``http://`` or ``https://``, optionally followed
    by ``www.``, then a host label made of letters, digits or hyphens, a dot
    and an alphabetic label. Anything may follow.

    Args:
        url: string representing url to be checked
        extensions: optional iterable of domain extensions. When given, the
            URL is only accepted if '.' + its public suffix is contained in
            one of the entries (presumably strings such as '.com' - confirm
            against the caller).

    Returns:
        Without ``extensions``: bool, True if valid url format and False if
        not.
        With ``extensions``: a list holding ``url`` once per matching
        extension; the list is empty (falsy) when the URL is malformed or no
        extension matches.
    """
    # Character classes list ranges only: a comma or space inside [...] is a
    # literal, so the old classes accepted "exa,mple.com" and rejected
    # hyphenated hosts.
    pattern = r"^https?://(www\.)?([A-Za-z0-9-]+)\.([A-Za-z]+)(.*)"
    well_formed = re.match(pattern, url) is not None
    if not extensions:
        return well_formed

    if not well_formed:
        return []

    suffix = tldextract.extract(url).suffix
    if not suffix:
        # Without this guard '.' + '' == '.' would be "contained" in every
        # extension and any URL would pass the filter.
        return []

    dotted_suffix = '.' + suffix
    return [url for ext in extensions if dotted_suffix in ext]
|
||
|
||
def valid_onion_url(url):
    """Check whether ``url`` is an http(s) URL pointing at a .onion host.

    Accepted form: ``http://`` or ``https://``, an optional ``www.``, one
    alphanumeric host label, ``.onion`` and then either the end of the
    string or a ``/`` followed by anything.

    Args:
        url: string representing url to be checked

    Returns:
        bool: True if valid onion url format, False if not
    """
    # - Commas inside [...] are literals, so they are left out of the class.
    # - The label must not be empty.
    # - A bare "http://xyz.onion" (no trailing slash) is a legitimate link,
    #   while "xyz.onion.example.com" must still be rejected; hence (/|$).
    pattern = r"^https?://(www\.)?([A-Za-z0-9]+)\.onion(/|$)"
    return re.match(pattern, url) is not None
|
||
|
||
def get_link_status(link, colors):
    """Generator that yields a single display line describing ``link``.

    Uses a HEAD request because it uses less bandwidth than GET. The timeout
    is set to 10 seconds; a link that cannot be reached, answers with an
    HTTP error status or times out is declared dead.

    Args:
        link: link to be tested
        colors: object containing colors for link (``On_Red`` and ``ENDC``
            are used)

    Yields:
        string: tab-prefixed link, uncoloured when alive or wrapped in the
        red background sequence when dead
    """
    try:
        resp = requests.head(link, timeout=10)
        resp.raise_for_status()
    except (ConnectionError, HTTPError, requests.exceptions.Timeout):
        # A read timeout is a Timeout but not a ConnectionError, so it has
        # to be listed explicitly for slow hosts to be reported as dead
        # instead of aborting the caller.
        yield '\t'+colors.On_Red+link+colors.ENDC
    else:
        yield '\t'+link
|
||
|
||
def getLinks(soup, ext=False, live=False):
    """Gather the hyperlinks of a page and report whether each one responds.

    The ``href`` of every ``<a>`` tag is checked: with ``ext`` set the link
    must pass ``valid_url(link, ext)``, otherwise it must be a valid onion
    URL. The number of accepted links is printed, followed by one status
    line per link as produced by ``get_link_status``.

    Args:
        soup: BeautifulSoup instance currently being used.
        ext: extensions forwarded to ``valid_url``; falsy selects onion-only
            matching.
        live: accepted for interface compatibility; not used here.

    Returns:
        websites: List of websites that were found

    Raises:
        Exception: if ``soup`` is not a BeautifulSoup instance.
    """
    palette = Bcolors()
    if not isinstance(soup, BeautifulSoup):
        raise Exception('Method parameter is not of instance BeautifulSoup')

    # Pick the validator once instead of branching for every anchor.
    if ext:
        def accepted(href):
            return valid_url(href, ext)
    else:
        accepted = valid_onion_url

    hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
    websites = [href for href in hrefs if href and accepted(href)]

    # Pretty print output as below
    print(palette.OKGREEN + 'Websites Found - ' + palette.ENDC
          + str(len(websites)))
    print('------------------------------------')

    for site in websites:
        status_line = next(get_link_status(site, palette))
        print(status_line)

    return websites
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package main | ||
|
||
import "C" | ||
|
||
import ( | ||
"fmt" | ||
"golang.org/x/net/html" | ||
"log" | ||
"net/http" | ||
"net/url" | ||
"regexp" | ||
"sync" | ||
"time" | ||
) | ||
|
||
// onionURLPattern matches http(s) URLs whose host is a single alphanumeric
// label followed by ".onion", optionally prefixed with "www.". It is compiled
// once at package initialisation instead of on every call.
//
// Commas are deliberately absent from the character class (inside [...] they
// are literals, not separators), the label must be non-empty, and the host
// may be followed by either "/" or the end of the string so that links
// without a trailing slash are accepted too.
var onionURLPattern = regexp.MustCompile(`^https?://(www\.)?([A-Za-z0-9]+)\.onion(/|$)`)

// validOnionUrl reports whether rawURL has the shape of an onion service URL.
func validOnionUrl(rawURL string) bool {
	return onionURLPattern.MatchString(rawURL)
}
|
||
// asyncHead requests link through client and prints whether it is reachable.
// A link counts as reachable when the request succeeds and the status code
// is below 400. It is meant to run as a goroutine: it signals completion on
// the package-level wait group, so the caller must call wg.Add(1) first.
func asyncHead(client *http.Client, link string) {
	defer wg.Done()
	resp, err := client.Get(link)
	if err != nil {
		fmt.Printf("%v is not reachable.\n", link)
		return
	}
	// The body is never read, but it still has to be closed; otherwise the
	// underlying connection is leaked for every link that is checked.
	defer resp.Body.Close()
	if resp.StatusCode < 400 {
		fmt.Printf("%v is reachable.\n", link)
	} else {
		fmt.Printf("%v is not reachable.\n", link)
	}
}

// wg tracks the in-flight asyncHead goroutines so that GetLinks can wait for
// all reachability checks to finish before returning.
var wg sync.WaitGroup
|
||
//export GetLinks | ||
func GetLinks(searchUrl string, addr string, port string, timeout int) { | ||
var torProxy string = "socks5://" + addr + ":" + port | ||
torProxyUrl, err := url.Parse(torProxy) | ||
if err != nil { | ||
log.Fatal("Error parsing URL: ", err) | ||
} | ||
torTransport := &http.Transport{Proxy: http.ProxyURL(torProxyUrl)} | ||
client := &http.Client{Transport: torTransport, Timeout: time.Second * time.Duration(timeout)} | ||
resp, err := client.Get(searchUrl) | ||
if err != nil { | ||
log.Fatal("Error with GET request", err) | ||
} | ||
defer resp.Body.Close() | ||
bytes := resp.Body | ||
tokenizer := html.NewTokenizer(bytes) | ||
found_urls := make([]string, 0) | ||
for not_end := true; not_end; { | ||
currentTokenType := tokenizer.Next() | ||
switch { | ||
case currentTokenType == html.ErrorToken: | ||
not_end = false | ||
case currentTokenType == html.StartTagToken: | ||
token := tokenizer.Token() | ||
if token.Data == "a" { | ||
attributes := token.Attr | ||
for i := 0; i < len(attributes); i++ { | ||
if attributes[i].Key == "href" && validOnionUrl(attributes[i].Val) { | ||
found_urls = append(found_urls, attributes[i].Val) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
fmt.Printf("Number of URLs found: %v\n", len(found_urls)) | ||
for _, link := range found_urls { | ||
_, err := url.ParseRequestURI(link) | ||
if err != nil { | ||
continue | ||
} | ||
wg.Add(1) | ||
go asyncHead(client, link) | ||
} | ||
wg.Wait() | ||
} | ||
|
||
// main is intentionally empty: package main must declare it, while the
// functionality of this file is reached through the exported GetLinks
// function (presumably from a shared library build - confirm against the
// build command).
func main() {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from ctypes import cdll, c_char_p, c_longlong, c_int, Structure | ||
import sys | ||
|
||
# Shared library exposing the Go crawler; looked up relative to the current
# working directory.
goBot = cdll.LoadLibrary("./goBot.so")


class GoString(Structure):
    """ctypes mirror of cgo's GoString: a byte pointer plus its length."""
    _fields_ = [("p", c_char_p), ("n", c_longlong)]


# GetLinks(searchUrl, addr, port, timeout) returns nothing. The timeout is a
# Go `int`, which the generated cgo header declares as GoInt (a 64-bit
# integer), so it must be passed as c_longlong rather than c_int.
goBot.GetLinks.argtypes = [GoString, GoString, GoString, c_longlong]
goBot.GetLinks.restype = None

if len(sys.argv) < 2:
    sys.exit('usage: {} <url>'.format(sys.argv[0]))

# The byte strings stay referenced by these names for the duration of the
# call, which keeps the buffers behind the GoString pointers alive.
url = sys.argv[1].encode('utf-8')
addr = b"127.0.0.1"
port = b"9050"
goBot.GetLinks(GoString(url, len(url)),
               GoString(addr, len(addr)),
               GoString(port, len(port)),
               15)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* Created by "go tool cgo" - DO NOT EDIT. */ | ||
|
||
/* package command-line-arguments */ | ||
|
||
/* Start of preamble from import "C" comments. */ | ||
|
||
|
||
|
||
|
||
/* End of preamble from import "C" comments. */ | ||
|
||
|
||
/* Start of boilerplate cgo prologue. */ | ||
#line 1 "cgo-gcc-export-header-prolog" | ||
|
||
#ifndef GO_CGO_PROLOGUE_H | ||
#define GO_CGO_PROLOGUE_H | ||
|
||
typedef signed char GoInt8; | ||
typedef unsigned char GoUint8; | ||
typedef short GoInt16; | ||
typedef unsigned short GoUint16; | ||
typedef int GoInt32; | ||
typedef unsigned int GoUint32; | ||
typedef long long GoInt64; | ||
typedef unsigned long long GoUint64; | ||
typedef GoInt64 GoInt; | ||
typedef GoUint64 GoUint; | ||
typedef __SIZE_TYPE__ GoUintptr; | ||
typedef float GoFloat32; | ||
typedef double GoFloat64; | ||
typedef float _Complex GoComplex64; | ||
typedef double _Complex GoComplex128; | ||
|
||
/* | ||
static assertion to make sure the file is being used on architecture | ||
at least with matching size of GoInt. | ||
*/ | ||
typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1]; | ||
|
||
typedef struct { const char *p; GoInt n; } GoString; | ||
typedef void *GoMap; | ||
typedef void *GoChan; | ||
typedef struct { void *t; void *v; } GoInterface; | ||
typedef struct { void *data; GoInt len; GoInt cap; } GoSlice; | ||
|
||
#endif | ||
|
||
/* End of boilerplate cgo prologue. */ | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
|
||
extern void GetLinks(GoString p0, GoString p1, GoString p2, GoInt p3); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
Binary file not shown.
Oops, something went wrong.