
Commit

Added modules folder
KingAkeem committed Feb 18, 2018
1 parent ad5f686 commit 00d4744
Showing 11 changed files with 536 additions and 0 deletions.
14 changes: 14 additions & 0 deletions modules/bcolors.py
@@ -0,0 +1,14 @@
class Bcolors:

    def __init__(self):
        self.HEADER = '\033[95m'
        self.OKBLUE = '\033[94m'
        self.OKGREEN = '\033[92m'
        self.WARNING = '\033[93m'
        self.FAIL = '\033[91m'
        self.ENDC = '\033[0m'
        self.BOLD = '\033[1m'
        self.UNDERLINE = '\033[4m'
        self.WHITE = '\033[97m'
        self.On_Black = '\033[40m'
        self.On_Red = '\033[41m'
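
A minimal usage sketch for Bcolors (illustrative only, not part of the commit), showing how the escape codes wrap a piece of text and are reset with ENDC:

from modules.bcolors import Bcolors

colors = Bcolors()
# green label, then reset so later output uses the terminal's default color
print(colors.OKGREEN + 'Websites Found - ' + colors.ENDC + '3')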
41 changes: 41 additions & 0 deletions modules/getemails.py
@@ -0,0 +1,41 @@
from modules.bcolors import Bcolors
from bs4 import BeautifulSoup


def getMails(soup):
    """
    Searches <a href> tags for links, then checks whether a link contains
    the substring 'mailto', indicating that it is an email address. If it
    is, the link is split on ':' and the address is appended to the list.

    Args:
        soup: BeautifulSoup instance that will be used for parsing

    Returns:
        emails: list of email addresses that were found
    """
    b_colors = Bcolors()

    if isinstance(soup, BeautifulSoup):
        emails = []
        links = soup.find_all('a')
        for ref in links:
            url = ref.get('href')
            if url and 'mailto' in url:
                # Split the mailto link on ':' to extract the address
                email_addr = url.split(':')
                if len(email_addr) > 1:
                    emails.append(email_addr[1])

        # Pretty print output as below
        print('')
        print(b_colors.OKGREEN + 'Mails Found - ' + b_colors.ENDC + str(len(emails)))
        print('-------------------------------')

        return emails

    else:
        raise Exception('Method parameter is not of instance BeautifulSoup')
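
A minimal usage sketch for getMails (illustrative only, not part of the commit); it assumes a page fetched elsewhere and parses a single mailto link:

from bs4 import BeautifulSoup
from modules.getemails import getMails

html = '<a href="mailto:admin@example.onion">Contact</a>'
soup = BeautifulSoup(html, 'html.parser')
emails = getMails(soup)  # prints the count and returns ['admin@example.onion']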
112 changes: 112 additions & 0 deletions modules/getweblinks.py
@@ -0,0 +1,112 @@
import re
import requests
import tldextract

from bs4 import BeautifulSoup
from modules.bcolors import Bcolors
from requests.exceptions import ConnectionError, HTTPError


def valid_url(url, extensions=False):
    """Checks for any valid url using regular expression matching

    Matches all possible url patterns against the url that is passed and
    returns True if it is a url and False if it is not.

    Args:
        url: string representing the url to be checked
        extensions: optional list of domain suffixes to filter against

    Returns:
        bool: True if the url format is valid and False if not; when
        extensions is given, a list of the matching urls is returned instead
    """
    pattern = r"^https?:\/\/(www\.)?([a-zA-Z0-9]*)\.([a-zA-Z]+)(.*)"
    regex = re.compile(pattern)
    if not extensions:
        if regex.match(url):
            return True
        return False

    parts = tldextract.extract(url)
    valid_sites = list()
    for ext in extensions:
        if regex.match(url) and '.' + parts.suffix in ext:
            valid_sites.append(url)
    return valid_sites


def valid_onion_url(url):
    """Checks for a valid onion url using regular expression matching

    Only matches onion urls.

    Args:
        url: string representing the url to be checked

    Returns:
        bool: True if valid onion url format, False if not
    """
    pattern = r"^https?:\/\/(www\.)?([a-zA-Z0-9]*)\.onion/(.*)"
    regex = re.compile(pattern)
    if regex.match(url):
        return True
    return False


def get_link_status(link, colors):
    """Generator that yields links as they come

    Uses a HEAD request because it consumes less bandwidth than GET; the
    timeout is set to 10 seconds, after which the link is declared dead.

    Args:
        link: link to be tested
        colors: object containing colors for the link

    Yields:
        string: link with either no color or red, which indicates failure
    """
    try:
        resp = requests.head(link, timeout=10)
        resp.raise_for_status()
        yield '\t' + link
    except (ConnectionError, HTTPError):
        yield '\t' + colors.On_Red + link + colors.ENDC


def getLinks(soup, ext=False, live=False):
    """
    Searches through all <a href> (hyperlink) tags, stores the urls in a
    list, and validates that each url is formatted correctly.

    Args:
        soup: BeautifulSoup instance currently being used.
        ext: optional list of domain suffixes passed on to valid_url;
            when False, only onion urls are accepted.
        live: unused in this implementation.

    Returns:
        websites: list of websites that were found
    """
    b_colors = Bcolors()
    if isinstance(soup, BeautifulSoup):
        websites = []

        links = soup.find_all('a')
        for ref in links:
            url = ref.get('href')
            if ext:
                if url and valid_url(url, ext):
                    websites.append(url)
            else:
                if url and valid_onion_url(url):
                    websites.append(url)

        # Pretty print output as below
        print(''.join((b_colors.OKGREEN,
                       'Websites Found - ', b_colors.ENDC, str(len(websites)))))
        print('------------------------------------')

        for link in websites:
            print(next(get_link_status(link, b_colors)))
        return websites

    else:
        raise Exception('Method parameter is not of instance BeautifulSoup')
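
A minimal usage sketch for getLinks and valid_url (illustrative only, not part of the commit); note that every collected link is probed over the network with a HEAD request before being printed:

from bs4 import BeautifulSoup
from modules.getweblinks import getLinks, valid_url

html = '<a href="http://example.onion/index.html">hidden service</a>'
soup = BeautifulSoup(html, 'html.parser')
sites = getLinks(soup)                   # onion links only when ext is False
print(valid_url('https://example.com'))  # True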
81 changes: 81 additions & 0 deletions modules/go_modules/getLinks.go
@@ -0,0 +1,81 @@
package main

import "C"

import (
    "fmt"
    "golang.org/x/net/html"
    "log"
    "net/http"
    "net/url"
    "regexp"
    "sync"
    "time"
)

// validOnionUrl reports whether the url matches the onion address pattern.
func validOnionUrl(url string) bool {
    pattern := `^https?:\/\/(www\.)?([a-zA-Z0-9]*)\.onion/(.*)`
    re := regexp.MustCompile(pattern)
    return re.Match([]byte(url))
}

// asyncHead reports whether a link is reachable; it currently issues a GET
// through the Tor client.
func asyncHead(client *http.Client, link string) {
    defer wg.Done()
    resp, err := client.Get(link)
    if err == nil && resp.StatusCode < 400 {
        fmt.Printf("%v is reachable.\n", link)
    } else {
        fmt.Printf("%v is not reachable.\n", link)
    }
}

var wg sync.WaitGroup

// GetLinks fetches searchUrl through the Tor SOCKS proxy at addr:port,
// collects hrefs that look like onion urls, and checks each one concurrently.
//export GetLinks
func GetLinks(searchUrl string, addr string, port string, timeout int) {
    var torProxy string = "socks5://" + addr + ":" + port
    torProxyUrl, err := url.Parse(torProxy)
    if err != nil {
        log.Fatal("Error parsing URL: ", err)
    }
    torTransport := &http.Transport{Proxy: http.ProxyURL(torProxyUrl)}
    client := &http.Client{Transport: torTransport, Timeout: time.Second * time.Duration(timeout)}
    resp, err := client.Get(searchUrl)
    if err != nil {
        log.Fatal("Error with GET request: ", err)
    }
    defer resp.Body.Close()
    bytes := resp.Body
    tokenizer := html.NewTokenizer(bytes)
    found_urls := make([]string, 0)
    for not_end := true; not_end; {
        currentTokenType := tokenizer.Next()
        switch {
        case currentTokenType == html.ErrorToken:
            not_end = false
        case currentTokenType == html.StartTagToken:
            token := tokenizer.Token()
            if token.Data == "a" {
                attributes := token.Attr
                for i := 0; i < len(attributes); i++ {
                    if attributes[i].Key == "href" && validOnionUrl(attributes[i].Val) {
                        found_urls = append(found_urls, attributes[i].Val)
                    }
                }
            }
        }
    }
    fmt.Printf("Number of URLs found: %v\n", len(found_urls))
    for _, link := range found_urls {
        _, err := url.ParseRequestURI(link)
        if err != nil {
            continue
        }
        wg.Add(1)
        go asyncHead(client, link)
    }
    wg.Wait()
}

func main() {
}
18 changes: 18 additions & 0 deletions modules/go_modules/getLinks.py
@@ -0,0 +1,18 @@
from ctypes import cdll, c_char_p, c_longlong, c_int, Structure
import sys

goBot = cdll.LoadLibrary("./goBot.so")


class GoString(Structure):
    _fields_ = [("p", c_char_p), ("n", c_longlong)]


goBot.GetLinks.argtypes = [GoString, GoString, GoString, c_int]
url = sys.argv[1].encode('utf-8')
addr = b"127.0.0.1"
port = b"9050"
goBot.GetLinks(GoString(url, len(url)),
               GoString(addr, len(addr)),
               GoString(port, len(port)),
               15)
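
The goBot.so and goBot.h files are presumably produced with go build -buildmode=c-shared -o goBot.so getLinks.go. As an illustrative sketch (not part of the commit), the repeated str-to-GoString conversion above could be wrapped in a helper; to_go_string is hypothetical:

def to_go_string(s):
    """Hypothetical helper: build the GoString struct the cgo export expects."""
    data = s.encode('utf-8')
    return GoString(data, len(data))

# equivalent to the call above, routed through the Tor SOCKS proxy at 127.0.0.1:9050
goBot.GetLinks(to_go_string('http://example.onion/'),
               to_go_string('127.0.0.1'),
               to_go_string('9050'),
               15)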
60 changes: 60 additions & 0 deletions modules/go_modules/goBot.h
@@ -0,0 +1,60 @@
/* Created by "go tool cgo" - DO NOT EDIT. */

/* package command-line-arguments */

/* Start of preamble from import "C" comments. */




/* End of preamble from import "C" comments. */


/* Start of boilerplate cgo prologue. */
#line 1 "cgo-gcc-export-header-prolog"

#ifndef GO_CGO_PROLOGUE_H
#define GO_CGO_PROLOGUE_H

typedef signed char GoInt8;
typedef unsigned char GoUint8;
typedef short GoInt16;
typedef unsigned short GoUint16;
typedef int GoInt32;
typedef unsigned int GoUint32;
typedef long long GoInt64;
typedef unsigned long long GoUint64;
typedef GoInt64 GoInt;
typedef GoUint64 GoUint;
typedef __SIZE_TYPE__ GoUintptr;
typedef float GoFloat32;
typedef double GoFloat64;
typedef float _Complex GoComplex64;
typedef double _Complex GoComplex128;

/*
static assertion to make sure the file is being used on architecture
at least with matching size of GoInt.
*/
typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];

typedef struct { const char *p; GoInt n; } GoString;
typedef void *GoMap;
typedef void *GoChan;
typedef struct { void *t; void *v; } GoInterface;
typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;

#endif

/* End of boilerplate cgo prologue. */

#ifdef __cplusplus
extern "C" {
#endif


extern void GetLinks(GoString p0, GoString p1, GoString p2, GoInt p3);

#ifdef __cplusplus
}
#endif
Binary file added modules/go_modules/goBot.so
Binary file not shown.

