Commit 6870714

Merge pull request #46 from KingAkeem/dev
Beautiful and comprehensible code base. (Documenting and Refactoring)
PSNAppz authored Jan 11, 2018
2 parents 0889945 + 9701706 commit 6870714
Showing 9 changed files with 218 additions and 158 deletions.
10 changes: 2 additions & 8 deletions README.md
@@ -101,14 +101,8 @@ Before you run the torBot make sure the following things are done properly:
 * Run tor service
 `sudo service tor start`
 
-* Set a password for tor
-`tor --hash-password "my_password" `
-
-* Give the password inside torbot.py
-`from stem.control import Controller
-with Controller.from_port(port = 9051) as controller:
-controller.authenticate("your_password_hash")
-controller.signal(Signal.NEWNYM)`
+* Make sure that your torrc is configured to SOCKS_PORT localhost:9050, which should be
+the default setting
 
 `python3 torBot.py`
 <pre>
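The torrc note above assumes Tor's SOCKS listener is on its default localhost:9050. A quick way to confirm that before launching torBot, sketched with Python's socket module (the host and port mirror the README's default; this check is not part of the commit):

    import socket

    # probe the default Tor SOCKS port the README expects to be open
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    result = sock.connect_ex(("127.0.0.1", 9050))
    sock.close()
    print("Tor SOCKS port open" if result == 0 else "Tor SOCKS port not reachable")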
50 changes: 27 additions & 23 deletions modules/getemails.py
@@ -1,36 +1,40 @@
 from modules.bcolors import Bcolors
 from bs4 import BeautifulSoup
-from modules.savefile import saveJson
 
 
-"""Get all emails from the website"""
-def getMails(soup, save=0):
+def getMails(soup):
+
+    """
+    Searches for <a href> tags for links then checks if a link contains the
+    substring 'mailto', indicating that it's an email. If it is determined
+    to be an email then the link is split and the username is appended to
+    the list
+    Args:
+        soup: BeautifulSoup instance that will be used for parsing
+    Returns:
+        emails: list of email IDs
+    """
     b_colors = Bcolors()
-    _soup_instance = BeautifulSoup
-    if isinstance(type(soup), type(_soup_instance)):
+
+    if isinstance(type(soup), type(BeautifulSoup)):
+
         emails = []
-        for link in soup.find_all('a'):
-            email_link = link.get('href')
-            if email_link is not None:
-                if 'mailto' in email_link:
-                    """Split email address on"""
-                    email_addr = email_link.split(':')
-                    emails.append(email_addr[1])
-            else:
-                pass
+        links = soup.find_all('a')
+        for ref in links:
+            url = ref.get('href')
+            if url and 'mailto' in url:
+                """Split email address on"""
+                email_addr = url.split(':')
+                emails.append(email_addr[1])
 
         """Pretty print output as below"""
         print ('')
         print (b_colors.OKGREEN+'Mails Found - '+b_colors.ENDC+str(len(emails)))
         print ('-------------------------------')
-        for mail in emails:
-            print (mail)
-        if save:
-            saveJson("Extracted-Mail-IDs", emails)
-        return ''
+
+        return emails
 
     else:
-        msg = ''.join((b_colors.FAIL,
-                       'Method parameter is not of instance BeautifulSoup',
-                       b_colors.ENDC))
-        raise(msg)
+        raise('Method parameter is not of instance BeautifulSoup')
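Since getMails now returns the list instead of only printing it, callers can use the result directly. A minimal usage sketch, assuming it runs from the TorBot repository root so the modules package resolves (the HTML snippet and address are invented for illustration):

    from bs4 import BeautifulSoup
    from modules.getemails import getMails

    # hypothetical page containing a single mailto link
    html = '<a href="mailto:alice@example.com">contact</a>'
    soup = BeautifulSoup(html, 'html.parser')
    print(getMails(soup))  # expected: ['alice@example.com']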
81 changes: 57 additions & 24 deletions modules/getweblinks.py
@@ -1,42 +1,75 @@
 import urllib.request
+import re
 from modules.bcolors import Bcolors
 from bs4 import BeautifulSoup
 
 
-def link_status(web, out_queue, index):
-    b_colors = Bcolors()
-    out_queue[index] = web + " is_live = False "
-    try:
-        urllib.request.urlopen(web)
-        out_queue[index] = web + " is_live = True "
-        print(web)
-    except urllib.error.HTTPError:
-        print(b_colors.On_Red+web+b_colors.ENDC)
+def valid_onion_url(link):
+
+    """
+    Validates onion urls using regex
+    Args:
+        link: the url to be checked
+    Returns:
+        bool: True/False based on link
+    """
+
+    pattern = r"^https?\b(://+)(.+)(.+)\bonion/(.*)"
+    re_obj = re.compile(pattern)
+    if re_obj.fullmatch(link):
+        return True
+
+    return False
+
+
+def valid_url(link):
+
+    """
+    Validates general urls using regex
+    Takes in a string which is a link and decides the validity of the url
+    using regex
+    Args:
+        link: the url to be checked
+    Returns:
+        bool: True/False based on link
+    """
+
+    pattern = r"^https?\b(://+)(.+)(.+)\b...(.*)"
+    re_obj = re.compile(pattern)
+    if re_obj.fullmatch(link):
+        return True
+
+    return False
 
 
-"""Get all onion links from the website"""
-def getLinks(soup, ext, live=0, save=0):
+def getLinks(soup):
+
+    """
+    Searches through all <a href> (hyperlink) tags and stores them in a
+    list then validates if the url is formatted correctly.
+    Args:
+        soup: BeautifulSoup instance currently being used.
+    Returns:
+        websites: List of websites that were found
+    """
+
     b_colors = Bcolors()
-    extensions = []
-    if ext:
-        for e in ext:
-            extensions.append(e)
-
     if isinstance(type(soup), type(BeautifulSoup)):
         websites = []
 
-        for link in soup.find_all('a'):
-            web_link = link.get('href')
-            if web_link and ('http' in web_link or 'https' in web_link):
-                for exten in extensions:
-                    if web_link.endswith(exten):
-                        websites.append(web_link)
-            else:
-                websites.append(web_link)
+        links = soup.find_all('a')
+        for ref in links:
+            url = ref.get('href')
+            if url and (valid_onion_url(url) or valid_url(url)):
+                websites.append(url)
+
         """Pretty print output as below"""
         print(''.join((b_colors.OKGREEN,
               'Websites Found - ', b_colors.ENDC, str(len(websites)))))
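The two validators are plain re.fullmatch checks, so they can be exercised on their own. A small demonstration of the onion pattern from the diff (the sample URLs are invented):

    import re

    pattern = r"^https?\b(://+)(.+)(.+)\bonion/(.*)"
    re_obj = re.compile(pattern)
    print(bool(re_obj.fullmatch("http://example.onion/index")))  # True
    print(bool(re_obj.fullmatch("http://example.com/page")))     # False, no onion/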
20 changes: 13 additions & 7 deletions modules/savefile.py
@@ -3,15 +3,21 @@
 
 
 def saveJson(datatype, data):
+    """
+    Creates json file and stores json
+    Args:
+        datatype: the type of the object being passed
+        data: data that is being stored with object
+    """
 
-    "function_docstring"
     timestr = time.strftime("%Y%m%d-%H%M%S")
     # Json File Creation
-    file = open("TorBoT-Export-"+datatype+timestr+".json", "a")
-    # Store data in Json format
-    output = {datatype: data}
-    # Dump output to file
-    json.dump(output, file, indent=2)
-    file.close()
+    with open("TorBoT-Export-"+datatype+timestr+".json", "x") as file:
+        # Store data in Json format
+        output = {datatype: data}
+        # Dump output to file
+        json.dump(output, file, indent=2)
 
     print("\nData will be saved with a File Name :",
           "TorBoT-Export-"+datatype+timestr+".json")
7 changes: 7 additions & 0 deletions modules/updater.py
@@ -2,6 +2,13 @@
 
 
 def updateTor():
+
+    """
+    Currently updates Tor by calling terminal commands using subprocess
+    Not a great method and will be replaced in the future.
+    """
+
     print("Checking for latest stable release")
     isGit = subprocess.Popen(
         ["git", "branch"],
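The hunk is truncated here, but the new docstring describes the approach: shell out to git via subprocess. A hypothetical sketch in that spirit, not the commit's exact code (assumes git is on PATH):

    import subprocess

    # ask git for the branch list; output on stderr means we are not
    # inside a git repository and cannot self-update this way
    process = subprocess.Popen(["git", "branch"],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, err = process.communicate()
    print("git repo detected" if not err else "not a git repository")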
12 changes: 4 additions & 8 deletions tests/test_getemails.py
@@ -9,7 +9,6 @@
 sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))
 
 from modules import pagereader, getemails
-from io import StringIO
 from modules.bcolors import Bcolors
 
 
@@ -19,15 +18,12 @@
 class getMailsTestCase(unittest.TestCase):
 
     def setUp(self):
-        self.held, sys.stdout = sys.stdout, StringIO()
         self.b_colors = Bcolors()
 
-    def test_print_emails(self):
-        data = ''.join(("\n", self.b_colors.OKGREEN, "Mails Found - ",
-                        self.b_colors.ENDC, "1\n------------------------",
-                        "-------\n[email protected]\n"))
-        getemails.getMails(soup)
-        self.assertEqual(sys.stdout.getvalue(), data)
+    def test_getemails(self):
+        test_emails = ["[email protected]"]
+        emails = getemails.getMails(soup)
+        self.assertEqual(emails, test_emails)
 
 
 if __name__ == '__main__':
12 changes: 5 additions & 7 deletions tests/test_getweblinks.py
@@ -22,16 +22,14 @@ def setUp(self):
         self.held, sys.stdout = sys.stdout, StringIO()
         self.maxDiff = None
 
-    def test_print_links(self):
+    def test_get_links(self):
 
         data = ['http://aff.ironsocket.com/SH7L',
-                'http://aff.ironsocket.com/SH7L',
-                'http://wsrs.net/',
-                'http://cmsgear.com/',
-                'http://cmsgear.com/']
+                'http://aff.ironsocket.com/SH7L',
+                'http://wsrs.net/',
+                'http://cmsgear.com/']
 
-        ext = ['.com/']
-        result = getweblinks.getLinks(soup, ext)
+        result = getweblinks.getLinks(soup)
         self.assertEqual(result, data)
4 changes: 1 addition & 3 deletions tests/test_savetofile.py
@@ -27,10 +27,8 @@ def test_save_links(self):
         data = ['http://aff.ironsocket.com/SH7L',
                 'http://aff.ironsocket.com/SH7L',
                 'http://wsrs.net/',
-                'http://cmsgear.com/',
                 'http://cmsgear.com/']
 
-        ext = ['.com/']
-        result = getweblinks.getLinks(soup, ext, 0, 1)
+        result = getweblinks.getLinks(soup)
        self.assertEqual(result, data)
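With the assertions now comparing return values instead of captured stdout, the suite runs under plain unittest discovery. A hypothetical runner, assuming it is launched from the repository root:

    import unittest

    # discover every test module under tests/ and run it
    suite = unittest.defaultTestLoader.discover('tests')
    unittest.TextTestRunner(verbosity=2).run(suite)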

0 comments on commit 6870714
