 """
 import datetime
 import uuid
-
+import os
 import requests
+
 from bs4 import BeautifulSoup
 from progress.bar import Bar
 from threadsafe.safe_csv import SafeDictWriter
 
-from .utils import join_local_path
+from .config import get_data_directory
 from .validators import validate_link
 from .log import debug
 
 
-def parse_links(html: str):
-    """Parses HTML page to extract links.
-
-    Returns:
-        (list): List of all valid links found.
+def parse_links(html: str) -> list[str]:
+    """
+    Finds all anchor tags and parses the href attribute.
     """
     soup = BeautifulSoup(html, 'html.parser')
     tags = soup.find_all('a')
     return [tag['href'] for tag in tags if validate_link(tag['href'])]
 
 
-def parse_meta_tags(soup: BeautifulSoup):
-    """Retrieve all meta elements from HTML object.
-
-    Returns:
-        list: List containing content from meta tags
+def parse_meta_tags(soup: BeautifulSoup) -> list[object]:
+    """
+    Parses all meta tags.
     """
     meta_tags = soup.find_all('meta')
     content_list = list()
@@ -38,23 +35,23 @@ def parse_meta_tags(soup: BeautifulSoup):
     return content_list
 
 
-def get_links(url: str):
+def get_links(url: str) -> list[str]:
+    """
+    Returns all valid links found on the URL.
+    """
     resp = requests.get(url)
     links = parse_links(resp.text)
     return links
 
 
-default_url = 'https://thehiddenwiki.org'
-
-
-def collect_data(user_url: str):
-    url = user_url if user_url is not None else default_url
+def collect_data(url: str = 'https://thehiddenwiki.org'):
     print(f"Gathering data for {url}")
     links = get_links(url)
     current_time = datetime.datetime.now().isoformat()
     file_name = f'torbot_{current_time}.csv'
-    file_path = join_local_path(file_name)
-    with open(file_path, 'w+') as outcsv:
+    data_directory = get_data_directory()
+    local_file_path = os.path.join(data_directory, file_name)
+    with open(local_file_path, 'w+') as outcsv:
         fieldnames = ['ID', 'Title', 'Metadata', 'Content']
         writer = SafeDictWriter(outcsv, fieldnames=fieldnames)
         bar = Bar('Processing...', max=len(links))
@@ -71,8 +68,9 @@ def collect_data(user_url: str):
             }
             writer.writerow(entry)
         except requests.exceptions.RequestException as e:
+            print(f"Failed to connect to [{link}].")
             debug(e)
-            debug(f"Failed to connect to [{link}].")
            bar.next()
        bar.finish()
-    print(f'Data has been saved to {file_path}.')
+
+    print(f'Data has been saved to {local_file_path}.')
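
For reference, a minimal usage sketch of the refactored entry point after this change. The bare module name `collect_data` in the import is an assumption (the package path is not shown in this diff), and the second URL is a hypothetical example.

    # Usage sketch, assuming this file is importable as `collect_data`
    # (the real package path is not shown in the diff above).
    from collect_data import collect_data

    # No argument: the new keyword default ('https://thehiddenwiki.org')
    # is used, replacing the old module-level `default_url` fallback.
    collect_data()

    # An explicit target can still be supplied (hypothetical URL).
    collect_data('http://exampleonionsite.onion')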