add comments to main

lmBored · Jun 4, 2024 · e359e9b · e359e9b
1 parent f660ae9
commit e359e9b
Show file tree

Hide file tree

Showing 3 changed files with 10 additions and 21 deletions.
diff --git a/join_dataset.py b/join_dataset.py
@@ -10,4 +10,6 @@
 
 combined = pd.concat([file1, file2, file3, file4, file5, file6, file7])
 
-combined.to_csv('combined_dataset.csv', index=False)
+combined.to_csv('combined_dataset.csv', index=False)
+
+# cat tweets_dataset_khoi.csv <(tail -n +2 tweets_dataset_illija.csv) <(tail -n +2 tweets_dataset_sven.csv) > combined.csv
diff --git a/main.py b/main.py
@@ -69,27 +69,27 @@
             a = loader.csv_adder_tweets(data, output_file='dataset.csv')
             [next(a) for i in range(1)]
 
-        elif choice == 'setup':
+        elif choice == 'setup': # Run this to set up everything from the beginning through conversation
             initializer.drop_all(connection)
             loader.tweets_loader_csv(connection)
             loader.users_loader_csv(connection)
             conversation.conversation_adder(connection)
             conversation.normalize(connection)
 
-        elif choice == "setup_conversation":
+        elif choice == "setup_conversation": # Run this to add data to conversation and hasher tables only
             conversation.conversation_clear(connection)
             conversation.conversation_adder(connection)
             conversation.normalize(connection)
 
-        elif choice == 'conversation_and_hasher_to_csv':
+        elif choice == 'conversation_and_hasher_to_csv': # Convert conversation and hasher tables to csv
             conversation.convert_conversations_table_to_csv(connection)
             conversation.convert_hasher_table_to_csv(connection)
 
-        elif choice == 'csvloadconversationshasher':
+        elif choice == 'csvloadconversationshasher': # Load conversation and hasher tables from csv
             conversation.csv_loader_hasher(connection)
             conversation.csv_loader_conversations(connection)
 
-        elif choice == 'category':
+        elif choice == 'categorize': # Categorize tweets into topics
             categorize.drop(connection)
             categorize.categorize(connection)
 

diff --git a/sentiment_evolution.ipynb b/sentiment_evolution.ipynb
@@ -2,23 +2,10 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "import os\n",
-    "\n",
-    "lines = [553, 567]\n",
-    "    \n",
-    "files = []\n",
-    "with open('json_files.txt') as file:\n",
-    "    for i, line in enumerate(file):\n",
-    "        if lines[0] <= i < lines[1]:\n",
-    "            files.append(line.strip())\n",
-    "\n",
-    "data = [Path(\"data/\"+file) for file in os.listdir('data') if file in files]"
-   ]
+   "source": []
   }
  ],
  "metadata": {