diff --git a/join_dataset.py b/join_dataset.py index 90e8e1a..1972ff7 100644 --- a/join_dataset.py +++ b/join_dataset.py @@ -10,4 +10,6 @@ combined = pd.concat([file1, file2, file3, file4, file5, file6, file7]) -combined.to_csv('combined_dataset.csv', index=False) \ No newline at end of file +combined.to_csv('combined_dataset.csv', index=False) + +# cat tweets_dataset_khoi.csv <(tail -n +2 tweets_dataset_illija.csv) <(tail -n +2 tweets_dataset_sven.csv) > combined.csv \ No newline at end of file diff --git a/main.py b/main.py index ade1553..d015097 100644 --- a/main.py +++ b/main.py @@ -69,27 +69,27 @@ a = loader.csv_adder_tweets(data, output_file='dataset.csv') [next(a) for i in range(1)] - elif choice == 'setup': + elif choice == 'setup': # Run this to setup everything from beginning to conversation initializer.drop_all(connection) loader.tweets_loader_csv(connection) loader.users_loader_csv(connection) conversation.conversation_adder(connection) conversation.normalize(connection) - elif choice == "setup_conversation": + elif choice == "setup_conversation": # Run this to add data to conversation and hasher tables only conversation.conversation_clear(connection) conversation.conversation_adder(connection) conversation.normalize(connection) - elif choice == 'conversation_and_hasher_to_csv': + elif choice == 'conversation_and_hasher_to_csv': # Convert conversation and hasher tables to csv conversation.convert_conversations_table_to_csv(connection) conversation.convert_hasher_table_to_csv(connection) - elif choice == 'csvloadconversationshasher': + elif choice == 'csvloadconversationshasher': # Load conversation and hasher tables from csv conversation.csv_loader_hasher(connection) conversation.csv_loader_conversations(connection) - elif choice == 'category': + elif choice == 'categorize': # Categorize tweets into topics categorize.drop(connection) categorize.categorize(connection) diff --git a/sentiment_evolution.ipynb b/sentiment_evolution.ipynb index ddc0f03..7a7836a 100644 --- a/sentiment_evolution.ipynb +++ b/sentiment_evolution.ipynb @@ -2,23 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from pathlib import Path\n", - "import os\n", - "\n", - "lines = [553, 567]\n", - " \n", - "files = []\n", - "with open('json_files.txt') as file:\n", - " for i, line in enumerate(file):\n", - " if lines[0] <= i < lines[1]:\n", - " files.append(line.strip())\n", - "\n", - "data = [Path(\"data/\"+file) for file in os.listdir('data') if file in files]" - ] + "source": [] } ], "metadata": {