Clean repo

Former-commit-id: c8a89a773de743eeff106990ab5bdf716576439f
lmBored · Jun 27, 2024 · 9e0a576
1 parent 2028dd3
commit 9e0a576
Show file tree

Hide file tree

Showing 10 changed files with 442 additions and 3,626 deletions.
diff --git a/Final_conv_stats.ipynb b/Final_conv_stats.ipynb
@@ -12,11 +12,13 @@
     "import seaborn as sns\n",
     "import numpy as np\n",
     "\n",
+    "from config import config\n",
+    "\n",
     "connection = mysql.connector.connect(\n",
-    "    host='localhost',\n",
-    "    user='root',\n",
-    "    password='',\n",
-    "    database='jbg030'\n",
+    "    host=config.get('HOST'),\n",
+    "    user=config.get('USERNAME'),\n",
+    "    password=config.get('PASSWORD'),\n",
+    "    database=config.get('DATABASE')\n",
     ")\n",
     "\n",
     "cursor = connection.cursor()"

diff --git a/business_idea_stats_from_wh.ipynb b/business_idea_stats_from_wh.ipynb
diff --git a/final_demo copy.ipynb b/final_demo copy.ipynb
@@ -1,12 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Clear all outputs before pulling and pushing to avoid much conflicts!"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -22,12 +15,13 @@
     "import numpy as np\n",
     "import datetime\n",
     "import plotly.graph_objects as go\n",
+    "from config import config\n",
     "\n",
     "connection = mysql.connector.connect(\n",
-    "    host='localhost',\n",
-    "    user='root',\n",
-    "    password='4ADhj130!',\n",
-    "    database='jbg030'\n",
+    "    host=config.get('HOST'),\n",
+    "    user=config.get('USERNAME'),\n",
+    "    password=config.get('PASSWORD'),\n",
+    "    database=config.get('DATABASE')\n",
     ")\n",
     "\n",
     "cursor = connection.cursor()"
@@ -1249,6 +1243,165 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_ba = df[df['mentioned_airlines'].apply(lambda x: 'AirFrance' in x) | df['user_mentions'].apply(lambda x: '106062176' in x)]\n",
+    "\n",
+    "categories = ['baggage', 'money', 'staff', 'delay_and_cancellation']\n",
+    "sentiments = ['positive', 'neutral', 'negative']\n",
+    "sentiment_colors = sns.color_palette(\"muted\", 3)\n",
+    "\n",
+    "# Calculate sentiment percentages for each category\n",
+    "sentiment_percentages = {category: [] for category in categories}\n",
+    "total_counts = {category: 0 for category in categories}\n",
+    "\n",
+    "for category in categories:\n",
+    "    category_data = df_ba[df_ba[category] == 1]\n",
+    "    total_count = category_data.shape[0]\n",
+    "    total_counts[category] = total_count\n",
+    "    for sentiment in sentiments:\n",
+    "        sentiment_count = category_data[category_data['label'] == sentiment].shape[0]\n",
+    "        sentiment_percentage = (sentiment_count / total_count) * 100 if total_count > 0 else 0\n",
+    "        sentiment_percentages[category].append(sentiment_percentage)\n",
+    "\n",
+    "# Custom color palette\n",
+    "custom_colors = ['#0D2859', '#D83B33', '#A7A9AC']\n",
+    "\n",
+    "# Prepare the DataFrame for plotting\n",
+    "plot_data = pd.DataFrame(sentiment_percentages, index=sentiments).T\n",
+    "\n",
+    "# Plot the horizontal stacked bar chart\n",
+    "fig, ax = plt.subplots(figsize=(20, 10))\n",
+    "bottoms = [0] * len(categories)\n",
+    "\n",
+    "for sentiment, color in zip(sentiments, custom_colors):\n",
+    "    ax.barh(categories, plot_data[sentiment], left=bottoms, label=sentiment.capitalize(), color=color)\n",
+    "    bottoms = [i + j for i, j in zip(bottoms, plot_data[sentiment])]\n",
+    "\n",
+    "    # Annotate percentages on the bars\n",
+    "    for i, (percent, total) in enumerate(zip(plot_data[sentiment], total_counts.values())):\n",
+    "        if total > 0:\n",
+    "            ax.text(bottoms[i] - percent / 2, i, f'{percent:.1f}%', va='center', ha='center', fontsize=12, color='white', weight='bold')\n",
+    "\n",
+    "ax.set_xlabel('Percentage', fontsize=14)\n",
+    "ax.set_title('Sentiment Analysis for Different Categories in AirFrance', fontsize=20, weight='bold')\n",
+    "ax.legend(title='Sentiment', bbox_to_anchor=(1.05, 1), loc='upper left', prop={'size': 15})\n",
+    "ax.set_yticklabels([category.capitalize().replace('_', ' ') for category in categories])\n",
+    "ax.tick_params(axis='both', which='major', labelsize=13)\n",
+    "plt.tight_layout(rect=[0, 0, 0.85, 1])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_ba = df[df['mentioned_airlines'].apply(lambda x: 'KLM' in x) | df['user_mentions'].apply(lambda x: '56377143' in x)]\n",
+    "\n",
+    "categories = ['baggage', 'money', 'staff', 'delay_and_cancellation']\n",
+    "sentiments = ['positive', 'neutral', 'negative']\n",
+    "sentiment_colors = sns.color_palette(\"muted\", 3)\n",
+    "\n",
+    "# Calculate sentiment percentages for each category\n",
+    "sentiment_percentages = {category: [] for category in categories}\n",
+    "total_counts = {category: 0 for category in categories}\n",
+    "\n",
+    "for category in categories:\n",
+    "    category_data = df_ba[df_ba[category] == 1]\n",
+    "    total_count = category_data.shape[0]\n",
+    "    total_counts[category] = total_count\n",
+    "    for sentiment in sentiments:\n",
+    "        sentiment_count = category_data[category_data['label'] == sentiment].shape[0]\n",
+    "        sentiment_percentage = (sentiment_count / total_count) * 100 if total_count > 0 else 0\n",
+    "        sentiment_percentages[category].append(sentiment_percentage)\n",
+    "\n",
+    "# Custom color palette\n",
+    "custom_colors = ['#0D2859', '#D83B33', '#A7A9AC']\n",
+    "\n",
+    "# Prepare the DataFrame for plotting\n",
+    "plot_data = pd.DataFrame(sentiment_percentages, index=sentiments).T\n",
+    "\n",
+    "# Plot the horizontal stacked bar chart\n",
+    "fig, ax = plt.subplots(figsize=(20, 10))\n",
+    "bottoms = [0] * len(categories)\n",
+    "\n",
+    "for sentiment, color in zip(sentiments, custom_colors):\n",
+    "    ax.barh(categories, plot_data[sentiment], left=bottoms, label=sentiment.capitalize(), color=color)\n",
+    "    bottoms = [i + j for i, j in zip(bottoms, plot_data[sentiment])]\n",
+    "\n",
+    "    # Annotate percentages on the bars\n",
+    "    for i, (percent, total) in enumerate(zip(plot_data[sentiment], total_counts.values())):\n",
+    "        if total > 0:\n",
+    "            ax.text(bottoms[i] - percent / 2, i, f'{percent:.1f}%', va='center', ha='center', fontsize=12, color='white', weight='bold')\n",
+    "\n",
+    "ax.set_xlabel('Percentage', fontsize=14)\n",
+    "ax.set_title('Sentiment Analysis for Different Categories in KLM', fontsize=20, weight='bold')\n",
+    "ax.legend(title='Sentiment', bbox_to_anchor=(1.05, 1), loc='upper left', prop={'size': 15})\n",
+    "ax.set_yticklabels([category.capitalize().replace('_', ' ') for category in categories])\n",
+    "ax.tick_params(axis='both', which='major', labelsize=13)\n",
+    "plt.tight_layout(rect=[0, 0, 0.85, 1])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_ba = df[df['mentioned_airlines'].apply(lambda x: 'Lufthansa' in x) | df['user_mentions'].apply(lambda x: '124476322' in x)]\n",
+    "\n",
+    "categories = ['baggage', 'money', 'staff', 'delay_and_cancellation']\n",
+    "sentiments = ['positive', 'neutral', 'negative']\n",
+    "sentiment_colors = sns.color_palette(\"muted\", 3)\n",
+    "\n",
+    "# Calculate sentiment percentages for each category\n",
+    "sentiment_percentages = {category: [] for category in categories}\n",
+    "total_counts = {category: 0 for category in categories}\n",
+    "\n",
+    "for category in categories:\n",
+    "    category_data = df_ba[df_ba[category] == 1]\n",
+    "    total_count = category_data.shape[0]\n",
+    "    total_counts[category] = total_count\n",
+    "    for sentiment in sentiments:\n",
+    "        sentiment_count = category_data[category_data['label'] == sentiment].shape[0]\n",
+    "        sentiment_percentage = (sentiment_count / total_count) * 100 if total_count > 0 else 0\n",
+    "        sentiment_percentages[category].append(sentiment_percentage)\n",
+    "\n",
+    "# Custom color palette\n",
+    "custom_colors = ['#0D2859', '#D83B33', '#A7A9AC']\n",
+    "\n",
+    "# Prepare the DataFrame for plotting\n",
+    "plot_data = pd.DataFrame(sentiment_percentages, index=sentiments).T\n",
+    "\n",
+    "# Plot the horizontal stacked bar chart\n",
+    "fig, ax = plt.subplots(figsize=(20, 10))\n",
+    "bottoms = [0] * len(categories)\n",
+    "\n",
+    "for sentiment, color in zip(sentiments, custom_colors):\n",
+    "    ax.barh(categories, plot_data[sentiment], left=bottoms, label=sentiment.capitalize(), color=color)\n",
+    "    bottoms = [i + j for i, j in zip(bottoms, plot_data[sentiment])]\n",
+    "\n",
+    "    # Annotate percentages on the bars\n",
+    "    for i, (percent, total) in enumerate(zip(plot_data[sentiment], total_counts.values())):\n",
+    "        if total > 0:\n",
+    "            ax.text(bottoms[i] - percent / 2, i, f'{percent:.1f}%', va='center', ha='center', fontsize=12, color='white', weight='bold')\n",
+    "\n",
+    "ax.set_xlabel('Percentage', fontsize=14)\n",
+    "ax.set_title('Sentiment Analysis for Different Categories in Lufthansa', fontsize=20, weight='bold')\n",
+    "ax.legend(title='Sentiment', bbox_to_anchor=(1.05, 1), loc='upper left', prop={'size': 15})\n",
+    "ax.set_yticklabels([category.capitalize().replace('_', ' ') for category in categories])\n",
+    "ax.tick_params(axis='both', which='major', labelsize=13)\n",
+    "plt.tight_layout(rect=[0, 0, 0.85, 1])\n",
+    "plt.show()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,

diff --git a/final_demo.ipynb b/final_demo.ipynb
diff --git a/main.py b/main.py
@@ -15,7 +15,7 @@
     data = [Path("data/"+file) for file in os.listdir('data')]
     data.sort(key=lambda x: x.name)
 
-    connection = mysql.connector.connect(host='localhost', user='root', password=config.get('PASSWORD'),database='jbg030', allow_local_infile=True)  
+    connection = mysql.connector.connect(host=config.get('HOST'), user=config.get('USERNAME'), password=config.get('PASSWORD'),database=config.get('DATABASE'), allow_local_infile=True)  
     # with connect(host=config.get('HOST'), user=config.get('USERNAME'), password=config.get('PASSWORD'),database=config.get('DATABASE'), allow_local_infile=True) as connection:
 
     while True: