Skip to content

Commit

Permalink
Clean repo
Browse files: browse the repository at this point in the history
Former-commit-id: c8a89a773de743eeff106990ab5bdf716576439f
  • Loading branch information
lmBored committed Jun 27, 2024
1 parent 2028dd3 commit 9e0a576
Show file tree
Hide file tree
Showing 10 changed files with 442 additions and 3,626 deletions.
10 changes: 6 additions & 4 deletions Final_conv_stats.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
"import seaborn as sns\n",
"import numpy as np\n",
"\n",
"from config import config\n",
"\n",
"connection = mysql.connector.connect(\n",
" host='localhost',\n",
" user='root',\n",
" password='',\n",
" database='jbg030'\n",
" host=config.get('HOST'),\n",
" user=config.get('USERNAME'),\n",
" password=config.get('PASSWORD'),\n",
" database=config.get('DATABASE')\n",
")\n",
"\n",
"cursor = connection.cursor()"
Expand Down
548 changes: 59 additions & 489 deletions business_idea_stats_from_wh.ipynb

Large diffs are not rendered by default.

175 changes: 164 additions & 11 deletions final_demo copy.ipynb
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Clear all outputs before pulling and pushing to avoid merge conflicts!"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -22,12 +15,13 @@
"import numpy as np\n",
"import datetime\n",
"import plotly.graph_objects as go\n",
"from config import config\n",
"\n",
"connection = mysql.connector.connect(\n",
" host='localhost',\n",
" user='root',\n",
" password='4ADhj130!',\n",
" database='jbg030'\n",
" host=config.get('HOST'),\n",
" user=config.get('USERNAME'),\n",
" password=config.get('PASSWORD'),\n",
" database=config.get('DATABASE')\n",
")\n",
"\n",
"cursor = connection.cursor()"
Expand Down Expand Up @@ -1249,6 +1243,165 @@
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_ba = df[df['mentioned_airlines'].apply(lambda x: 'AirFrance' in x) | df['user_mentions'].apply(lambda x: '106062176' in x)]\n",
"\n",
"categories = ['baggage', 'money', 'staff', 'delay_and_cancellation']\n",
"sentiments = ['positive', 'neutral', 'negative']\n",
"sentiment_colors = sns.color_palette(\"muted\", 3)\n",
"\n",
"# Calculate sentiment percentages for each category\n",
"sentiment_percentages = {category: [] for category in categories}\n",
"total_counts = {category: 0 for category in categories}\n",
"\n",
"for category in categories:\n",
" category_data = df_ba[df_ba[category] == 1]\n",
" total_count = category_data.shape[0]\n",
" total_counts[category] = total_count\n",
" for sentiment in sentiments:\n",
" sentiment_count = category_data[category_data['label'] == sentiment].shape[0]\n",
" sentiment_percentage = (sentiment_count / total_count) * 100 if total_count > 0 else 0\n",
" sentiment_percentages[category].append(sentiment_percentage)\n",
"\n",
"# Custom color palette\n",
"custom_colors = ['#0D2859', '#D83B33', '#A7A9AC']\n",
"\n",
"# Prepare the DataFrame for plotting\n",
"plot_data = pd.DataFrame(sentiment_percentages, index=sentiments).T\n",
"\n",
"# Plot the horizontal stacked bar chart\n",
"fig, ax = plt.subplots(figsize=(20, 10))\n",
"bottoms = [0] * len(categories)\n",
"\n",
"for sentiment, color in zip(sentiments, custom_colors):\n",
" ax.barh(categories, plot_data[sentiment], left=bottoms, label=sentiment.capitalize(), color=color)\n",
" bottoms = [i + j for i, j in zip(bottoms, plot_data[sentiment])]\n",
"\n",
" # Annotate percentages on the bars\n",
" for i, (percent, total) in enumerate(zip(plot_data[sentiment], total_counts.values())):\n",
" if total > 0:\n",
" ax.text(bottoms[i] - percent / 2, i, f'{percent:.1f}%', va='center', ha='center', fontsize=12, color='white', weight='bold')\n",
"\n",
"ax.set_xlabel('Percentage', fontsize=14)\n",
"ax.set_title('Sentiment Analysis for Different Categories in AirFrance', fontsize=20, weight='bold')\n",
"ax.legend(title='Sentiment', bbox_to_anchor=(1.05, 1), loc='upper left', prop={'size': 15})\n",
"ax.set_yticklabels([category.capitalize().replace('_', ' ') for category in categories])\n",
"ax.tick_params(axis='both', which='major', labelsize=13)\n",
"plt.tight_layout(rect=[0, 0, 0.85, 1])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_ba = df[df['mentioned_airlines'].apply(lambda x: 'KLM' in x) | df['user_mentions'].apply(lambda x: '56377143' in x)]\n",
"\n",
"categories = ['baggage', 'money', 'staff', 'delay_and_cancellation']\n",
"sentiments = ['positive', 'neutral', 'negative']\n",
"sentiment_colors = sns.color_palette(\"muted\", 3)\n",
"\n",
"# Calculate sentiment percentages for each category\n",
"sentiment_percentages = {category: [] for category in categories}\n",
"total_counts = {category: 0 for category in categories}\n",
"\n",
"for category in categories:\n",
" category_data = df_ba[df_ba[category] == 1]\n",
" total_count = category_data.shape[0]\n",
" total_counts[category] = total_count\n",
" for sentiment in sentiments:\n",
" sentiment_count = category_data[category_data['label'] == sentiment].shape[0]\n",
" sentiment_percentage = (sentiment_count / total_count) * 100 if total_count > 0 else 0\n",
" sentiment_percentages[category].append(sentiment_percentage)\n",
"\n",
"# Custom color palette\n",
"custom_colors = ['#0D2859', '#D83B33', '#A7A9AC']\n",
"\n",
"# Prepare the DataFrame for plotting\n",
"plot_data = pd.DataFrame(sentiment_percentages, index=sentiments).T\n",
"\n",
"# Plot the horizontal stacked bar chart\n",
"fig, ax = plt.subplots(figsize=(20, 10))\n",
"bottoms = [0] * len(categories)\n",
"\n",
"for sentiment, color in zip(sentiments, custom_colors):\n",
" ax.barh(categories, plot_data[sentiment], left=bottoms, label=sentiment.capitalize(), color=color)\n",
" bottoms = [i + j for i, j in zip(bottoms, plot_data[sentiment])]\n",
"\n",
" # Annotate percentages on the bars\n",
" for i, (percent, total) in enumerate(zip(plot_data[sentiment], total_counts.values())):\n",
" if total > 0:\n",
" ax.text(bottoms[i] - percent / 2, i, f'{percent:.1f}%', va='center', ha='center', fontsize=12, color='white', weight='bold')\n",
"\n",
"ax.set_xlabel('Percentage', fontsize=14)\n",
"ax.set_title('Sentiment Analysis for Different Categories in KLM', fontsize=20, weight='bold')\n",
"ax.legend(title='Sentiment', bbox_to_anchor=(1.05, 1), loc='upper left', prop={'size': 15})\n",
"ax.set_yticklabels([category.capitalize().replace('_', ' ') for category in categories])\n",
"ax.tick_params(axis='both', which='major', labelsize=13)\n",
"plt.tight_layout(rect=[0, 0, 0.85, 1])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_ba = df[df['mentioned_airlines'].apply(lambda x: 'Lufthansa' in x) | df['user_mentions'].apply(lambda x: '124476322' in x)]\n",
"\n",
"categories = ['baggage', 'money', 'staff', 'delay_and_cancellation']\n",
"sentiments = ['positive', 'neutral', 'negative']\n",
"sentiment_colors = sns.color_palette(\"muted\", 3)\n",
"\n",
"# Calculate sentiment percentages for each category\n",
"sentiment_percentages = {category: [] for category in categories}\n",
"total_counts = {category: 0 for category in categories}\n",
"\n",
"for category in categories:\n",
" category_data = df_ba[df_ba[category] == 1]\n",
" total_count = category_data.shape[0]\n",
" total_counts[category] = total_count\n",
" for sentiment in sentiments:\n",
" sentiment_count = category_data[category_data['label'] == sentiment].shape[0]\n",
" sentiment_percentage = (sentiment_count / total_count) * 100 if total_count > 0 else 0\n",
" sentiment_percentages[category].append(sentiment_percentage)\n",
"\n",
"# Custom color palette\n",
"custom_colors = ['#0D2859', '#D83B33', '#A7A9AC']\n",
"\n",
"# Prepare the DataFrame for plotting\n",
"plot_data = pd.DataFrame(sentiment_percentages, index=sentiments).T\n",
"\n",
"# Plot the horizontal stacked bar chart\n",
"fig, ax = plt.subplots(figsize=(20, 10))\n",
"bottoms = [0] * len(categories)\n",
"\n",
"for sentiment, color in zip(sentiments, custom_colors):\n",
" ax.barh(categories, plot_data[sentiment], left=bottoms, label=sentiment.capitalize(), color=color)\n",
" bottoms = [i + j for i, j in zip(bottoms, plot_data[sentiment])]\n",
"\n",
" # Annotate percentages on the bars\n",
" for i, (percent, total) in enumerate(zip(plot_data[sentiment], total_counts.values())):\n",
" if total > 0:\n",
" ax.text(bottoms[i] - percent / 2, i, f'{percent:.1f}%', va='center', ha='center', fontsize=12, color='white', weight='bold')\n",
"\n",
"ax.set_xlabel('Percentage', fontsize=14)\n",
"ax.set_title('Sentiment Analysis for Different Categories in Lufthansa', fontsize=20, weight='bold')\n",
"ax.legend(title='Sentiment', bbox_to_anchor=(1.05, 1), loc='upper left', prop={'size': 15})\n",
"ax.set_yticklabels([category.capitalize().replace('_', ' ') for category in categories])\n",
"ax.tick_params(axis='both', which='major', labelsize=13)\n",
"plt.tight_layout(rect=[0, 0, 0.85, 1])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
2,190 changes: 117 additions & 2,073 deletions final_demo.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
data = [Path("data/"+file) for file in os.listdir('data')]
data.sort(key=lambda x: x.name)

connection = mysql.connector.connect(host='localhost', user='root', password=config.get('PASSWORD'),database='jbg030', allow_local_infile=True)
connection = mysql.connector.connect(host=config.get('HOST'), user=config.get('USERNAME'), password=config.get('PASSWORD'),database=config.get('DATABASE'), allow_local_infile=True)
# with connect(host=config.get('HOST'), user=config.get('USERNAME'), password=config.get('PASSWORD'),database=config.get('DATABASE'), allow_local_infile=True) as connection:

while True:
Expand Down
Loading

0 comments on commit 9e0a576

Please sign in to comment.