Skip to content

Commit

Permalink
Language analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
lmBored committed May 27, 2024
1 parent 971b5f9 commit 1abf3b9
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions remnant/Language_grouping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from langdetect import detect, LangDetectException
import mysql.connector
from config import config

# Compare the language label stored with each tweet (column 7) against the
# language langdetect infers from the tweet text (column 1), and collect, per
# detected language, the distinct stored labels that disagree with it.

connection = mysql.connector.connect(
    host=config.get('HOST'),
    user=config.get('USERNAME'),
    password=config.get('PASSWORD'),
    database=config.get('DATABASE'),
    allow_local_infile=True,
)
try:
    cursor = connection.cursor()
    try:
        cursor.execute("SELECT * FROM tweets ORDER BY timestamp_ms DESC")
        # All rows are materialised here, so the cursor and the connection can
        # be released before the (slow) detection loop starts.
        tweets = cursor.fetchall()
    finally:
        cursor.close()
finally:
    connection.close()

# Maps detected language -> list of distinct stored labels that differed.
langdict = {}
for tweet in tweets:
    try:
        lang_detected = detect(tweet[1])
    except LangDetectException:
        # langdetect could not classify this text; skip the row entirely.
        continue

    stored_lang = tweet[7].strip()
    if lang_detected == stored_lang:
        continue

    # Dutch detections are echoed so they can be inspected by hand.
    if lang_detected == 'nl':
        print(tweet[1])
        print(f"Detected: {lang_detected} | Stored: {stored_lang}")
        print("----------")

    # NOTE(review): only mismatching rows are grouped here; this nesting is an
    # assumption about the intended analysis - confirm.
    # setdefault creates the list on first sight of a language AND lets the
    # current stored label be recorded; previously the first label was dropped.
    mismatched_labels = langdict.setdefault(lang_detected, [])
    if stored_lang not in mismatched_labels:
        mismatched_labels.append(stored_lang)

print(langdict)

0 comments on commit 1abf3b9

Please sign in to comment.