Skip to content

Commit

Permalink
remove backslash, update file splitting based on os and sentiment ana…
Browse files Browse the repository at this point in the history
…lysis
  • Loading branch information
lmBored committed Jun 5, 2024
1 parent 6f510b1 commit e1c4397
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 37 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ tweets_dataset_gpu.csv
tweets_dataset_all.csv
json_files.txt
combined_dataset.csv
sentiment_evolution.ipynb
9 changes: 7 additions & 2 deletions file_splitting/split_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
data = [Path("data/"+file) for file in os.listdir('data')]
data.sort(key=lambda x: x.name)

with open('json_files.txt', 'w') as f:
f.write('\n'.join(str(file).replace('data/', '') for file in data))
if os.name == 'nt': # Windows
with open('json_files.txt', 'w') as f:
f.write('\n'.join(str(file).replace('data\\', '') for file in data))
else: # Other operating systems
with open('json_files.txt', 'w') as f:
f.write('\n'.join(str(file).replace('data/', '') for file in data))

print("Files written to json_files.txt.")
2 changes: 2 additions & 0 deletions preprocess/old_tweet_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ def text_transformer(text):
# text = re.sub(r'[^A-Za-z ]', '', text) # remove special characters
text = re.sub(r'\n', '', text)
text = re.sub(r'[,.!?]', '', text)
text = re.sub(r"\'", "", text)
text = re.sub(r'\\', '', text)
text = text.strip()
text = text.lower()
return text
Expand Down
2 changes: 2 additions & 0 deletions preprocess/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def text_transformer(text):
# text = re.sub(r'[^A-Za-z ]', '', text) # remove special characters
text = re.sub(r'\n', '', text)
text = re.sub(r'[,.!?]', '', text)
text = re.sub(r"\'", "", text)
text = re.sub(r'\\', '', text)
text = text.strip()
text = text.lower()
return text
Expand Down
2 changes: 2 additions & 0 deletions preprocess/test_loading_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ def text_transformer(text):
# text = re.sub(r'[^A-Za-z ]', '', text) # remove special characters
text = re.sub(r'\n', '', text)
text = re.sub(r'[,.!?]', '', text)
text = re.sub(r"\'", "", text)
text = re.sub(r'\\', '', text)
text = text.strip()
text = text.lower()
return text
Expand Down
2 changes: 2 additions & 0 deletions preprocess/tweets_gpu_testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@
" # text = re.sub(r'[^A-Za-z ]', '', text) # remove special characters\n",
" text = re.sub(r'\\n', '', text)\n",
" text = re.sub(r'[,.!?]', '', text)\n",
" text = re.sub(r\"\\'\", \"\", text)\n",
" text = re.sub(r'\\\\', '', text)\n",
" text = text.strip()\n",
" text = text.lower()\n",
" return text"
Expand Down
2 changes: 2 additions & 0 deletions preprocess/ultimate_tweet_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ def text_transformer(text):
# text = re.sub(r'[^A-Za-z ]', '', text) # remove special characters
text = re.sub(r'\n', '', text)
text = re.sub(r'[,.!?]', '', text)
text = re.sub(r"\'", "", text)
text = re.sub(r'\\', '', text)
text = text.strip()
text = text.lower()
return text
Expand Down
6 changes: 3 additions & 3 deletions sentiment/sentiment_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -48,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -84,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down
32 changes: 0 additions & 32 deletions sentiment_evolution.ipynb

This file was deleted.

0 comments on commit e1c4397

Please sign in to comment.