Skip to content

Commit a22ab5a

Browse files
committed
updating to latest modifications
1 parent e8b2b6f commit a22ab5a

38 files changed

+438
-49
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -109,5 +109,6 @@ data/glove.twitter.27B.100d.txt
109109
.DS_Store
110110
_debug.py
111111
_batch_job.py
112+
_jupyter_debug.py
112113
figures
113114
jobs

.vscode/settings.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
{
2-
"python.pythonPath": "/Users/seanbugeja/.virtualenvs/msc/bin/python",
32
"files.exclude": {
43
"**/.git": true,
54
"**/.svn": true,
@@ -154,5 +153,6 @@
154153
]
155154
}
156155
},
157-
"kite.showWelcomeNotificationOnStartup": false
156+
"kite.showWelcomeNotificationOnStartup": false,
157+
"python.languageServer": "Microsoft"
158158
}

external/Nakov2016/parser_clean.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import re
2+
from csv import DictReader
3+
4+
5+
def nakov_parser(path):
    """Parse a tab-separated Nakov-style sentiment file into parallel lists.

    Every line is read as the four columns ``tweet_id``, ``target``,
    ``sentiment`` and ``sentence``. The column names are supplied
    explicitly, so the first line of the file is treated as data, not as a
    header. Rows whose sentence contains the target more than once
    (whole-word, case-insensitive) are skipped.

    Args:
        path: Path of the tab-separated file to read.

    Returns:
        A ``(sentences, targets, labels)`` tuple of equally long lists.
        Labels collapse the five-point scale to three classes:
        ``"2"``/``"1"`` -> 1, ``"0"`` -> 0, ``"-1"``/``"-2"`` -> -1.
        Any other sentiment value yields ``None``.
    """
    polarity = {"2": 1, "1": 1, "0": 0, "-1": -1, "-2": -1}
    sentences = []
    targets = []
    labels = []
    # newline="" lets the csv module do its own newline handling.
    with open(path, "r", newline="") as file:
        reader = DictReader(
            file,
            dialect="excel-tab",
            fieldnames=["tweet_id", "target", "sentiment", "sentence"],
        )
        for row in reader:
            sentence, target = row["sentence"], row["target"]
            # Escape the target so regex metacharacters in it (e.g. "c++")
            # are matched literally instead of being read as pattern syntax.
            pattern = r"\b{}\b".format(re.escape(target))
            if len(re.findall(pattern, sentence, re.IGNORECASE)) > 1:
                continue
            sentences.append(sentence)
            targets.append(target)
            labels.append(polarity.get(row["sentiment"]))
    return sentences, targets, labels

gcp/_config.json

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"jobId": "tdlstm_cc42_laptops_balanced_missing_run02",
2+
"jobId": "tdlstm_cc840_nakexptrnbal_oovt3b10_run06",
33
"labels": {},
44
"trainingInput": {
55
"scaleTier": "CUSTOM",
@@ -9,12 +9,13 @@
99
"region": "europe-west1",
1010
"args": [
1111
"batch",
12-
"missing2.batch.txt",
12+
"oovt3b10.trnbalanced.batch.txt",
1313
"--new",
1414
"--nocolor",
1515
"--defaults",
1616
"-m=td_lstm",
17-
"-em=commoncrawl-42[corpus]"
17+
"-em=commoncrawl-840[corpus]",
18+
"-ds=nakov-clean[33/34/33]"
1819
]
1920
}
2021
}

generate_boxplots.py

+55-26
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,19 @@
9292
"ian": {
9393
# ! Scores from original paper
9494
"Dehong Ma et al. 2017 (Original)": {
95-
"laptops": {"Micro-F1": 74.49, "Macro-F1": 71.35},
96-
"restaurants": {"Micro-F1": 80.23, "Macro-F1": 70.8},
97-
},
98-
# ! Scores from LCR-ROT
99-
"Zheng et al. 2018": {
10095
"laptops": {"Micro-F1": 72.1},
10196
"restaurants": {"Micro-F1": 78.6},
10297
},
98+
# # ! Scores from LCR-ROT
99+
# "Zheng et al. 2018": {
100+
# "laptops": {"Micro-F1": 72.1},
101+
# "restaurants": {"Micro-F1": 78.6},
102+
# },
103+
# ! Scores from https://arxiv.org/abs/2005.06607
104+
"Navonil et al. 2020": {
105+
"laptops": {"Macro-F1": 64.86},
106+
"restaurants": {"Macro-F1": 66.41},
107+
},
103108
},
104109
"ram": {
105110
# ! Scores from original paper
@@ -209,11 +214,18 @@
209214
"ram": "RAM",
210215
}
211216

217+
# EMBEDDINGS = {
218+
# "cc42": "GloVe CommonCrawl 42b (300d)",
219+
# "cc840": "GloVe CommonCrawl 840b (300d)",
220+
# "t200": "GloVe Twitter (200d)",
221+
# "t100": "GloVe Twitter (100d)",
222+
# }
223+
212224
EMBEDDINGS = {
213-
"cc42": "GloVe CommonCrawl 42b (300d)",
214-
"cc840": "GloVe CommonCrawl 840b (300d)",
215-
"t200": "GloVe Twitter (200d)",
216-
"t100": "GloVe Twitter (100d)",
225+
"cc840": "300d 840b CommonCrawl GloVe ",
226+
"cc42": "300d 42b CommonCrawl GloVe",
227+
"t200": "200d Twitter GloVe",
228+
"t100": "100d Twitter GloVe",
217229
}
218230

219231
METRIC_COLS = {
@@ -290,12 +302,28 @@ def get_comet_api(api_key=None, **kwargs):
290302
return api
291303

292304

293-
def get_metric_series(experiment, metric_cmt_key):
294-
return {
295-
v["epoch"]: float(v["metricValue"])
296-
for v in experiment.get_metrics()
297-
if v["metricName"] == metric_cmt_key
298-
}
305+
# DEPRECATED, need to use metrics_for_chart now
306+
# def get_metric_series(experiment, metric_cmt_key):
307+
# return {
308+
# v["epoch"]: float(v["metricValue"])
309+
# for v in experiment.get_metrics()
310+
# if v["metricName"] == metric_cmt_key
311+
# }
312+
313+
314+
def get_metric_series(experiment, metric_cmt_key, api):
    """Return one metric's per-epoch values for a single experiment.

    Calls ``api.get_metrics_for_chart`` for the experiment and metric, then
    takes the first returned entry whose ``metricName`` equals
    ``metric_cmt_key``.

    Args:
        experiment: Object exposing an ``id`` attribute; the id is used both
            as the query key and to index the response.
        metric_cmt_key: Name of the metric to extract.
        api: Object providing ``get_metrics_for_chart(experiment_keys=...,
            metrics=...)``.

    Returns:
        A dict mapping each epoch to the metric value converted to
        ``float``. An empty dict is returned when the response holds no
        entry for the metric, rather than raising ``IndexError``.
    """
    chart_data = api.get_metrics_for_chart(
        experiment_keys=[experiment.id], metrics=[metric_cmt_key]
    )
    for entry in chart_data[experiment.id]["metrics"]:
        if entry["metricName"] == metric_cmt_key:
            return {
                epoch: float(value)
                for epoch, value in zip(entry["epochs"], entry["values"])
            }
    # No entry for this metric: report an empty series instead of crashing.
    return {}
299327

300328

301329
def get_grouped_metric_series(project, metrics, workspace=None, **kwargs):
@@ -319,7 +347,7 @@ def get_grouped_metric_series(project, metrics, workspace=None, **kwargs):
319347
"experiments": [e for e in experiments if e.name == name],
320348
**{
321349
metric_cmt_key: [
322-
get_metric_series(e, metric_cmt_key)
350+
get_metric_series(e, metric_cmt_key, api)
323351
for e in experiments
324352
if e.name == name
325353
]
@@ -480,14 +508,14 @@ def comet_to_df(workspace, models=None, metrics=None, **kwargs):
480508
"fasttext-wiki-news-subwords-300": "FastText (300d)",
481509
"glove-twitter-25": "GloVe Twitter (25d)",
482510
"glove-twitter-50": "GloVe Twitter (50d)",
483-
"glove-twitter-100": "GloVe Twitter (100d)",
484-
"glove-twitter-200": "GloVe Twitter (200d)",
511+
"glove-twitter-100": EMBEDDINGS["t100"],
512+
"glove-twitter-200": EMBEDDINGS["t200"],
485513
"glove-wiki-gigaword-50": "GloVe Wiki (50d)",
486514
"glove-wiki-gigaword-100": "GloVe Wiki (100d)",
487515
"glove-wiki-gigaword-200": "GloVe Wiki (200d)",
488516
"glove-wiki-gigaword-300": "GloVe Wiki (300d)",
489-
"glove-cc42-300": "GloVe CommonCrawl 42b (300d)",
490-
"glove-cc840-300": "GloVe CommonCrawl 840b (300d)",
517+
"glove-cc42-300": EMBEDDINGS["cc42"],
518+
"glove-cc840-300": EMBEDDINGS["cc840"],
491519
"word2vec-google-news-300": "Word2Vec Google News (300d)",
492520
"word2vec-ruscorpora-300": "Word2Vec Rus Corpora (300d)",
493521
}.get(embedding_info["name"])
@@ -497,12 +525,13 @@ def comet_to_df(workspace, models=None, metrics=None, **kwargs):
497525
exp_name_str = exp_name_str.replace(ds_name, "")
498526
exp_name_str = exp_name_str.replace("balanced", "")
499527
exp_name_str = exp_name_str.replace(
500-
{
501-
"GloVe CommonCrawl 42b (300d)": "cc42",
502-
"GloVe CommonCrawl 840b (300d)": "cc840",
503-
"GloVe Twitter (100d)": "t100",
504-
"GloVe Twitter (200d)": "t200",
505-
}.get(embedding_str),
528+
{v: k for k, v in EMBEDDINGS.items()}.get(embedding_str),
529+
# {
530+
# "GloVe CommonCrawl 42b (300d)": "cc42",
531+
# "GloVe CommonCrawl 840b (300d)": "cc840",
532+
# "GloVe Twitter (100d)": "t100",
533+
# "GloVe Twitter (200d)": "t200",
534+
# }.get(embedding_str),
506535
"",
507536
)
508537
exp_name_str = exp_name_str.replace("-", " ")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=reproduction-new
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=1 oov_buckets=1 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..default
10+
11+
### Missing TDLSTM Runs
12+
#-contd=tdlstm-09momentum-cc42-laptops-01lr-oovt1b1 -mp optimizer=momentum momentum=0.9 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=0 oov_buckets=1 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=0 oov_buckets=10 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=0 oov_buckets=100 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=0 oov_buckets=1000 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=1 oov_buckets=10 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=1 oov_buckets=100 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=1 oov_buckets=1000 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=2 oov_buckets=1 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=2 oov_buckets=10 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=2 oov_buckets=100 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=2 oov_buckets=1000 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=3 oov_buckets=1 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+
14+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Comet Config
2+
default: -cmt=T1NrVnn32dXWeOxeQWGArkHwc -wrk=oov-policies
3+
### Turn off metadata for performance
4+
default: -aux metadata=False
5+
### OOV Policy
6+
default: -mp oov_train=3 oov_buckets=10 oov_fn=uniform[-0.1,0.1]
7+
8+
### Run with default parameters
9+
-contd=..nakovexp_trnbal
10+
-contd=..nakovexp_trnbal -mp hidden_units=300
11+
-contd=..nakovexp_trnbal -mp learning_rate=0.1
12+
-contd=..nakovexp_trnbal -mp hidden_units=300 learning_rate=0.1
13+

0 commit comments

Comments
 (0)