From c81f2e3cdb23b18102ad220b7f978124b00bced6 Mon Sep 17 00:00:00 2001
From: Shan <shan@bewgle.com>
Date: Wed, 22 Apr 2020 12:43:32 +0530
Subject: [PATCH 1/8] tokenization: read vocab file via tf.gfile.GFile on Python 3

---
 text/utils/tokenization.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/text/utils/tokenization.py b/text/utils/tokenization.py
index e0369ac..5342b83 100644
--- a/text/utils/tokenization.py
+++ b/text/utils/tokenization.py
@@ -34,14 +34,19 @@ def load_vocab(vocab_file):
   """Loads a vocabulary file into a dictionary."""
   vocab = collections.OrderedDict()
   index = 0
-  with open_reader(vocab_file) as reader:
-    while True:
-      token = reader.readline()
-      if not token:
-        break
-      token = token.strip()
-      vocab[token] = index
-      index += 1
+  if six.PY2:
+    reader = open_reader(vocab_file)
+  else:
+    reader = tf.gfile.GFile(vocab_file, "r")
+
+  while True:
+    token = reader.readline()
+    if not token:
+      break
+    token = token.strip()
+    vocab[token] = index
+    index += 1
+  reader.close()
   return vocab
 
 

From e7b5fb3e7863c09457f290790f05fdbdc98d68fa Mon Sep 17 00:00:00 2001
From: Shan <shan@bewgle.com>
Date: Wed, 22 Apr 2020 12:45:01 +0530
Subject: [PATCH 2/8] tokenization: run str/unicode conversion check only on Python 2

---
 text/utils/tokenization.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/text/utils/tokenization.py b/text/utils/tokenization.py
index 5342b83..3450331 100644
--- a/text/utils/tokenization.py
+++ b/text/utils/tokenization.py
@@ -270,11 +270,12 @@ def _is_punctuation(char):
 
 def _convert_to_unicode_or_throw(text):
   """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
-  if isinstance(text, str):
-    text = text.decode("utf-8", "ignore")
-  if not isinstance(text, unicode):
-    raise ValueError("`text` must be of type `unicode` or `str`, but is "
-                     "actually of type: %s" % (type(text).__name__))
+  if six.PY2:
+    if isinstance(text, str):
+      text = text.decode("utf-8", "ignore")
+    if not isinstance(text, unicode):
+      raise ValueError("`text` must be of type `unicode` or `str`, but is "
+                      "actually of type: %s" % (type(text).__name__))
   return text
 
 

From 6da3f328bd8aab9ab5c7d2476144b9290340c42b Mon Sep 17 00:00:00 2001
From: Shan <shan@bewgle.com>
Date: Wed, 22 Apr 2020 12:46:35 +0530
Subject: [PATCH 3/8] preprocess.py: guard unicode isinstance check with six.PY2

---
 text/preprocess.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/text/preprocess.py b/text/preprocess.py
index ef5aab7..2f10bb0 100644
--- a/text/preprocess.py
+++ b/text/preprocess.py
@@ -21,6 +21,7 @@
 import copy
 import json
 import os
+import six
 from absl import app
 from absl import flags
 
@@ -266,7 +267,7 @@ def convert_examples_to_features(
       # st = " ".join([str(x) for x in tokens])
       st = ""
       for x in tokens:
-        if isinstance(x, unicode):
+        if six.PY2 and isinstance(x, unicode):
           st += x.encode("ascii", "replace") + " "
         else:
           st += str(x) + " "

From f9e1cfe7409f7d5cf51efa365233153a241d4f8b Mon Sep 17 00:00:00 2001
From: Shan <shan@bewgle.com>
Date: Wed, 22 Apr 2020 12:51:11 +0530
Subject: [PATCH 4/8] logging, run on tpu

---
 text/augmentation/sent_level_augment.py |  3 ++-
 text/scripts/run_base_uda.sh            | 17 +++++++++--------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/text/augmentation/sent_level_augment.py b/text/augmentation/sent_level_augment.py
index 44f8827..e1cfb9c 100644
--- a/text/augmentation/sent_level_augment.py
+++ b/text/augmentation/sent_level_augment.py
@@ -22,6 +22,7 @@
 import math
 
 import random
+import six
 from absl import flags
 
 import numpy as np
@@ -117,7 +118,7 @@ def back_translation(examples, aug_ops, sub_set, aug_copy_num,
         text_b=text_b,
         label=ori_example.label)
     aug_examples += [example]
-    if np.random.random() < 0.0001:
+    if six.PY2 and np.random.random() < 0.0001:
       tf.logging.info("\tori:\n\t\t{:s}\n\t\t{:s}\n\t\t{:s}\n".format(
           ori_example.text_a, ori_example.text_b, ori_example.label))
       tf.logging.info("\tnew:\n\t\t{:s}\n\t\t{:s}\n\t\t{:s}\n".format(
diff --git a/text/scripts/run_base_uda.sh b/text/scripts/run_base_uda.sh
index 8bd621a..9e716bb 100644
--- a/text/scripts/run_base_uda.sh
+++ b/text/scripts/run_base_uda.sh
@@ -12,18 +12,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+gsutil -m rsync -r . gs://bewgle-data/UDA-py3/
 python main.py \
-  --use_tpu=False \
+  --tpu_name= \
   --do_train=True \
   --do_eval=True \
-  --sup_train_data_dir=data/proc_data/IMDB/train_20 \
-  --unsup_data_dir=data/proc_data/IMDB/unsup \
-  --eval_data_dir=data/proc_data/IMDB/dev \
-  --bert_config_file=pretrained_models/bert_base/bert_config.json \
-  --vocab_file=pretrained_models/bert_base/vocab.txt \
-  --init_checkpoint=pretrained_models/bert_base/bert_model.ckpt \
+  --sup_train_data_dir=gs://bewgle-data/UDA-py3/data/proc_data/IMDB/train_20 \
+  --unsup_data_dir=gs://bewgle-data/UDA-py3/data/proc_data/IMDB/unsup \
+  --eval_data_dir=gs://bewgle-data/UDA-py3/data/proc_data/IMDB/dev \
+  --bert_config_file=gs://bewgle-data/UDA-py3/pretrained_models/bert_base/bert_config.json \
+  --vocab_file=gs://bewgle-data/UDA-py3/pretrained_models/bert_base/vocab.txt \
+  --init_checkpoint=gs://bewgle-data/UDA-py3/pretrained_models/bert_base/bert_model.ckpt \
   --task_name=IMDB \
-  --model_dir=ckpt/base_uda \
+  --model_dir=gs://bewgle-data/UDA-py3/ckpt/base_uda \
   --num_train_steps=10000 \
   --learning_rate=2e-05 \
   --num_warmup_steps=1000 \

From 398d6cbc56da45a69c8454615d95bd40d869bdba Mon Sep 17 00:00:00 2001
From: Shan <shan@bewgle.com>
Date: Wed, 22 Apr 2020 12:56:58 +0530
Subject: [PATCH 5/8] run_base_uda.sh: parameterize GCS directory as GS_DIR

---
 text/scripts/run_base_uda.sh | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/text/scripts/run_base_uda.sh b/text/scripts/run_base_uda.sh
index 9e716bb..ccf87d1 100644
--- a/text/scripts/run_base_uda.sh
+++ b/text/scripts/run_base_uda.sh
@@ -12,19 +12,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-gsutil -m rsync -r . gs://bewgle-data/UDA-py3/
+GS_DIR = gs://bewgle-data/UDA-tpu-py3
+gsutil -m rsync -r . $GS_DIR
 python main.py \
   --tpu_name= \
   --do_train=True \
   --do_eval=True \
-  --sup_train_data_dir=gs://bewgle-data/UDA-py3/data/proc_data/IMDB/train_20 \
-  --unsup_data_dir=gs://bewgle-data/UDA-py3/data/proc_data/IMDB/unsup \
-  --eval_data_dir=gs://bewgle-data/UDA-py3/data/proc_data/IMDB/dev \
-  --bert_config_file=gs://bewgle-data/UDA-py3/pretrained_models/bert_base/bert_config.json \
-  --vocab_file=gs://bewgle-data/UDA-py3/pretrained_models/bert_base/vocab.txt \
-  --init_checkpoint=gs://bewgle-data/UDA-py3/pretrained_models/bert_base/bert_model.ckpt \
+  --sup_train_data_dir=$GS_DIR/data/proc_data/IMDB/train_20 \
+  --unsup_data_dir=$GS_DIR/data/proc_data/IMDB/unsup \
+  --eval_data_dir=$GS_DIR/data/proc_data/IMDB/dev \
+  --bert_config_file=$GS_DIR/pretrained_models/bert_base/bert_config.json \
+  --vocab_file=$GS_DIR/pretrained_models/bert_base/vocab.txt \
+  --init_checkpoint=$GS_DIR/pretrained_models/bert_base/bert_model.ckpt \
   --task_name=IMDB \
-  --model_dir=gs://bewgle-data/UDA-py3/ckpt/base_uda \
+  --model_dir=$GS_DIR/ckpt/base_uda \
   --num_train_steps=10000 \
   --learning_rate=2e-05 \
   --num_warmup_steps=1000 \

From 37d8100aa39b3ade1864d8ad9d2bc05198f53b46 Mon Sep 17 00:00:00 2001
From: Shan <shan@bewgle.com>
Date: Wed, 22 Apr 2020 13:21:45 +0530
Subject: [PATCH 6/8] modeling: avoid Python 2-only long type in assert_rank on Python 3

---
 text/bert/modeling.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/text/bert/modeling.py b/text/bert/modeling.py
index b0fa8bb..2199e75 100644
--- a/text/bert/modeling.py
+++ b/text/bert/modeling.py
@@ -964,7 +964,9 @@ def assert_rank(tensor, expected_rank, name=None):
     name = tensor.name
 
   expected_rank_dict = {}
-  if isinstance(expected_rank, (int, long)):
+  if six.PY2 and isinstance(expected_rank, (int, long)):
+    expected_rank_dict[expected_rank] = True
+  elif six.PY3 and isinstance(expected_rank, int):
     expected_rank_dict[expected_rank] = True
   else:
     for x in expected_rank:

From 2e5f8ac10b917838f79f1a3bde4e4c15f4ed8e09 Mon Sep 17 00:00:00 2001
From: Shan <shan@bewgle.com>
Date: Wed, 22 Apr 2020 13:33:50 +0530
Subject: [PATCH 7/8] modeling: avoid Python 2-only unicode type in get_activation on Python 3

---
 text/bert/modeling.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/text/bert/modeling.py b/text/bert/modeling.py
index 2199e75..d8553de 100644
--- a/text/bert/modeling.py
+++ b/text/bert/modeling.py
@@ -312,7 +312,10 @@ def get_activation(activation_string):
 
   # We assume that anything that's not a string is already an activation
   # function, so we just return it.
-  if not isinstance(activation_string, (str, unicode)):
+
+  if six.PY2 and not isinstance(activation_string, (str, unicode)):
+    return activation_string
+  elif six.PY3 and not isinstance(activation_string, str):
     return activation_string
 
   if not activation_string:

From bd5c67a69601f5b63e8fce15307e9ecfa254bb20 Mon Sep 17 00:00:00 2001
From: Shan <shan@bewgle.com>
Date: Wed, 22 Apr 2020 13:56:06 +0530
Subject: [PATCH 8/8] Revert run_base_uda.sh to local, non-TPU settings

---
 text/scripts/run_base_uda.sh | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/text/scripts/run_base_uda.sh b/text/scripts/run_base_uda.sh
index ccf87d1..8bd621a 100644
--- a/text/scripts/run_base_uda.sh
+++ b/text/scripts/run_base_uda.sh
@@ -12,20 +12,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-GS_DIR = gs://bewgle-data/UDA-tpu-py3
-gsutil -m rsync -r . $GS_DIR
 python main.py \
-  --tpu_name= \
+  --use_tpu=False \
   --do_train=True \
   --do_eval=True \
-  --sup_train_data_dir=$GS_DIR/data/proc_data/IMDB/train_20 \
-  --unsup_data_dir=$GS_DIR/data/proc_data/IMDB/unsup \
-  --eval_data_dir=$GS_DIR/data/proc_data/IMDB/dev \
-  --bert_config_file=$GS_DIR/pretrained_models/bert_base/bert_config.json \
-  --vocab_file=$GS_DIR/pretrained_models/bert_base/vocab.txt \
-  --init_checkpoint=$GS_DIR/pretrained_models/bert_base/bert_model.ckpt \
+  --sup_train_data_dir=data/proc_data/IMDB/train_20 \
+  --unsup_data_dir=data/proc_data/IMDB/unsup \
+  --eval_data_dir=data/proc_data/IMDB/dev \
+  --bert_config_file=pretrained_models/bert_base/bert_config.json \
+  --vocab_file=pretrained_models/bert_base/vocab.txt \
+  --init_checkpoint=pretrained_models/bert_base/bert_model.ckpt \
   --task_name=IMDB \
-  --model_dir=$GS_DIR/ckpt/base_uda \
+  --model_dir=ckpt/base_uda \
   --num_train_steps=10000 \
   --learning_rate=2e-05 \
   --num_warmup_steps=1000 \