Rename examples/bert -> examples/bert_pretraining (#647)

mattdangerw · web-flow · commit f7dfc7b880ce · 2023-01-12T15:47:12.000-08:00
diff --git a/examples/bert_pretraining/README.md b/examples/bert_pretraining/README.md
@@ -25,12 +25,12 @@ python3 examples/tools/split_sentences.py \
     --input_files $OUTPUT_DIR/wiki_example_data.txt \
     --output_directory $OUTPUT_DIR/sentence-split-data
 # Preprocess input for pretraining.
-python3 examples/bert/bert_create_pretraining_data.py \
+python3 examples/bert_pretraining/bert_create_pretraining_data.py \
     --input_files $OUTPUT_DIR/sentence-split-data/ \
     --vocab_file $OUTPUT_DIR/bert_vocab_uncased.txt \
     --output_file $OUTPUT_DIR/pretraining-data/pretraining.tfrecord
 # Run pretraining for 100 train steps only.
-python3 examples/bert/bert_pretrain.py \
+python3 examples/bert_pretraining/bert_pretrain.py \
     --input_directory $OUTPUT_DIR/pretraining-data/ \
     --vocab_file $OUTPUT_DIR/bert_vocab_uncased.txt \
     --saved_model_output $OUTPUT_DIR/model/ \
@@ -156,7 +156,7 @@ with the following:
 ```shell
 for file in path/to/sentence-split-data/*; do
     output="path/to/pretraining-data/$(basename -- "$file" .txt).tfrecord"
-    python3 examples/bert/bert_create_pretraining_data.py \
+    python3 examples/bert_pretraining/bert_create_pretraining_data.py \
         --input_files ${file} \
         --vocab_file bert_vocab_uncased.txt \
         --output_file ${output}
@@ -171,7 +171,7 @@ on an 8 core machine.
 NUM_JOBS=5
 for file in path/to/sentence-split-data/*; do
     output="path/to/pretraining-data/$(basename -- "$file" .txt).tfrecord"
-    echo python3 examples/bert/bert_create_pretraining_data.py \
+    echo python3 examples/bert_pretraining/bert_create_pretraining_data.py \
         --input_files ${file} \
         --vocab_file bert_vocab_uncased.txt \
         --output_file ${output}
@@ -192,7 +192,7 @@ directory. If you are willing to train from data stored on google cloud storage
 the URL of the GCS bucket. For example, `--input_directory=gs://your-bucket-name/your-data-path`. You can also save models directly to GCS using the same approach.
 
 ```shell
-python3 examples/bert/bert_pretrain.py \
+python3 examples/bert_pretraining/bert_pretrain.py \
     --input_directory path/to/data/ \
     --vocab_file path/to/bert_vocab_uncased.txt \
     --model_size tiny \
diff --git a/examples/bert_pretraining/__init__.py b/examples/bert_pretraining/__init__.py
diff --git a/examples/bert_pretraining/bert_config.py b/examples/bert_pretraining/bert_config.py
@@ -77,9 +77,3 @@
     # Percentage of training steps used for learning rate warmup.
     "warmup_percentage": 0.1,
 }
-
-FINETUNING_CONFIG = {
-    "batch_size": 32,
-    "epochs": 3,
-    "learning_rates": [5e-5, 4e-5, 3e-5, 2e-5],
-}
diff --git a/examples/bert_pretraining/bert_create_pretraining_data.py b/examples/bert_pretraining/bert_create_pretraining_data.py
@@ -44,7 +44,7 @@
 from absl import app
 from absl import flags
 
-from examples.bert.bert_config import PREPROCESSING_CONFIG
+from examples.bert_pretraining.bert_config import PREPROCESSING_CONFIG
 from examples.utils.scripting_utils import list_filenames_for_arg
 
 # Tokenization will happen with tensorflow and can easily OOM a GPU.
diff --git a/examples/bert_pretraining/bert_pretrain.py b/examples/bert_pretraining/bert_pretrain.py
@@ -22,9 +22,9 @@
 from tensorflow import keras
 
 import keras_nlp
-from examples.bert.bert_config import MODEL_CONFIGS
-from examples.bert.bert_config import PREPROCESSING_CONFIG
-from examples.bert.bert_config import TRAINING_CONFIG
+from examples.bert_pretraining.bert_config import MODEL_CONFIGS
+from examples.bert_pretraining.bert_config import PREPROCESSING_CONFIG
+from examples.bert_pretraining.bert_config import TRAINING_CONFIG
 
 FLAGS = flags.FLAGS