10 changes: 5 additions & 5 deletions examples/bert/README.md → examples/bert_pretraining/README.md
@@ -25,12 +25,12 @@ python3 examples/tools/split_sentences.py \
--input_files $OUTPUT_DIR/wiki_example_data.txt \
--output_directory $OUTPUT_DIR/sentence-split-data
# Preprocess input for pretraining.
python3 examples/bert/bert_create_pretraining_data.py \
python3 examples/bert_pretraining/bert_create_pretraining_data.py \
--input_files $OUTPUT_DIR/sentence-split-data/ \
--vocab_file $OUTPUT_DIR/bert_vocab_uncased.txt \
--output_file $OUTPUT_DIR/pretraining-data/pretraining.tfrecord
# Run pretraining for 100 train steps only.
python3 examples/bert/bert_pretrain.py \
python3 examples/bert_pretraining/bert_pretrain.py \
--input_directory $OUTPUT_DIR/pretraining-data/ \
--vocab_file $OUTPUT_DIR/bert_vocab_uncased.txt \
--saved_model_output $OUTPUT_DIR/model/ \
@@ -156,7 +156,7 @@ with the following:
```shell
for file in path/to/sentence-split-data/*; do
output="path/to/pretraining-data/$(basename -- "$file" .txt).tfrecord"
python3 examples/bert/bert_create_pretraining_data.py \
python3 examples/bert_pretraining/bert_create_pretraining_data.py \
--input_files ${file} \
--vocab_file bert_vocab_uncased.txt \
--output_file ${output}
@@ -171,7 +171,7 @@ on an 8 core machine.
NUM_JOBS=5
for file in path/to/sentence-split-data/*; do
output="path/to/pretraining-data/$(basename -- "$file" .txt).tfrecord"
echo python3 examples/bert/bert_create_pretraining_data.py \
echo python3 examples/bert_pretraining/bert_create_pretraining_data.py \
--input_files ${file} \
--vocab_file bert_vocab_uncased.txt \
--output_file ${output}
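The tail of this snippet, where the echoed commands are actually executed, falls below the fold of the diff. Purely as a hedged sketch (the dispatch mechanism is an assumption, not taken from this PR), the loop could be closed and fanned out across `NUM_JOBS` workers like this:

```shell
# Sketch only: run the echoed preprocessing commands NUM_JOBS at a time.
# Assumes an xargs with -P support; GNU parallel (`... | parallel -j ${NUM_JOBS}`)
# is an equivalent alternative.
NUM_JOBS=5
for file in path/to/sentence-split-data/*; do
  output="path/to/pretraining-data/$(basename -- "$file" .txt).tfrecord"
  echo python3 examples/bert_pretraining/bert_create_pretraining_data.py \
    --input_files ${file} \
    --vocab_file bert_vocab_uncased.txt \
    --output_file ${output}
done | xargs -P ${NUM_JOBS} -I {} sh -c "{}"
```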
@@ -192,7 +192,7 @@ directory. If you are willing to train from data stored on google cloud storage
the URL of a GCS bucket. For example, `--input_directory=gs://your-bucket-name/your-data-path`. You can also save models directly to GCS using the same approach.
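As a hedged illustration only (the bucket name and object paths below are placeholders, not values from this PR), an invocation that reads pretraining data from GCS and writes the trained model back to the same bucket might look like:

```shell
# Hypothetical GCS setup: bucket name and paths are placeholders.
python3 examples/bert_pretraining/bert_pretrain.py \
    --input_directory gs://your-bucket-name/pretraining-data/ \
    --vocab_file path/to/bert_vocab_uncased.txt \
    --model_size tiny \
    --saved_model_output gs://your-bucket-name/model/
```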

```shell
python3 examples/bert/bert_pretrain.py \
python3 examples/bert_pretraining/bert_pretrain.py \
--input_directory path/to/data/ \
--vocab_file path/to/bert_vocab_uncased.txt \
--model_size tiny \
File renamed without changes.
examples/bert/bert_config.py → examples/bert_pretraining/bert_config.py
@@ -77,9 +77,3 @@
# Percentage of training steps used for learning rate warmup.
"warmup_percentage": 0.1,
}

FINETUNING_CONFIG = {
"batch_size": 32,
"epochs": 3,
"learning_rates": [5e-5, 4e-5, 3e-5, 2e-5],
}
examples/bert/bert_create_pretraining_data.py → examples/bert_pretraining/bert_create_pretraining_data.py
@@ -44,7 +44,7 @@
from absl import app
from absl import flags

from examples.bert.bert_config import PREPROCESSING_CONFIG
from examples.bert_pretraining.bert_config import PREPROCESSING_CONFIG
from examples.utils.scripting_utils import list_filenames_for_arg

# Tokenization will happen with tensorflow and can easily OOM a GPU.
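The code that acts on this comment sits below the fold of the diff. A minimal sketch of the usual pattern, assuming the script simply hides GPUs from TensorFlow so preprocessing stays on CPU (an illustration, not necessarily the exact line in the file):

```python
import tensorflow as tf

# Hide all GPUs from TensorFlow so tokenization runs on CPU and cannot
# exhaust GPU memory. This must run before any op touches the GPU.
tf.config.set_visible_devices([], "GPU")
```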
examples/bert/bert_pretrain.py → examples/bert_pretraining/bert_pretrain.py
@@ -22,9 +22,9 @@
from tensorflow import keras

import keras_nlp
from examples.bert.bert_config import MODEL_CONFIGS
from examples.bert.bert_config import PREPROCESSING_CONFIG
from examples.bert.bert_config import TRAINING_CONFIG
from examples.bert_pretraining.bert_config import MODEL_CONFIGS
from examples.bert_pretraining.bert_config import PREPROCESSING_CONFIG
from examples.bert_pretraining.bert_config import TRAINING_CONFIG

FLAGS = flags.FLAGS
