@@ -25,12 +25,12 @@ python3 examples/tools/split_sentences.py \
2525 --input_files $OUTPUT_DIR /wiki_example_data.txt \
2626 --output_directory $OUTPUT_DIR /sentence-split-data
2727# Preprocess input for pretraining.
28- python3 examples/bert /bert_create_pretraining_data.py \
28+ python3 examples/bert_pretraining /bert_create_pretraining_data.py \
2929 --input_files $OUTPUT_DIR /sentence-split-data/ \
3030 --vocab_file $OUTPUT_DIR /bert_vocab_uncased.txt \
3131 --output_file $OUTPUT_DIR /pretraining-data/pretraining.tfrecord
3232# Run pretraining for 100 train steps only.
33- python3 examples/bert /bert_pretrain.py \
33+ python3 examples/bert_pretraining /bert_pretrain.py \
3434 --input_directory $OUTPUT_DIR /pretraining-data/ \
3535 --vocab_file $OUTPUT_DIR /bert_vocab_uncased.txt \
3636 --saved_model_output $OUTPUT_DIR /model/ \
@@ -156,7 +156,7 @@ with the following:
156156``` shell
157157for file in path/to/sentence-split-data/* ; do
158158 output=" path/to/pretraining-data/$( basename -- " $file " .txt) .tfrecord"
159- python3 examples/bert /bert_create_pretraining_data.py \
159+ python3 examples/bert_pretraining /bert_create_pretraining_data.py \
160160 --input_files ${file} \
161161 --vocab_file bert_vocab_uncased.txt \
162162 --output_file ${output}
@@ -171,7 +171,7 @@ on an 8 core machine.
171171NUM_JOBS=5
172172for file in path/to/sentence-split-data/* ; do
173173 output=" path/to/pretraining-data/$( basename -- " $file " .txt) .tfrecord"
174- echo python3 examples/bert /bert_create_pretraining_data.py \
174+ echo python3 examples/bert_pretraining /bert_create_pretraining_data.py \
175175 --input_files ${file} \
176176 --vocab_file bert_vocab_uncased.txt \
177177 --output_file ${output}
@@ -192,7 +192,7 @@ directory. If you are willing to train from data stored on google cloud storage
192192the URL of the GCS bucket. For example, `--input_directory=gs://your-bucket-name/your-data-path`. You can also save models directly to GCS using the same approach.
193193
194194``` shell
195- python3 examples/bert /bert_pretrain.py \
195+ python3 examples/bert_pretraining /bert_pretrain.py \
196196 --input_directory path/to/data/ \
197197 --vocab_file path/to/bert_vocab_uncased.txt \
198198 --model_size tiny \
0 commit comments