diff --git a/tests/model/BingBertSquad/deepspeed_bsz24_fp16_config.json b/tests/model/BingBertSquad/deepspeed_bsz24_fp16_config.json
index 4322433ed279..f0e7ca55ac65 100755
--- a/tests/model/BingBertSquad/deepspeed_bsz24_fp16_config.json
+++ b/tests/model/BingBertSquad/deepspeed_bsz24_fp16_config.json
@@ -1,6 +1,6 @@
 {
   "train_batch_size": 24,
-  "train_micro_batch_size_per_gpu": 6,
+  "train_micro_batch_size_per_gpu": 3,
   "steps_per_print": 1,
   "optimizer": {
     "type": "Adam",
diff --git a/tests/model/BingBertSquad/deepspeed_bsz24_fp16_zero2_config.json b/tests/model/BingBertSquad/deepspeed_bsz24_fp16_zero2_config.json
index 4fb783082801..4a3d8c8f8dbf 100755
--- a/tests/model/BingBertSquad/deepspeed_bsz24_fp16_zero2_config.json
+++ b/tests/model/BingBertSquad/deepspeed_bsz24_fp16_zero2_config.json
@@ -1,6 +1,6 @@
 {
   "train_batch_size": 24,
-  "train_micro_batch_size_per_gpu": 6,
+  "train_micro_batch_size_per_gpu": 3,
   "steps_per_print": 1,
   "optimizer": {
     "type": "Adam",
diff --git a/tests/model/BingBertSquad/deepspeed_bsz24_fp32_config.json b/tests/model/BingBertSquad/deepspeed_bsz24_fp32_config.json
index 1b2a07f1c3dc..328be06298db 100755
--- a/tests/model/BingBertSquad/deepspeed_bsz24_fp32_config.json
+++ b/tests/model/BingBertSquad/deepspeed_bsz24_fp32_config.json
@@ -1,6 +1,6 @@
 {
   "train_batch_size": 24,
-  "train_micro_batch_size_per_gpu": 6,
+  "train_micro_batch_size_per_gpu": 3,
   "steps_per_print": 1,
   "optimizer": {
     "type": "Adam",
diff --git a/tests/model/BingBertSquad/run_BingBertSquad.sh b/tests/model/BingBertSquad/run_BingBertSquad.sh
index 181009ff2620..7631217619ae 100755
--- a/tests/model/BingBertSquad/run_BingBertSquad.sh
+++ b/tests/model/BingBertSquad/run_BingBertSquad.sh
@@ -121,7 +121,7 @@ echo "deepspeed: ${enable_deepspeed}"
 echo "other_args: ${other_args}"
 
 EFFECTIVE_BATCH_SIZE=${batch_size}
-MAX_GPU_BATCH_SIZE=6
+MAX_GPU_BATCH_SIZE=3
 PER_GPU_BATCH_SIZE=$((EFFECTIVE_BATCH_SIZE/num_gpus))
 if [[ $PER_GPU_BATCH_SIZE -lt $MAX_GPU_BATCH_SIZE ]]; then
        GRAD_ACCUM_STEPS=1
diff --git a/tests/model/BingBertSquad/run_BingBertSquad_sanity.sh b/tests/model/BingBertSquad/run_BingBertSquad_sanity.sh
index 91dcbdb723bb..1b49a37b783f 100755
--- a/tests/model/BingBertSquad/run_BingBertSquad_sanity.sh
+++ b/tests/model/BingBertSquad/run_BingBertSquad_sanity.sh
@@ -122,7 +122,7 @@ echo "deepspeed: ${enable_deepspeed}"
 echo "other_args: ${other_args}"
 
 EFFECTIVE_BATCH_SIZE=${batch_size}
-MAX_GPU_BATCH_SIZE=6
+MAX_GPU_BATCH_SIZE=3
 PER_GPU_BATCH_SIZE=$((EFFECTIVE_BATCH_SIZE/num_gpus))
 if [[ $PER_GPU_BATCH_SIZE -lt $MAX_GPU_BATCH_SIZE ]]; then
        GRAD_ACCUM_STEPS=1