From 23e5081e28af0afa678c91a7717e2b1c178157a8 Mon Sep 17 00:00:00 2001
From: Erik <erscor@microsoft.com>
Date: Mon, 5 Jan 2026 11:43:00 -0500
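Subject: [PATCH 1/2] Update protobuf references from 3.20.3 to 4.25.8

Bump the protobuf pin in the stable_diffusion requirements.txt and in the
pip install cell of the PyTorch_Bert-Squad_OnnxRuntime_GPU notebook. The
notebook diff also resets cell execution counts to null and strips trailing
whitespace from several source lines.
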
---
 .../requirements/requirements.txt             |  2 +-
 .../PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb  | 34 +++++++++----------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements/requirements.txt b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements/requirements.txt
index cc119a8553a98..73929214b22ea 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements/requirements.txt
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements/requirements.txt
@@ -7,7 +7,7 @@ onnx==1.18.0
 coloredlogs
 packaging
 # Use newer version of protobuf might cause crash
-protobuf==3.20.3
+protobuf==4.25.8
 psutil
 sympy
 nvtx==0.2.5
diff --git a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb
index 7295ae1436c99..2d1689d0bec93 100644
--- a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb
+++ b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -59,7 +59,7 @@
     "\n",
     "if sys.platform in ['linux', 'win32']: # Linux or Windows\n",
     "    !{sys.executable} -m pip install torch --index-url https://download.pytorch.org/whl/cu118 -q\n",
-    "    !{sys.executable} -m pip install onnxruntime-gpu onnx transformers psutil pandas py-cpuinfo py3nvml coloredlogs wget netron sympy protobuf==3.20.3 -q\n",
+    "    !{sys.executable} -m pip install onnxruntime-gpu onnx transformers psutil pandas py-cpuinfo py3nvml coloredlogs wget netron sympy protobuf==4.25.8 -q\n",
     "else: # Mac\n",
     "    print(\"CUDA is not available on MacOS\")"
    ]
@@ -186,7 +186,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -223,7 +223,7 @@
     "examples = processor.get_dev_examples(None, filename=predict_file)\n",
     "\n",
     "from transformers import squad_convert_examples_to_features\n",
-    "features, dataset = squad_convert_examples_to_features( \n",
+    "features, dataset = squad_convert_examples_to_features(\n",
     "            examples=examples[:total_samples], # convert enough examples for this notebook\n",
     "            tokenizer=tokenizer,\n",
     "            max_seq_length=max_seq_length,\n",
@@ -244,7 +244,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -262,7 +262,7 @@
    "source": [
     "output_dir = os.path.join(\".\", \"onnx_models\")\n",
     "if not os.path.exists(output_dir):\n",
-    "    os.makedirs(output_dir)   \n",
+    "    os.makedirs(output_dir)\n",
     "export_model_path = os.path.join(output_dir, 'bert-base-cased-squad_opset{}.onnx'.format(opset_version))\n",
     "\n",
     "import torch\n",
@@ -277,7 +277,7 @@
     "    'token_type_ids': data[2].to(device).reshape(1, max_seq_length)\n",
     "}\n",
     "\n",
-    "# Set model to inference mode, which is required before exporting the model because some operators behave differently in \n",
+    "# Set model to inference mode, which is required before exporting the model because some operators behave differently in\n",
     "# inference and training mode.\n",
     "model.eval()\n",
     "model.to(device)\n",
@@ -291,7 +291,7 @@
     "                          opset_version=opset_version,                      # the ONNX version to export the model to\n",
     "                          do_constant_folding=True,                         # whether to execute constant folding for optimization\n",
     "                          input_names=['input_ids',                         # the model's input names\n",
-    "                                       'input_mask', \n",
+    "                                       'input_mask',\n",
     "                                       'segment_ids'],\n",
     "                          output_names=['start', 'end'],                    # the model's output names\n",
     "                          dynamic_axes={'input_ids': symbolic_names,        # variable length axes\n",
@@ -358,7 +358,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -399,7 +399,7 @@
     "    start = time.time()\n",
     "    ort_outputs = session.run(None, ort_inputs)\n",
     "    latency.append(time.time() - start)\n",
-    "    \n",
+    "\n",
     "print(\"OnnxRuntime {} Inference time = {} ms\".format(device_name, format(sum(latency) * 1000 / len(latency), '.2f')))"
    ]
   },
@@ -412,7 +412,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {
     "scrolled": true
    },
@@ -431,7 +431,7 @@
    ],
    "source": [
     "print(\"***** Verifying correctness *****\")\n",
-    "for i in range(2):    \n",
+    "for i in range(2):\n",
     "    print('PyTorch and ONNX Runtime output {} are close:'.format(i), numpy.allclose(ort_outputs[i], outputs[i].cpu(), rtol=1e-02, atol=1e-02))\n",
     "    diff = ort_outputs[i] - outputs[i].cpu().numpy()\n",
     "    max_diff = numpy.max(numpy.abs(diff))\n",
@@ -753,7 +753,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1034,7 +1034,7 @@
    "source": [
     "def load_last_perf_test_result():\n",
     "    import os\n",
-    "    import glob     \n",
+    "    import glob\n",
     "    import pandas\n",
     "    latest_result_file = max(glob.glob(\"./onnx/perf_results_*.txt\"), key=os.path.getmtime)\n",
     "    result_data = pandas.read_table(latest_result_file)\n",
@@ -1043,7 +1043,7 @@
     "    columns_to_remove = ['model', 'graph_optimization_level', 'batch_size', 'sequence_length', 'test_cases', 'test_times', 'use_gpu', 'use_io_binding', 'average_sequence_length', 'random_sequence_length']\n",
     "    result_data.drop(columns_to_remove, axis=1, inplace=True)\n",
     "    return result_data\n",
-    "    \n",
+    "\n",
     "thread_results = load_last_perf_test_result()\n",
     "thread_results"
    ]
@@ -1672,7 +1672,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1722,7 +1722,7 @@
     "assert use_gpu, \"Require GPU for packing mode\"\n",
     "packed_fp16_model_path = './onnx/bert-base-cased-squad_opt_gpu_fp16_packed.onnx'\n",
     "!{sys.executable} -m onnxruntime.transformers.convert_to_packing_mode --input $optimized_fp16_model_path --output $packed_fp16_model_path --use_external_data_format\n",
-    "!{sys.executable} -m onnxruntime.transformers.bert_perf_test --model $packed_fp16_model_path --batch_size 1 2 4 8 16 32 --sequence_length 128 --average_sequence_length 32 --samples 1000 --test_times 1 $THREAD_SETTING $GPU_OPTION    "
+    "!{sys.executable} -m onnxruntime.transformers.bert_perf_test --model $packed_fp16_model_path --batch_size 1 2 4 8 16 32 --sequence_length 128 --average_sequence_length 32 --samples 1000 --test_times 1 $THREAD_SETTING $GPU_OPTION"
    ]
   },
   {

From 1c96bb89f2ca1bec6b2b55b6645446332f93b155 Mon Sep 17 00:00:00 2001
From: Erik <erscor@microsoft.com>
Date: Wed, 7 Jan 2026 08:57:33 -0500
Subject: [PATCH 2/2] Revert Jupyter notebook change

---
 .../PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb  | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb
index 2d1689d0bec93..7295ae1436c99 100644
--- a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb
+++ b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -59,7 +59,7 @@
     "\n",
     "if sys.platform in ['linux', 'win32']: # Linux or Windows\n",
     "    !{sys.executable} -m pip install torch --index-url https://download.pytorch.org/whl/cu118 -q\n",
-    "    !{sys.executable} -m pip install onnxruntime-gpu onnx transformers psutil pandas py-cpuinfo py3nvml coloredlogs wget netron sympy protobuf==4.25.8 -q\n",
+    "    !{sys.executable} -m pip install onnxruntime-gpu onnx transformers psutil pandas py-cpuinfo py3nvml coloredlogs wget netron sympy protobuf==3.20.3 -q\n",
     "else: # Mac\n",
     "    print(\"CUDA is not available on MacOS\")"
    ]
@@ -186,7 +186,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -223,7 +223,7 @@
     "examples = processor.get_dev_examples(None, filename=predict_file)\n",
     "\n",
     "from transformers import squad_convert_examples_to_features\n",
-    "features, dataset = squad_convert_examples_to_features(\n",
+    "features, dataset = squad_convert_examples_to_features( \n",
     "            examples=examples[:total_samples], # convert enough examples for this notebook\n",
     "            tokenizer=tokenizer,\n",
     "            max_seq_length=max_seq_length,\n",
@@ -244,7 +244,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -262,7 +262,7 @@
    "source": [
     "output_dir = os.path.join(\".\", \"onnx_models\")\n",
     "if not os.path.exists(output_dir):\n",
-    "    os.makedirs(output_dir)\n",
+    "    os.makedirs(output_dir)   \n",
     "export_model_path = os.path.join(output_dir, 'bert-base-cased-squad_opset{}.onnx'.format(opset_version))\n",
     "\n",
     "import torch\n",
@@ -277,7 +277,7 @@
     "    'token_type_ids': data[2].to(device).reshape(1, max_seq_length)\n",
     "}\n",
     "\n",
-    "# Set model to inference mode, which is required before exporting the model because some operators behave differently in\n",
+    "# Set model to inference mode, which is required before exporting the model because some operators behave differently in \n",
     "# inference and training mode.\n",
     "model.eval()\n",
     "model.to(device)\n",
@@ -291,7 +291,7 @@
     "                          opset_version=opset_version,                      # the ONNX version to export the model to\n",
     "                          do_constant_folding=True,                         # whether to execute constant folding for optimization\n",
     "                          input_names=['input_ids',                         # the model's input names\n",
-    "                                       'input_mask',\n",
+    "                                       'input_mask', \n",
     "                                       'segment_ids'],\n",
     "                          output_names=['start', 'end'],                    # the model's output names\n",
     "                          dynamic_axes={'input_ids': symbolic_names,        # variable length axes\n",
@@ -358,7 +358,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -399,7 +399,7 @@
     "    start = time.time()\n",
     "    ort_outputs = session.run(None, ort_inputs)\n",
     "    latency.append(time.time() - start)\n",
-    "\n",
+    "    \n",
     "print(\"OnnxRuntime {} Inference time = {} ms\".format(device_name, format(sum(latency) * 1000 / len(latency), '.2f')))"
    ]
   },
@@ -412,7 +412,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {
     "scrolled": true
    },
@@ -431,7 +431,7 @@
    ],
    "source": [
     "print(\"***** Verifying correctness *****\")\n",
-    "for i in range(2):\n",
+    "for i in range(2):    \n",
     "    print('PyTorch and ONNX Runtime output {} are close:'.format(i), numpy.allclose(ort_outputs[i], outputs[i].cpu(), rtol=1e-02, atol=1e-02))\n",
     "    diff = ort_outputs[i] - outputs[i].cpu().numpy()\n",
     "    max_diff = numpy.max(numpy.abs(diff))\n",
@@ -753,7 +753,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -1034,7 +1034,7 @@
    "source": [
     "def load_last_perf_test_result():\n",
     "    import os\n",
-    "    import glob\n",
+    "    import glob     \n",
     "    import pandas\n",
     "    latest_result_file = max(glob.glob(\"./onnx/perf_results_*.txt\"), key=os.path.getmtime)\n",
     "    result_data = pandas.read_table(latest_result_file)\n",
@@ -1043,7 +1043,7 @@
     "    columns_to_remove = ['model', 'graph_optimization_level', 'batch_size', 'sequence_length', 'test_cases', 'test_times', 'use_gpu', 'use_io_binding', 'average_sequence_length', 'random_sequence_length']\n",
     "    result_data.drop(columns_to_remove, axis=1, inplace=True)\n",
     "    return result_data\n",
-    "\n",
+    "    \n",
     "thread_results = load_last_perf_test_result()\n",
     "thread_results"
    ]
@@ -1672,7 +1672,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -1722,7 +1722,7 @@
     "assert use_gpu, \"Require GPU for packing mode\"\n",
     "packed_fp16_model_path = './onnx/bert-base-cased-squad_opt_gpu_fp16_packed.onnx'\n",
     "!{sys.executable} -m onnxruntime.transformers.convert_to_packing_mode --input $optimized_fp16_model_path --output $packed_fp16_model_path --use_external_data_format\n",
-    "!{sys.executable} -m onnxruntime.transformers.bert_perf_test --model $packed_fp16_model_path --batch_size 1 2 4 8 16 32 --sequence_length 128 --average_sequence_length 32 --samples 1000 --test_times 1 $THREAD_SETTING $GPU_OPTION"
+    "!{sys.executable} -m onnxruntime.transformers.bert_perf_test --model $packed_fp16_model_path --batch_size 1 2 4 8 16 32 --sequence_length 128 --average_sequence_length 32 --samples 1000 --test_times 1 $THREAD_SETTING $GPU_OPTION    "
    ]
   },
   {