From 23e5081e28af0afa678c91a7717e2b1c178157a8 Mon Sep 17 00:00:00 2001 From: Erik Date: Mon, 5 Jan 2026 11:43:00 -0500 Subject: [PATCH 1/2] Update protobuf references from 3.20.3 to 4.25.8 --- .../requirements/requirements.txt | 2 +- .../PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb | 34 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements/requirements.txt b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements/requirements.txt index cc119a8553a98..73929214b22ea 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements/requirements.txt +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements/requirements.txt @@ -7,7 +7,7 @@ onnx==1.18.0 coloredlogs packaging # Use newer version of protobuf might cause crash -protobuf==3.20.3 +protobuf==4.25.8 psutil sympy nvtx==0.2.5 diff --git a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb index 7295ae1436c99..2d1689d0bec93 100644 --- a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb +++ b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ "\n", "if sys.platform in ['linux', 'win32']: # Linux or Windows\n", " !{sys.executable} -m pip install torch --index-url https://download.pytorch.org/whl/cu118 -q\n", - " !{sys.executable} -m pip install onnxruntime-gpu onnx transformers psutil pandas py-cpuinfo py3nvml coloredlogs wget netron sympy protobuf==3.20.3 -q\n", + " !{sys.executable} -m pip install onnxruntime-gpu onnx transformers psutil pandas py-cpuinfo py3nvml coloredlogs wget netron sympy protobuf==4.25.8 -q\n", "else: # Mac\n", " print(\"CUDA is not available on MacOS\")" ] @@ -186,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -223,7 +223,7 @@ "examples = processor.get_dev_examples(None, filename=predict_file)\n", "\n", "from transformers import squad_convert_examples_to_features\n", - "features, dataset = squad_convert_examples_to_features( \n", + "features, dataset = squad_convert_examples_to_features(\n", " examples=examples[:total_samples], # convert enough examples for this notebook\n", " tokenizer=tokenizer,\n", " max_seq_length=max_seq_length,\n", @@ -244,7 +244,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -262,7 +262,7 @@ "source": [ "output_dir = os.path.join(\".\", \"onnx_models\")\n", "if not os.path.exists(output_dir):\n", - " os.makedirs(output_dir) \n", + " os.makedirs(output_dir)\n", "export_model_path = os.path.join(output_dir, 'bert-base-cased-squad_opset{}.onnx'.format(opset_version))\n", "\n", "import torch\n", @@ -277,7 +277,7 @@ " 'token_type_ids': data[2].to(device).reshape(1, max_seq_length)\n", "}\n", "\n", - "# Set model to inference mode, which is required before exporting the model because some operators behave differently in \n", + "# Set model to inference mode, which is required before exporting the model because some operators behave differently in\n", "# inference and training mode.\n", "model.eval()\n", "model.to(device)\n", @@ -291,7 +291,7 @@ " opset_version=opset_version, # the ONNX version to export the model to\n", " do_constant_folding=True, # whether to execute constant folding for optimization\n", " input_names=['input_ids', # the model's input names\n", - " 'input_mask', \n", + " 'input_mask',\n", " 'segment_ids'],\n", " output_names=['start', 'end'], # the model's output names\n", " dynamic_axes={'input_ids': symbolic_names, # variable length axes\n", @@ -358,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -399,7 +399,7 @@ " start = time.time()\n", " ort_outputs = session.run(None, ort_inputs)\n", " latency.append(time.time() - start)\n", - " \n", + "\n", "print(\"OnnxRuntime {} Inference time = {} ms\".format(device_name, format(sum(latency) * 1000 / len(latency), '.2f')))" ] }, @@ -412,7 +412,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "scrolled": true }, @@ -431,7 +431,7 @@ ], "source": [ "print(\"***** Verifying correctness *****\")\n", - "for i in range(2): \n", + "for i in range(2):\n", " print('PyTorch and ONNX Runtime output {} are close:'.format(i), numpy.allclose(ort_outputs[i], outputs[i].cpu(), rtol=1e-02, atol=1e-02))\n", " diff = ort_outputs[i] - outputs[i].cpu().numpy()\n", " max_diff = numpy.max(numpy.abs(diff))\n", @@ -753,7 +753,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1034,7 +1034,7 @@ "source": [ "def load_last_perf_test_result():\n", " import os\n", - " import glob \n", + " import glob\n", " import pandas\n", " latest_result_file = max(glob.glob(\"./onnx/perf_results_*.txt\"), key=os.path.getmtime)\n", " result_data = pandas.read_table(latest_result_file)\n", @@ -1043,7 +1043,7 @@ " columns_to_remove = ['model', 'graph_optimization_level', 'batch_size', 'sequence_length', 'test_cases', 'test_times', 'use_gpu', 'use_io_binding', 'average_sequence_length', 'random_sequence_length']\n", " result_data.drop(columns_to_remove, axis=1, inplace=True)\n", " return result_data\n", - " \n", + "\n", "thread_results = load_last_perf_test_result()\n", "thread_results" ] @@ -1672,7 +1672,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1722,7 +1722,7 @@ "assert use_gpu, \"Require GPU for packing mode\"\n", "packed_fp16_model_path = './onnx/bert-base-cased-squad_opt_gpu_fp16_packed.onnx'\n", "!{sys.executable} -m onnxruntime.transformers.convert_to_packing_mode --input $optimized_fp16_model_path --output $packed_fp16_model_path --use_external_data_format\n", - "!{sys.executable} -m onnxruntime.transformers.bert_perf_test --model $packed_fp16_model_path --batch_size 1 2 4 8 16 32 --sequence_length 128 --average_sequence_length 32 --samples 1000 --test_times 1 $THREAD_SETTING $GPU_OPTION " + "!{sys.executable} -m onnxruntime.transformers.bert_perf_test --model $packed_fp16_model_path --batch_size 1 2 4 8 16 32 --sequence_length 128 --average_sequence_length 32 --samples 1000 --test_times 1 $THREAD_SETTING $GPU_OPTION" ] }, { From 1c96bb89f2ca1bec6b2b55b6645446332f93b155 Mon Sep 17 00:00:00 2001 From: Erik Date: Wed, 7 Jan 2026 08:57:33 -0500 Subject: [PATCH 2/2] Revert Jupyter notebook change --- .../PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb index 2d1689d0bec93..7295ae1436c99 100644 --- a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb +++ b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ "\n", "if sys.platform in ['linux', 'win32']: # Linux or Windows\n", " !{sys.executable} -m pip install torch --index-url https://download.pytorch.org/whl/cu118 -q\n", - " !{sys.executable} -m pip install onnxruntime-gpu onnx transformers psutil pandas py-cpuinfo py3nvml coloredlogs wget netron sympy protobuf==4.25.8 -q\n", + " !{sys.executable} -m pip install onnxruntime-gpu onnx transformers psutil pandas py-cpuinfo py3nvml coloredlogs wget netron sympy protobuf==3.20.3 -q\n", "else: # Mac\n", " print(\"CUDA is not available on MacOS\")" ] @@ -186,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -223,7 +223,7 @@ "examples = processor.get_dev_examples(None, filename=predict_file)\n", "\n", "from transformers import squad_convert_examples_to_features\n", - "features, dataset = squad_convert_examples_to_features(\n", + "features, dataset = squad_convert_examples_to_features( \n", " examples=examples[:total_samples], # convert enough examples for this notebook\n", " tokenizer=tokenizer,\n", " max_seq_length=max_seq_length,\n", @@ -244,7 +244,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -262,7 +262,7 @@ "source": [ "output_dir = os.path.join(\".\", \"onnx_models\")\n", "if not os.path.exists(output_dir):\n", - " os.makedirs(output_dir)\n", + " os.makedirs(output_dir) \n", "export_model_path = os.path.join(output_dir, 'bert-base-cased-squad_opset{}.onnx'.format(opset_version))\n", "\n", "import torch\n", @@ -277,7 +277,7 @@ " 'token_type_ids': data[2].to(device).reshape(1, max_seq_length)\n", "}\n", "\n", - "# Set model to inference mode, which is required before exporting the model because some operators behave differently in\n", + "# Set model to inference mode, which is required before exporting the model because some operators behave differently in \n", "# inference and training mode.\n", "model.eval()\n", "model.to(device)\n", @@ -291,7 +291,7 @@ " opset_version=opset_version, # the ONNX version to export the model to\n", " do_constant_folding=True, # whether to execute constant folding for optimization\n", " input_names=['input_ids', # the model's input names\n", - " 'input_mask',\n", + " 'input_mask', \n", " 'segment_ids'],\n", " output_names=['start', 'end'], # the model's output names\n", " dynamic_axes={'input_ids': symbolic_names, # variable length axes\n", @@ -358,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -399,7 +399,7 @@ " start = time.time()\n", " ort_outputs = session.run(None, ort_inputs)\n", " latency.append(time.time() - start)\n", - "\n", + " \n", "print(\"OnnxRuntime {} Inference time = {} ms\".format(device_name, format(sum(latency) * 1000 / len(latency), '.2f')))" ] }, @@ -412,7 +412,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "scrolled": true }, @@ -431,7 +431,7 @@ ], "source": [ "print(\"***** Verifying correctness *****\")\n", - "for i in range(2):\n", + "for i in range(2): \n", " print('PyTorch and ONNX Runtime output {} are close:'.format(i), numpy.allclose(ort_outputs[i], outputs[i].cpu(), rtol=1e-02, atol=1e-02))\n", " diff = ort_outputs[i] - outputs[i].cpu().numpy()\n", " max_diff = numpy.max(numpy.abs(diff))\n", @@ -753,7 +753,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1034,7 +1034,7 @@ "source": [ "def load_last_perf_test_result():\n", " import os\n", - " import glob\n", + " import glob \n", " import pandas\n", " latest_result_file = max(glob.glob(\"./onnx/perf_results_*.txt\"), key=os.path.getmtime)\n", " result_data = pandas.read_table(latest_result_file)\n", @@ -1043,7 +1043,7 @@ " columns_to_remove = ['model', 'graph_optimization_level', 'batch_size', 'sequence_length', 'test_cases', 'test_times', 'use_gpu', 'use_io_binding', 'average_sequence_length', 'random_sequence_length']\n", " result_data.drop(columns_to_remove, axis=1, inplace=True)\n", " return result_data\n", - "\n", + " \n", "thread_results = load_last_perf_test_result()\n", "thread_results" ] @@ -1672,7 +1672,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1722,7 +1722,7 @@ "assert use_gpu, \"Require GPU for packing mode\"\n", "packed_fp16_model_path = './onnx/bert-base-cased-squad_opt_gpu_fp16_packed.onnx'\n", "!{sys.executable} -m onnxruntime.transformers.convert_to_packing_mode --input $optimized_fp16_model_path --output $packed_fp16_model_path --use_external_data_format\n", - "!{sys.executable} -m onnxruntime.transformers.bert_perf_test --model $packed_fp16_model_path --batch_size 1 2 4 8 16 32 --sequence_length 128 --average_sequence_length 32 --samples 1000 --test_times 1 $THREAD_SETTING $GPU_OPTION" + "!{sys.executable} -m onnxruntime.transformers.bert_perf_test --model $packed_fp16_model_path --batch_size 1 2 4 8 16 32 --sequence_length 128 --average_sequence_length 32 --samples 1000 --test_times 1 $THREAD_SETTING $GPU_OPTION " ] }, {