diff --git a/tutorials/demo_notebooks/demo_pipeline/demo-pipeline.ipynb b/tutorials/demo_notebooks/demo_pipeline/demo-pipeline.ipynb index 844b878..f700667 100644 --- a/tutorials/demo_notebooks/demo_pipeline/demo-pipeline.ipynb +++ b/tutorials/demo_notebooks/demo_pipeline/demo-pipeline.ipynb @@ -751,8 +751,8 @@ " logger.info(f\"\\nInference service URL:\\n{is_url}\\n\")\n", "\n", " inference_input = {\n", - " 'instances': input_sample.tolist()\n", - " }\n", + " 'instances': input_sample.tolist()\n", + " }\n", " response = requests.post(\n", " is_url,\n", " json=inference_input,\n", diff --git a/tutorials/demo_notebooks/demo_pipeline_standalone_kfp/demo-pipeline.ipynb b/tutorials/demo_notebooks/demo_pipeline_standalone_kfp/demo-pipeline.ipynb index 5a6f32a..471da4e 100644 --- a/tutorials/demo_notebooks/demo_pipeline_standalone_kfp/demo-pipeline.ipynb +++ b/tutorials/demo_notebooks/demo_pipeline_standalone_kfp/demo-pipeline.ipynb @@ -493,7 +493,7 @@ " logger = logging.getLogger(__name__)\n", "\n", " namespace = 'kserve-inference'\n", - "\n", + " \n", " input_sample = [[5.6, 0.54, 0.04, 1.7, 0.049, 5, 13, 0.9942, 3.72, 0.58, 11.4],\n", " [11.3, 0.34, 0.45, 2, 0.082, 6, 15, 0.9988, 2.94, 0.66, 9.2]]\n", "\n", @@ -511,17 +511,24 @@ " KServe.get(model_name, namespace=namespace, watch=True, timeout_seconds=120)\n", "\n", " inference_service = KServe.get(model_name, namespace=namespace)\n", - " is_url = inference_service['status']['address']['url']\n", - "\n", + " header = {\"Host\": f\"{model_name}.{namespace}.example.com\"}\n", + " is_url = f\"http://istio-ingressgateway.istio-system.svc.cluster.local:80/v1/models/{model_name}:predict\"\n", + " \n", " logger.info(f\"\\nInference service status:\\n{inference_service['status']}\")\n", " logger.info(f\"\\nInference service URL:\\n{is_url}\\n\")\n", "\n", " inference_input = {\n", - " 'instances': input_sample.tolist()\n", - " }\n", - "\n", - " response = requests.post(is_url, json=inference_input)\n", - " 
logger.info(f\"\\nPrediction response:\\n{response.text}\\n\")" +            "        'instances': input_sample\n", +            "    }\n", +            "    response = requests.post(\n", +            "        is_url,\n", +            "        json=inference_input,\n", +            "        headers=header,\n", +            "    )\n", +            "    if response.status_code != 200:\n", +            "        raise RuntimeError(f\"HTTP status code '{response.status_code}': {response.json()}\")\n", +            "    \n", +            "    logger.info(f\"\\nPrediction response:\\n{response.json()}\\n\")" ], "outputs": [], "execution_count": null