diff --git a/cookbook/populate_embeddings.ipynb b/cookbook/populate_embeddings.ipynb index d487be1..89bd7fe 100644 --- a/cookbook/populate_embeddings.ipynb +++ b/cookbook/populate_embeddings.ipynb @@ -24,21 +24,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 25, "id": "c5498911", "metadata": { "id": "c5498911" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-05-27 13:21:11.840076: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import os.path\n", @@ -50,7 +41,10 @@ "import math\n", "# import numpy as np\n", "import pandas as pd\n", - "from sentence_transformers import SentenceTransformer" + "from sentence_transformers import SentenceTransformer\n", + "\n", + "def model_id_to_filename(model_id):\n", + " return model_id.split(\"/\")[-1].lower()" ] }, { @@ -65,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 26, "id": "45b95c55", "metadata": { "id": "45b95c55" @@ -85,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 27, "id": "b87a3c65-0e08-4fa9-aa8f-2f9a2f6c3499", "metadata": { "colab": { @@ -101,7 +95,7 @@ "False" ] }, - "execution_count": 3, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -122,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 28, "id": "95fb523c", "metadata": { "id": "95fb523c" @@ -150,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 29, "id": "cd09f66b", "metadata": { "id": "cd09f66b" @@ -220,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 30, "id": "87316fa4-1fcf-41c4-9913-bc5704b25ea2", "metadata": { "colab": { @@ -248,19 +242,33 @@ "\n", "Opening existing file locally: ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json\n", "Request url: https://router.huggingface.co/hf-inference/models/BAAI/bge-large-en-v1.5/pipeline/feature-extraction\n", - "Dimensions from hugging face API response: 1024\n", + "Dimensions from hugging face API response: 1\n", "Dimensions from json file: 1024\n", "Old prompts: 2217\n", "New prompts: 0\n", "Errors: 0\n", "Successes: 0\n", - "Updating centroids.\n", + "Updating centroids.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Rahul\\AppData\\Local\\Temp\\ipykernel_17512\\3081262251.py:43: UserWarning: Dimensions are different: API=1 while JSON sentences file=1024\n", + " warnings.warn( f\"Dimensions are different: API={api_response_dimensions} while JSON sentences file={json_file_dimensions}\" )\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Saving into file: ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json\n", "\n", "\n", "Opening existing file locally: ../prompt-sentences-main/prompt_sentences-multilingual-e5-large.json\n", "Request url: https://router.huggingface.co/hf-inference/models/intfloat/multilingual-e5-large/pipeline/feature-extraction\n", - "Dimensions from hugging face API response: 1024\n", + "Dimensions from hugging face API response: 1\n", "Dimensions from json file: 1024\n", "Old prompts: 2217\n", "New prompts: 0\n", @@ -458,7 +466,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -472,7 +480,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.13.2" } }, "nbformat": 4,