Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 31 additions & 23 deletions cookbook/populate_embeddings.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,12 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 25,
"id": "c5498911",
"metadata": {
"id": "c5498911"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-05-27 13:21:11.840076: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
]
}
],
"outputs": [],
"source": [
"import os\n",
"import os.path\n",
Expand All @@ -50,7 +41,10 @@
"import math\n",
"# import numpy as np\n",
"import pandas as pd\n",
"from sentence_transformers import SentenceTransformer"
"from sentence_transformers import SentenceTransformer\n",
"\n",
"def model_id_to_filename(model_id):\n",
" return model_id.split(\"/\")[-1].lower()"
]
},
{
Expand All @@ -65,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 26,
"id": "45b95c55",
"metadata": {
"id": "45b95c55"
Expand All @@ -85,7 +79,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 27,
"id": "b87a3c65-0e08-4fa9-aa8f-2f9a2f6c3499",
"metadata": {
"colab": {
Expand All @@ -101,7 +95,7 @@
"False"
]
},
"execution_count": 3,
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -122,7 +116,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 28,
"id": "95fb523c",
"metadata": {
"id": "95fb523c"
Expand Down Expand Up @@ -150,7 +144,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 29,
"id": "cd09f66b",
"metadata": {
"id": "cd09f66b"
Expand Down Expand Up @@ -220,7 +214,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 30,
"id": "87316fa4-1fcf-41c4-9913-bc5704b25ea2",
"metadata": {
"colab": {
Expand Down Expand Up @@ -248,19 +242,33 @@
"\n",
"Opening existing file locally: ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json\n",
"Request url: https://router.huggingface.co/hf-inference/models/BAAI/bge-large-en-v1.5/pipeline/feature-extraction\n",
"Dimensions from hugging face API response: 1024\n",
"Dimensions from hugging face API response: 1\n",
"Dimensions from json file: 1024\n",
"Old prompts: 2217\n",
"New prompts: 0\n",
"Errors: 0\n",
"Successes: 0\n",
"Updating centroids.\n",
"Updating centroids.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Rahul\\AppData\\Local\\Temp\\ipykernel_17512\\3081262251.py:43: UserWarning: Dimensions are different: API=1 while JSON sentences file=1024\n",
" warnings.warn( f\"Dimensions are different: API={api_response_dimensions} while JSON sentences file={json_file_dimensions}\" )\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saving into file: ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json\n",
"\n",
"\n",
"Opening existing file locally: ../prompt-sentences-main/prompt_sentences-multilingual-e5-large.json\n",
"Request url: https://router.huggingface.co/hf-inference/models/intfloat/multilingual-e5-large/pipeline/feature-extraction\n",
"Dimensions from hugging face API response: 1024\n",
"Dimensions from hugging face API response: 1\n",
"Dimensions from json file: 1024\n",
"Old prompts: 2217\n",
"New prompts: 0\n",
Expand Down Expand Up @@ -458,7 +466,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -472,7 +480,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.13.2"
}
},
"nbformat": 4,
Expand Down