|
81 | 81 | "cell_type": "markdown", |
82 | 82 | "metadata": {}, |
83 | 83 | "source": [ |
84 | | - "For code search models we use babbage-code-search-code to obtain embeddings for code snippets, and code-search-text to embed natural language queries." |
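| 84 | + "For code search models, we use code-search-{model}-code to obtain embeddings for code snippets, and code-search-{model}-text to embed natural language queries. This notebook uses the babbage variants: code-search-babbage-code-001 and code-search-babbage-text-001." |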
85 | 85 | ] |
86 | 86 | }, |
87 | 87 | { |
|
188 | 188 | "from openai.embeddings_utils import get_embedding\n", |
189 | 189 | "\n", |
190 | 190 | "df = pd.DataFrame(all_funcs)\n", |
191 | | - "df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='babbage-code-search-code'))\n", |
| 191 | + "df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='code-search-babbage-code-001'))\n", |
192 | 192 | "df['filepath'] = df['filepath'].apply(lambda x: x.replace(code_root, \"\"))\n", |
193 | 193 | "df.to_csv(\"output/code_search_openai-python.csv\", index=False)\n", |
194 | 194 | "df.head()" |
|
234 | 234 | "from openai.embeddings_utils import cosine_similarity\n", |
235 | 235 | "\n", |
236 | 236 | "def search_functions(df, code_query, n=3, pprint=True, n_lines=7):\n", |
237 | | - " embedding = get_embedding(code_query, engine='babbage-code-search-text')\n", |
| 237 | + " embedding = get_embedding(code_query, engine='code-search-babbage-text-001')\n", |
238 | 238 | " df['similarities'] = df.code_embedding.apply(lambda x: cosine_similarity(x, embedding))\n", |
239 | 239 | "\n", |
240 | 240 | " res = df.sort_values('similarities', ascending=False).head(n)\n", |
|
0 commit comments