import os

import numpy as np

from gptcache.embedding.base import BaseEmbedding
from gptcache.utils import import_openai

import_openai()

import openai  # pylint: disable=C0413


class OpenAI(BaseEmbedding):
    """Generate text embedding for given text using OpenAI.

    :param model: model name, defaults to 'text-embedding-ada-002'.
    :type model: str
    :param api_key: OpenAI API key. When the parameter is not specified, the key is loaded from ``openai.api_key`` or the ``OPENAI_API_KEY`` environment variable if available.
    :type api_key: str
    :param api_base: OpenAI API base URL. When the parameter is not specified, it falls back to ``openai.api_base`` or the ``OPENAI_API_BASE`` environment variable.
    :type api_base: str

    Example:
        .. code-block:: python

            from gptcache.embedding import OpenAI

            test_sentence = 'Hello, world.'
            encoder = OpenAI(api_key='your_openai_key')
            embed = encoder.to_embeddings(test_sentence)
    """

    def __init__(self, model: str = "text-embedding-ada-002", api_key: str = None, api_base: str = None):
        # Fall back to the globally configured values or environment variables
        # when the key / base URL are not passed in explicitly.
        if not api_key:
            if openai.api_key:
                api_key = openai.api_key
            else:
                api_key = os.getenv("OPENAI_API_KEY")
        if not api_base:
            if openai.api_base:
                api_base = openai.api_base
            else:
                api_base = os.getenv("OPENAI_API_BASE")
        openai.api_key = api_key
        # Keep api_base on the instance instead of overriding openai.api_base globally,
        # since we may only want to override it for embedding requests.
        self.api_base = api_base
        self.model = model
        if model in self.dim_dict():
            self.__dimension = self.dim_dict()[model]
        else:
            self.__dimension = None

    def to_embeddings(self, data, **_):
        """Generate embedding given text input

        :param data: text in string.
        :type data: str

        :return: a text embedding in shape of (dim,).
        """
        sentence_embeddings = openai.Embedding.create(model=self.model, input=data, api_base=self.api_base)
        return np.array(sentence_embeddings["data"][0]["embedding"]).astype("float32")

    @property
    def dimension(self):
        """Embedding dimension.

        :return: embedding dimension
        """
        if not self.__dimension:
            foo_emb = self.to_embeddings("foo")
            self.__dimension = len(foo_emb)
        return self.__dimension

    @staticmethod
    def dim_dict():
        """Known embedding dimensions for supported models."""
        return {"text-embedding-ada-002": 1536}
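

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): shows how the
# class above is typically exercised. It assumes a valid key is available via
# the OPENAI_API_KEY environment variable and that the OpenAI API is reachable;
# the embedding call will fail otherwise.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    encoder = OpenAI()  # picks up OPENAI_API_KEY / OPENAI_API_BASE from the environment

    # The default model is listed in dim_dict(), so the dimension is resolved
    # from the lookup table without issuing an API call.
    print("expected dimension:", encoder.dimension)

    # to_embeddings() issues a single OpenAI embedding request and returns a
    # float32 numpy array of shape (dimension,).
    embedding = encoder.to_embeddings("Hello, world.")
    print("embedding shape:", embedding.shape)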