-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
124 lines (95 loc) · 4.04 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
from langchain import OpenAI
from llama_index import (
GPTListIndex,
MockLLMPredictor,
SimpleDirectoryReader,
ServiceContext,
GPTSimpleVectorIndex,
MockEmbedding
)
from currency import currency_converter
def openaipricing(nb_tokens_used: int, model_name: str = None, embeddings=False, chatgpt=False) -> float:
    """
    Return the cost in US dollars for the given token usage.

    Args:
        nb_tokens_used: total tokens consumed by the model
        model_name: OpenAI model name (only consulted when ``chatgpt`` is True)
        embeddings: compute cost at the embeddings rate (text-embedding-ada-002)
        chatgpt: compute cost at the per-model completion rate

    Returns:
        Cost in dollars.

    Raises:
        TypeError: if ``nb_tokens_used`` is not an int, or ``model_name`` is
            given but is not a str.
        ValueError: if neither ``chatgpt`` nor ``embeddings`` is selected.
    """
    if not isinstance(nb_tokens_used, int):
        # BUG FIX: the original message interpolated type(model_name) here,
        # reporting the wrong argument's type.
        raise TypeError(
            f"nb_tokens_used Expected object of type int, got: {type(nb_tokens_used)}"
        )
    if model_name and not isinstance(model_name, str):
        raise TypeError(
            f"model_name Expected object of type str, got: {type(model_name)}"
        )
    if chatgpt:
        # Prices are USD per 1000 tokens.
        # NOTE(review): GPT-4 actually has two prices (0.03 USD/1k prompt
        # tokens, 0.06 USD/1k completion tokens); only the prompt rate is
        # modelled here.
        model_costs = {
            'gpt-4': 0.03,
            'gpt-3.5-turbo': 0.002,
            'davinci': 0.02,
            'babbage': 0.0005,
            'curie': 0.002,
            'ada': 0.0004,
            'turbo': 0.002
        }
        # Unknown models fall back to 1 USD per 1000 tokens — presumably a
        # deliberately high sentinel so a typo'd model name shows up as an
        # obviously large cost; confirm with the original author.
        cost = model_costs.get(model_name, 1)
        return (cost / 1000.) * nb_tokens_used
    elif embeddings:
        # text-embedding-ada-002: 0.0004 USD per 1000 tokens
        return (0.0004 / 1000.) * nb_tokens_used
    else:
        raise ValueError("Invalid option")
# --- Cost-estimation driver --------------------------------------------------
# Load the source documents, build an index against mocked LLM/embedding
# backends (which count tokens instead of hitting the OpenAI API), run one
# query, and price the tokens that the query consumed.
BASE_DIR: str = os.path.dirname(__file__)
data_dir = os.path.join(BASE_DIR, "data")
documents = SimpleDirectoryReader(data_dir).load_data()
print(documents)

llm_model_name = "gpt-4"
llm_predictor_mock = MockLLMPredictor(
    max_tokens=256,
    llm=OpenAI(temperature=0, model_name=llm_model_name),
)
embed_model_mock = MockEmbedding(embed_dim=1536)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor_mock,
    embed_model=embed_model_mock,
)

# Vector is cheaper than raw indexed data
# index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
index = GPTListIndex.from_documents(documents, service_context=service_context)

# The mocks let you *predict* how many tokens each GPT Index operation
# (build index, query index) would consume (within 5-10% error) before
# spending real money.
query_str = "Why was Ketanji Brown Jackson nominated?"
print("query_str = {}".format(query_str))
response = index.query(query_str, service_context=service_context)
print(response)

# After the query, the mocks expose how many tokens were "used".
last_token_usage = llm_predictor_mock.last_token_usage
last_embedding_usage = embed_model_mock.last_token_usage
print("last_token_usage={}".format(last_token_usage))
print("last_embedding_usage={}".format(last_embedding_usage))

# Price the LLM tokens and the embedding tokens separately, then total them.
llm_cost = openaipricing(last_token_usage, model_name=llm_model_name, chatgpt=True)
embedding_cost = openaipricing(last_embedding_usage, embeddings=True)
total_usage_price = llm_cost + embedding_cost
print("total_usage_price={}".format(total_usage_price))
def get_inference_max_price(default_amount: float = 3.00) -> float:
    """
    Get the maximum amount (USD) that should not be exceeded per ChatGPT query.

    The cap is read from the ``MAX_INFERENCE_PRICE`` environment variable;
    when the variable is unset or is not a valid number, ``default_amount``
    is returned instead.

    Args:
        default_amount: fallback cap used when the environment variable is
            missing or malformed. Defaults to 3.00 (the original hard-coded
            limit), so existing callers are unaffected.

    Returns:
        float: amount
    """
    try:
        # os.environ values are strings; float() raises ValueError on
        # malformed text (and TypeError on a non-numeric default), both of
        # which fall back to the default cap.
        return float(os.environ.get("MAX_INFERENCE_PRICE", default_amount))
    except (TypeError, ValueError):
        return default_amount
# Guard rail: refuse the run outright when the predicted spend exceeds the
# configured per-query budget.
MAX_INFERENCE_PRICE = get_inference_max_price()
if total_usage_price > MAX_INFERENCE_PRICE:
    raise ValueError(
        "Prompt is too broad this inference would be prohibitively expensive. Please try a narrower prompt scope."
    )

# Report the predicted price in USD and converted to Malaysian ringgit.
price_in_myr = currency_converter(total_usage_price, to_currency='MYR')
print("The total composition price is: USD ", total_usage_price)
print("The total composition price is: MYR ", price_in_myr)