-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
124 lines (95 loc) · 4.04 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
from langchain import OpenAI
from llama_index import (
GPTListIndex,
MockLLMPredictor,
SimpleDirectoryReader,
ServiceContext,
GPTSimpleVectorIndex,
MockEmbedding
)
from currency import currency_converter
def openaipricing(nb_tokens_used: int, model_name: str = None, embeddings=False, chatgpt=False) -> float:
    """
    Return the cost in US dollars for the given token usage.

    Args:
        nb_tokens_used: total tokens consumed by the model
        model_name: OpenAI model name (only consulted when ``chatgpt`` is True)
        embeddings: compute cost at the embeddings rate (text-embedding-ada-002)
        chatgpt: compute cost at the per-model completion rate

    Returns:
        Cost in dollars.

    Raises:
        TypeError: if ``nb_tokens_used`` is not an int, or ``model_name`` is
            given but is not a str.
        ValueError: if neither ``chatgpt`` nor ``embeddings`` is selected.
    """
    if not isinstance(nb_tokens_used, int):
        # BUG FIX: the original message interpolated type(model_name) here,
        # reporting the wrong argument's type.
        raise TypeError(
            f"nb_tokens_used Expected object of type int, got: {type(nb_tokens_used)}"
        )
    if model_name and not isinstance(model_name, str):
        raise TypeError(
            f"model_name Expected object of type str, got: {type(model_name)}"
        )
    if chatgpt:
        # Prices are USD per 1000 tokens.
        # NOTE(review): GPT-4 actually has two prices (0.03 USD/1k prompt
        # tokens, 0.06 USD/1k completion tokens); only the prompt rate is
        # modelled here.
        model_costs = {
            'gpt-4': 0.03,
            'gpt-3.5-turbo': 0.002,
            'davinci': 0.02,
            'babbage': 0.0005,
            'curie': 0.002,
            'ada': 0.0004,
            'turbo': 0.002
        }
        # Unknown models fall back to 1 USD per 1000 tokens — presumably a
        # deliberately high sentinel so a typo'd model name shows up as an
        # obviously large cost; confirm with the original author.
        cost = model_costs.get(model_name, 1)
        return (cost / 1000.) * nb_tokens_used
    elif embeddings:
        # text-embedding-ada-002: 0.0004 USD per 1000 tokens
        return (0.0004 / 1000.) * nb_tokens_used
    else:
        raise ValueError("Invalid option")
# --- Cost-estimation driver --------------------------------------------------
# Load the source documents, build an index against mocked LLM/embedding
# backends (which count tokens instead of hitting the OpenAI API), run one
# query, and price the tokens that the query consumed.
BASE_DIR: str = os.path.dirname(__file__)
data_dir = os.path.join(BASE_DIR, "data")
documents = SimpleDirectoryReader(data_dir).load_data()
print(documents)

llm_model_name = "gpt-4"
llm_predictor_mock = MockLLMPredictor(
    max_tokens=256,
    llm=OpenAI(temperature=0, model_name=llm_model_name),
)
embed_model_mock = MockEmbedding(embed_dim=1536)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor_mock,
    embed_model=embed_model_mock,
)

# Vector is cheaper than raw indexed data
# index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
index = GPTListIndex.from_documents(documents, service_context=service_context)

# The mocks let you *predict* how many tokens each GPT Index operation
# (build index, query index) would consume (within 5-10% error) before
# spending real money.
query_str = "Why was Ketanji Brown Jackson nominated?"
print("query_str = {}".format(query_str))
response = index.query(query_str, service_context=service_context)
print(response)

# After the query, the mocks expose how many tokens were "used".
last_token_usage = llm_predictor_mock.last_token_usage
last_embedding_usage = embed_model_mock.last_token_usage
print("last_token_usage={}".format(last_token_usage))
print("last_embedding_usage={}".format(last_embedding_usage))

# Price the LLM tokens and the embedding tokens separately, then total them.
llm_cost = openaipricing(last_token_usage, model_name=llm_model_name, chatgpt=True)
embedding_cost = openaipricing(last_embedding_usage, embeddings=True)
total_usage_price = llm_cost + embedding_cost
print("total_usage_price={}".format(total_usage_price))
def get_inference_max_price(default_amount: float = 3.00) -> float:
    """
    Get the maximum amount (USD) that should not be exceeded per ChatGPT query.

    The cap is read from the ``MAX_INFERENCE_PRICE`` environment variable;
    when the variable is unset or is not a valid number, ``default_amount``
    is returned instead.

    Args:
        default_amount: fallback cap used when the environment variable is
            missing or malformed. Defaults to 3.00 (the original hard-coded
            limit), so existing callers are unaffected.

    Returns:
        float: amount
    """
    try:
        # os.environ values are strings; float() raises ValueError on
        # malformed text (and TypeError on a non-numeric default), both of
        # which fall back to the default cap.
        return float(os.environ.get("MAX_INFERENCE_PRICE", default_amount))
    except (TypeError, ValueError):
        return default_amount
# Guard rail: refuse the run outright when the predicted spend exceeds the
# configured per-query budget.
MAX_INFERENCE_PRICE = get_inference_max_price()
if total_usage_price > MAX_INFERENCE_PRICE:
    raise ValueError(
        "Prompt is too broad this inference would be prohibitively expensive. Please try a narrower prompt scope."
    )

# Report the predicted price in USD and converted to Malaysian ringgit.
price_in_myr = currency_converter(total_usage_price, to_currency='MYR')
print("The total composition price is: USD ", total_usage_price)
print("The total composition price is: MYR ", price_in_myr)