|
| 1 | +"""Generator for the RAG system.""" |
| 2 | + |
| 3 | +import logging |
| 4 | +import os |
| 5 | +from typing import Any |
| 6 | + |
| 7 | +import openai |
| 8 | + |
| 9 | +logger = logging.getLogger(__name__) |
| 10 | + |
| 11 | + |
class Generator:
    """Generates answers to user queries based on retrieved context.

    The generator formats retrieved chunks into a single context string, sends
    it together with the user's query to an OpenAI chat model and returns the
    model's reply. Any failure to obtain a usable reply results in a fixed
    fallback message instead of an exception.
    """

    # Upper bound on the number of tokens requested for a completion.
    MAX_TOKENS = 2000
    # System message sent with every request.
    SYSTEM_PROMPT = """
You are a helpful and professional AI assistant for the OWASP Foundation.
Your task is to answer user queries based ONLY on the provided context.
Follow these rules strictly:
1. Base your entire answer on the information given in the "CONTEXT" section. Do not use any
external knowledge unless and until it is about OWASP.
2. Do not mention or refer to the word "context", "based on context", "provided information",
"Information given to me" or similar phrases in your responses.
3. you will answer questions only related to OWASP and within the scope of OWASP.
4. Be concise and directly answer the user's query.
5. Provide the necessary link if the context contains a URL.
6. If there is any query based on location, you need to look for latitude and longitude in the
context and provide the nearest OWASP chapter based on that.
7. You can ask for more information if the query is very personalized or user-centric.
8. after trying all of the above, If the context does not contain the information or you think that
it is out of scope for OWASP, you MUST state: "please ask question related to OWASP."
"""
    # Sampling temperature passed to the chat completion request.
    TEMPERATURE = 0.4
    # Returned whenever the API call fails or yields no usable text.
    FALLBACK_ANSWER = "I'm sorry, I'm currently unable to process your request."

    def __init__(self, chat_model: str = "gpt-4o"):
        """Initialize the Generator.

        Args:
            chat_model (str): The name of the OpenAI chat model to use for generation.

        Raises:
            ValueError: If the OpenAI API key is not set.

        """
        if not (openai_api_key := os.getenv("DJANGO_OPEN_AI_SECRET_KEY")):
            error_msg = "DJANGO_OPEN_AI_SECRET_KEY environment variable not set"
            raise ValueError(error_msg)

        self.chat_model = chat_model
        self.openai_client = openai.OpenAI(api_key=openai_api_key)
        logger.info("Generator initialized with chat model: %s", self.chat_model)

    def prepare_context(self, context_chunks: list[dict[str, Any]]) -> str:
        """Format the list of retrieved context chunks into a single string for the LLM.

        Each chunk contributes a "Source Name" / "Content" pair; chunks are
        separated by a horizontal-rule style divider. A chunk without a
        ``source_name`` key is labelled with its 1-based position, and a chunk
        without a ``text`` key contributes empty content.

        Args:
            context_chunks: A list of chunk dictionaries from the retriever.

        Returns:
            A formatted string containing the context, or the literal
            "No context provided" when the list is empty.

        """
        if not context_chunks:
            return "No context provided"

        return "\n\n---\n\n".join(
            f"Source Name: {chunk.get('source_name', f'Unknown Source {position}')}\n"
            f"Content: {chunk.get('text', '')}"
            for position, chunk in enumerate(context_chunks, start=1)
        )

    def _build_user_prompt(self, query: str, formatted_context: str) -> str:
        """Return the user message combining instructions, query and context."""
        return f"""
- You are an assistant for question-answering tasks related to OWASP.
- Use the following pieces of retrieved context to answer the question.
- If the question is related to OWASP then you can try to answer based on your knowledge, if you
don't know the answer, just say that you don't know.
- Try to give answer and keep the answer concise, but you really think that the response will be
longer and better you will provide more information.
- Ask for the current location if the query is related to location.
- Ask for the information you need if the query is very personalized or user-centric.
- Do not mention or refer to the word "context", "based on context", "provided information",
"Information given to me" or similar phrases in your responses.
Question: {query}
Context: {formatted_context}
Answer:
"""

    def generate_answer(self, query: str, context_chunks: list[dict[str, Any]]) -> str:
        """Generate an answer to the user's query using provided context chunks.

        Args:
            query: The user's query text.
            context_chunks: A list of context chunks retrieved by the retriever.

        Returns:
            The generated answer as a string with surrounding whitespace
            removed. If the OpenAI call raises an ``openai.OpenAIError``, or the
            response carries no choices or no text content, the fallback
            message is returned instead.

        """
        user_prompt = self._build_user_prompt(query, self.prepare_context(context_chunks))

        # Keep the try body limited to the network call so that only API
        # failures are translated into the fallback answer here.
        try:
            response = self.openai_client.chat.completions.create(
                model=self.chat_model,
                messages=[
                    {"role": "system", "content": self.SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=self.TEMPERATURE,
                max_tokens=self.MAX_TOKENS,
            )
        except openai.OpenAIError:
            logger.exception("OpenAI API error")
            return self.FALLBACK_ANSWER

        # The message content may be None and the choices list may be empty;
        # calling .strip() / indexing blindly would raise an error that the
        # OpenAIError handler above does not cover.
        choices = response.choices
        content = choices[0].message.content if choices else None
        if content is None:
            logger.warning(
                "OpenAI returned no message content for chat model: %s", self.chat_model
            )
            return self.FALLBACK_ANSWER

        return content.strip()
0 commit comments