diff --git a/.cursor/rules/general-rule.mdc b/.cursor/rules/general-rule.mdc index 01ef0ac64..c130f8259 100644 --- a/.cursor/rules/general-rule.mdc +++ b/.cursor/rules/general-rule.mdc @@ -5,7 +5,6 @@ alwaysApply: true --- ## Rules to Follow -- You must always commit your changes whenever you update code. -- You must always try and write code that is well documented. (self or commented is fine) -- You must only work on a single feature at a time. -- You must explain your decisions thouroughly to the user. \ No newline at end of file +You always prefer to use branch development. Before writing any code - you create a feature branch to hold those changes. + +After you are done - provide instructions in a "MERGE.md" file that explains how to merge the changes back to main with both a GitHub PR route and a GitHub CLI route. \ No newline at end of file diff --git a/.gitignore b/.gitignore index 8b7ec41e3..531079543 100644 --- a/.gitignore +++ b/.gitignore @@ -163,3 +163,11 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ .vercel + +# Node.js +node_modules +.next + +# PDF filess +*.pdf + diff --git a/.vercelignore b/.vercelignore new file mode 100644 index 000000000..0b902433c --- /dev/null +++ b/.vercelignore @@ -0,0 +1,17 @@ +# Ignore development files +*.log +*.tmp +.DS_Store +.vscode/ +.idea/ + +# Ignore large files that aren't needed for deployment +*.ipynb +*.md +.git/ +.gitignore + +# Keep only essential files for deployment +!api/ +!frontend/ +!vercel.json diff --git a/MERGE.md b/MERGE.md new file mode 100644 index 000000000..08a5510aa --- /dev/null +++ b/MERGE.md @@ -0,0 +1,132 @@ +# PDF RAG System - Merge Instructions + +This document provides instructions for merging the PDF RAG functionality back to the main branch and deploying the application. + +## Changes Made + +### Backend (API) +- Added PDF upload endpoint (`/api/upload-pdf`) +- Added PDF text extraction using PyPDF2 +- Implemented simple RAG system with keyword-based search +- Updated chat endpoint to use RAG context when PDF is uploaded +- Added PyPDF2 dependency to requirements.txt + +### Frontend +- Added PDF upload UI in settings panel +- Added file upload handling with drag-and-drop interface +- Added upload status indicators +- Updated welcome message to mention PDF functionality +- Added new icons for file operations + +### Dependencies Added +- PyPDF2==3.0.1 (PDF text extraction) + +## Deployment Instructions + +### Option 1: GitHub Pull Request (Recommended) + +1. **Push the feature branch to GitHub:** + ```bash + git push origin feature/pdf-rag-system + ``` + +2. **Create a Pull Request:** + - Go to your GitHub repository + - Click "Compare & pull request" for the `feature/pdf-rag-system` branch + - Add a descriptive title: "Add PDF Upload and RAG Chat Functionality" + - Add description of the changes + - Assign reviewers if needed + - Click "Create pull request" + +3. **Merge the Pull Request:** + - Review the changes + - Click "Merge pull request" + - Delete the feature branch after merging + +4. **Deploy to Vercel:** + - Vercel will automatically detect the changes and redeploy + - Monitor the deployment in your Vercel dashboard + - **Set Environment Variable**: Add `OPENAI_API_KEY` for the backend + +### Option 2: GitHub CLI + +1. **Push the feature branch:** + ```bash + git push origin feature/pdf-rag-system + ``` + +2. 
**Create and merge PR using GitHub CLI:** + ```bash + # Create pull request + gh pr create --title "Add PDF Upload and RAG Chat Functionality" --body "Implements PDF upload and RAG-based chat using simple keyword matching" + + # Merge the pull request + gh pr merge --squash + ``` + +3. **Switch back to main and pull changes:** + ```bash + git checkout main + git pull origin main + ``` + +4. **Deploy to Vercel:** + - Vercel will automatically redeploy from the main branch + +## Testing the Deployment + +After deployment, test the following features: + +1. **Basic Chat:** + - Set OpenAI API key in settings + - Send a message without uploading PDF + - Verify normal chat functionality + +2. **PDF Upload:** + - Upload a PDF file in settings + - Verify successful upload message + - Check that file appears in the UI + +3. **RAG Chat:** + - Ask questions about the uploaded PDF content + - Verify responses are based on PDF content + - Test with questions not in the PDF (should get "I don't have enough information" response) + +## Environment Variables + +### For Vercel (Both Frontend and Backend): +- `OPENAI_API_KEY`: Your OpenAI API key (for production use) + +## File Structure + +``` +The-AI-Engineer-Challenge/ +├── api/ +│ ├── app.py (updated with RAG functionality) +│ └── requirements.txt (added PyPDF2) +├── frontend/ +│ └── app/page.tsx (updated with PDF upload UI) +└── vercel.json (unchanged - original configuration) +``` + +## Local Development + +For local testing, run: +```bash +# Backend +cd api +pip install -r requirements.txt +python app.py + +# Frontend (in another terminal) +cd frontend +npm install +npm run dev +``` + +## Notes + +- Uses simple keyword-based RAG instead of vector embeddings for Vercel compatibility +- PDF processing happens server-side for security +- RAG system uses in-memory storage (resets on server restart) +- **Frontend and Backend deploy together on Vercel** - this is the original working configuration \ No newline at end of file diff --git a/aimakerspace/__init__.py b/aimakerspace/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/aimakerspace/openai_utils/__init__.py b/aimakerspace/openai_utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/aimakerspace/openai_utils/chatmodel.py b/aimakerspace/openai_utils/chatmodel.py new file mode 100644 index 000000000..f55fbfe72 --- /dev/null +++ b/aimakerspace/openai_utils/chatmodel.py @@ -0,0 +1,66 @@ +import os +from typing import Any, AsyncIterator, Iterable, List, MutableMapping + +from dotenv import load_dotenv +from openai import AsyncOpenAI, OpenAI + +load_dotenv() + +ChatMessage = MutableMapping[str, Any] + + +class ChatOpenAI: + """Thin wrapper around the OpenAI chat completion APIs.""" + + def __init__(self, model_name: str = "gpt-4o-mini"): + self.model_name = model_name + self.openai_api_key = os.getenv("OPENAI_API_KEY") + if self.openai_api_key is None: + raise ValueError("OPENAI_API_KEY is not set") + + self._client = OpenAI() + self._async_client = AsyncOpenAI() + + def run( + self, + messages: Iterable[ChatMessage], + text_only: bool = True, + **kwargs: Any, + ) -> Any: + """Execute a chat completion request. + + ``messages`` must be an iterable of ``{"role": ..., "content": ...}`` + dictionaries. When ``text_only`` is ``True`` (the default) only the + completion text is returned; otherwise the full response object is + provided. 
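+
+        A minimal illustrative call (assumes ``OPENAI_API_KEY`` is set in the
+        environment)::
+
+            chat = ChatOpenAI()
+            reply = chat.run([{"role": "user", "content": "Say hello."}])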
+ """ + + message_list = self._coerce_messages(messages) + response = self._client.chat.completions.create( + model=self.model_name, messages=message_list, **kwargs + ) + + if text_only: + return response.choices[0].message.content + + return response + + async def astream( + self, messages: Iterable[ChatMessage], **kwargs: Any + ) -> AsyncIterator[str]: + """Yield streaming completion chunks as they arrive from the API.""" + + message_list = self._coerce_messages(messages) + stream = await self._async_client.chat.completions.create( + model=self.model_name, messages=message_list, stream=True, **kwargs + ) + + async for chunk in stream: + content = chunk.choices[0].delta.content + if content is not None: + yield content + + def _coerce_messages(self, messages: Iterable[ChatMessage]) -> List[ChatMessage]: + if isinstance(messages, list): + return messages + return list(messages) diff --git a/aimakerspace/openai_utils/embedding.py b/aimakerspace/openai_utils/embedding.py new file mode 100644 index 000000000..e06016a89 --- /dev/null +++ b/aimakerspace/openai_utils/embedding.py @@ -0,0 +1,69 @@ +import asyncio +import os +from typing import Iterable, List + +from dotenv import load_dotenv +from openai import AsyncOpenAI, OpenAI + + +class EmbeddingModel: + """Helper for generating embeddings via the OpenAI API.""" + + def __init__(self, embeddings_model_name: str = "text-embedding-3-small"): + load_dotenv() + self.openai_api_key = os.getenv("OPENAI_API_KEY") + if self.openai_api_key is None: + raise ValueError( + "OPENAI_API_KEY environment variable is not set. " + "Please configure it with your OpenAI API key." + ) + + self.embeddings_model_name = embeddings_model_name + self.async_client = AsyncOpenAI() + self.client = OpenAI() + + async def async_get_embeddings(self, list_of_text: Iterable[str]) -> List[List[float]]: + """Return embeddings for ``list_of_text`` using the async client.""" + + embedding_response = await self.async_client.embeddings.create( + input=list(list_of_text), model=self.embeddings_model_name + ) + + return [item.embedding for item in embedding_response.data] + + async def async_get_embedding(self, text: str) -> List[float]: + """Return an embedding for a single text using the async client.""" + + embedding = await self.async_client.embeddings.create( + input=text, model=self.embeddings_model_name + ) + + return embedding.data[0].embedding + + def get_embeddings(self, list_of_text: Iterable[str]) -> List[List[float]]: + """Return embeddings for ``list_of_text`` using the sync client.""" + + embedding_response = self.client.embeddings.create( + input=list(list_of_text), model=self.embeddings_model_name + ) + + return [item.embedding for item in embedding_response.data] + + def get_embedding(self, text: str) -> List[float]: + """Return an embedding for a single text using the sync client.""" + + embedding = self.client.embeddings.create( + input=text, model=self.embeddings_model_name + ) + + return embedding.data[0].embedding + + +if __name__ == "__main__": + embedding_model = EmbeddingModel() + print(asyncio.run(embedding_model.async_get_embedding("Hello, world!"))) + print( + asyncio.run( + embedding_model.async_get_embeddings(["Hello, world!", "Goodbye, world!"]) + ) + ) diff --git a/aimakerspace/openai_utils/prompts.py b/aimakerspace/openai_utils/prompts.py new file mode 100644 index 000000000..b36f750c4 --- /dev/null +++ b/aimakerspace/openai_utils/prompts.py @@ -0,0 +1,60 @@ +import re +from typing import Any, Dict, List + + +class BasePrompt: + """Simple string 
template helper used to format prompt text.""" + + def __init__(self, prompt: str): + self.prompt = prompt + self._pattern = re.compile(r"\{([^}]+)\}") + + def format_prompt(self, **kwargs: Any) -> str: + """Return the prompt with ``kwargs`` substituted for placeholders.""" + + matches = self._pattern.findall(self.prompt) + replacements = {match: kwargs.get(match, "") for match in matches} + return self.prompt.format(**replacements) + + def get_input_variables(self) -> List[str]: + """Return the placeholder names used by this prompt.""" + + return self._pattern.findall(self.prompt) + + +class RolePrompt(BasePrompt): + """Prompt template that also captures an accompanying chat role.""" + + def __init__(self, prompt: str, role: str): + super().__init__(prompt) + self.role = role + + def create_message(self, apply_format: bool = True, **kwargs: Any) -> Dict[str, str]: + """Build an OpenAI chat message dictionary for this prompt.""" + + content = self.format_prompt(**kwargs) if apply_format else self.prompt + return {"role": self.role, "content": content} + + +class SystemRolePrompt(RolePrompt): + def __init__(self, prompt: str): + super().__init__(prompt, "system") + + +class UserRolePrompt(RolePrompt): + def __init__(self, prompt: str): + super().__init__(prompt, "user") + + +class AssistantRolePrompt(RolePrompt): + def __init__(self, prompt: str): + super().__init__(prompt, "assistant") + + +if __name__ == "__main__": + prompt = BasePrompt("Hello {name}, you are {age} years old") + print(prompt.format_prompt(name="John", age=30)) + + prompt = SystemRolePrompt("Hello {name}, you are {age} years old") + print(prompt.create_message(name="John", age=30)) + print(prompt.get_input_variables()) diff --git a/aimakerspace/text_utils.py b/aimakerspace/text_utils.py new file mode 100644 index 000000000..fa9b5d1e5 --- /dev/null +++ b/aimakerspace/text_utils.py @@ -0,0 +1,147 @@ +from pathlib import Path +from typing import Iterable, List + +import PyPDF2 + + +class TextFileLoader: + """Load plain-text documents from a single file or an entire directory.""" + + def __init__(self, path: str, encoding: str = "utf-8"): + self.path = Path(path) + self.encoding = encoding + self.documents: List[str] = [] + + def load(self) -> None: + """Populate ``self.documents`` from the configured path.""" + + self.documents = list(self._iter_documents()) + + def load_file(self) -> None: + """Load a single file specified by ``self.path``.""" + + self.documents = [self._read_text_file(self.path)] + + def load_directory(self) -> None: + """Load all text files contained within ``self.path``.""" + + self.documents = list(self._iter_directory(self.path)) + + def load_documents(self) -> List[str]: + """Convenience wrapper returning the loaded documents.""" + + self.load() + return self.documents + + def _iter_documents(self) -> Iterable[str]: + if self.path.is_dir(): + yield from self._iter_directory(self.path) + elif self.path.is_file() and self.path.suffix.lower() == ".txt": + yield self._read_text_file(self.path) + else: + raise ValueError( + "Provided path must be a directory or a .txt file: " f"{self.path}" + ) + + def _iter_directory(self, directory: Path) -> Iterable[str]: + for entry in sorted(directory.rglob("*.txt")): + if entry.is_file(): + yield self._read_text_file(entry) + + def _read_text_file(self, file_path: Path) -> str: + with file_path.open("r", encoding=self.encoding) as file_handle: + return file_handle.read() + + +class CharacterTextSplitter: + """Naively split long strings into overlapping character 
chunks.""" + + def __init__( + self, + chunk_size: int = 1000, + chunk_overlap: int = 200, + ): + if chunk_size <= chunk_overlap: + raise ValueError("Chunk size must be greater than chunk overlap") + + self.chunk_size = chunk_size + self.chunk_overlap = chunk_overlap + + def split(self, text: str) -> List[str]: + """Split ``text`` into chunks preserving the configured overlap.""" + + step = self.chunk_size - self.chunk_overlap + return [text[i : i + self.chunk_size] for i in range(0, len(text), step)] + + def split_texts(self, texts: List[str]) -> List[str]: + """Split multiple texts and flatten the resulting chunks.""" + + chunks: List[str] = [] + for text in texts: + chunks.extend(self.split(text)) + return chunks + + +class PDFLoader: + """Extract text from PDF files stored at a path.""" + + def __init__(self, path: str): + self.path = Path(path) + self.documents: List[str] = [] + + def load(self) -> None: + """Populate ``self.documents`` from the configured path.""" + + self.documents = list(self._iter_documents()) + + def load_file(self) -> None: + """Load a single PDF specified by ``self.path``.""" + + self.documents = [self._read_pdf(self.path)] + + def load_directory(self) -> None: + """Load all PDF files contained within ``self.path``.""" + + self.documents = list(self._iter_directory(self.path)) + + def load_documents(self) -> List[str]: + """Convenience wrapper returning the loaded documents.""" + + self.load() + return self.documents + + def _iter_documents(self) -> Iterable[str]: + if self.path.is_dir(): + yield from self._iter_directory(self.path) + elif self.path.is_file() and self.path.suffix.lower() == ".pdf": + yield self._read_pdf(self.path) + else: + raise ValueError( + "Provided path must be a directory or a .pdf file: " f"{self.path}" + ) + + def _iter_directory(self, directory: Path) -> Iterable[str]: + for entry in sorted(directory.rglob("*.pdf")): + if entry.is_file(): + yield self._read_pdf(entry) + + def _read_pdf(self, file_path: Path) -> str: + with file_path.open("rb") as file_handle: + pdf_reader = PyPDF2.PdfReader(file_handle) + extracted_pages = [page.extract_text() or "" for page in pdf_reader.pages] + return "\n".join(extracted_pages) + + +if __name__ == "__main__": + loader = TextFileLoader("data/KingLear.txt") + loader.load() + splitter = CharacterTextSplitter() + chunks = splitter.split_texts(loader.documents) + print(len(chunks)) + print(chunks[0]) + print("--------") + print(chunks[1]) + print("--------") + print(chunks[-2]) + print("--------") + print(chunks[-1]) diff --git a/aimakerspace/vectordatabase.py b/aimakerspace/vectordatabase.py new file mode 100644 index 000000000..1eb32c1e1 --- /dev/null +++ b/aimakerspace/vectordatabase.py @@ -0,0 +1,105 @@ +import asyncio +from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union + +import numpy as np + +from aimakerspace.openai_utils.embedding import EmbeddingModel + + +def cosine_similarity(vector_a: np.ndarray, vector_b: np.ndarray) -> float: + """Return the cosine similarity between two vectors.""" + + norm_a = np.linalg.norm(vector_a) + norm_b = np.linalg.norm(vector_b) + if norm_a == 0 or norm_b == 0: + return 0.0 + + dot_product = np.dot(vector_a, vector_b) + return float(dot_product / (norm_a * norm_b)) + + +class VectorDatabase: + """Minimal in-memory vector store backed by numpy arrays.""" + + def __init__(self, embedding_model: Optional[EmbeddingModel] = None): + self.vectors: Dict[str, np.ndarray] = {} + self.embedding_model = embedding_model or EmbeddingModel() + + def 
insert(self, key: str, vector: Iterable[float]) -> None: + """Store ``vector`` so that it can be retrieved with ``key`` later on.""" + + self.vectors[key] = np.asarray(vector, dtype=float) + + def search( + self, + query_vector: Iterable[float], + k: int, + distance_measure: Callable[[np.ndarray, np.ndarray], float] = cosine_similarity, + ) -> List[Tuple[str, float]]: + """Return the ``k`` vectors most similar to ``query_vector``.""" + + if k <= 0: + raise ValueError("k must be a positive integer") + + query = np.asarray(query_vector, dtype=float) + scores = [ + (key, distance_measure(query, vector)) + for key, vector in self.vectors.items() + ] + scores.sort(key=lambda item: item[1], reverse=True) + return scores[:k] + + def search_by_text( + self, + query_text: str, + k: int, + distance_measure: Callable[[np.ndarray, np.ndarray], float] = cosine_similarity, + return_as_text: bool = False, + ) -> Union[List[Tuple[str, float]], List[str]]: + """Vector search using an embedding generated from ``query_text``.""" + + query_vector = self.embedding_model.get_embedding(query_text) + results = self.search(query_vector, k, distance_measure) + if return_as_text: + return [result[0] for result in results] + return results + + def retrieve_from_key(self, key: str) -> Optional[np.ndarray]: + """Return the stored vector for ``key`` if present.""" + + return self.vectors.get(key) + + async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase": + """Populate the vector store asynchronously from raw text snippets.""" + + embeddings = await self.embedding_model.async_get_embeddings(list_of_text) + for text, embedding in zip(list_of_text, embeddings): + self.insert(text, embedding) + return self + + +if __name__ == "__main__": + list_of_text = [ + "I like to eat broccoli and bananas.", + "I ate a banana and spinach smoothie for breakfast.", + "Chinchillas and kittens are cute.", + "My sister adopted a kitten yesterday.", + "Look at this cute hamster munching on a piece of broccoli.", + ] + + vector_db = VectorDatabase() + vector_db = asyncio.run(vector_db.abuild_from_list(list_of_text)) + k = 2 + + searched_vector = vector_db.search_by_text("I think fruit is awesome!", k=k) + print(f"Closest {k} vector(s):", searched_vector) + + retrieved_vector = vector_db.retrieve_from_key( + "I like to eat broccoli and bananas." 
+ ) + print("Retrieved vector:", retrieved_vector) + + relevant_texts = vector_db.search_by_text( + "I think fruit is awesome!", k=k, return_as_text=True + ) + print(f"Closest {k} text(s):", relevant_texts) diff --git a/api/app.py b/api/app.py index 4fe8d0ba8..ea3514eb6 100644 --- a/api/app.py +++ b/api/app.py @@ -1,5 +1,5 @@ # Import required FastAPI components for building the API -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI, HTTPException, UploadFile, File, Header from fastapi.responses import StreamingResponse from fastapi.middleware.cors import CORSMiddleware # Import Pydantic for data validation and settings management @@ -7,11 +7,17 @@ # Import OpenAI client for interacting with OpenAI's API from openai import OpenAI import os -from typing import Optional +import PyPDF2 +import io +from typing import Optional, List # Initialize FastAPI application with a title app = FastAPI(title="OpenAI Chat API") +# Global variables for RAG system +pdf_chunks = [] +pdf_text = "" + # Configure CORS (Cross-Origin Resource Sharing) middleware # This allows the API to be accessed from different domains/origins app.add_middleware( @@ -28,24 +34,150 @@ class ChatRequest(BaseModel): developer_message: str # Message from the developer/system user_message: str # Message from the user model: Optional[str] = "gpt-4.1-mini" # Optional model selection with default - api_key: str # OpenAI API key for authentication + +class UploadResponse(BaseModel): + message: str + success: bool + +class Flashcard(BaseModel): + question: str + answer: str + +class FlashcardResponse(BaseModel): + flashcards: List[Flashcard] + success: bool + +def extract_text_from_pdf(pdf_file: bytes) -> str: + """Extract text from PDF file bytes""" + try: + pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file)) + text = "" + for page in pdf_reader.pages: + text += page.extract_text() + "\n" + return text + except Exception as e: + raise HTTPException(status_code=400, detail=f"Error extracting text from PDF: {str(e)}") + +def build_rag_system(text: str) -> list: + """Build simple RAG system from PDF text""" + try: + # Simple text chunking + chunk_size = 1000 + chunks = [] + + for i in range(0, len(text), chunk_size): + chunk = text[i:i + chunk_size] + chunks.append(chunk) + + return chunks + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error building RAG system: {str(e)}") + +def find_relevant_chunks(query: str, chunks: list, k: int = 3) -> list: + """Simple keyword-based search for relevant chunks""" + query_words = query.lower().split() + chunk_scores = [] + + for i, chunk in enumerate(chunks): + chunk_lower = chunk.lower() + score = sum(1 for word in query_words if word in chunk_lower) + chunk_scores.append((i, score, chunk)) + + # Sort by score and return top k + chunk_scores.sort(key=lambda x: x[1], reverse=True) + return [chunk for _, _, chunk in chunk_scores[:k]] + +# PDF Upload endpoint +@app.post("/api/upload-pdf", response_model=UploadResponse) +async def upload_pdf(file: UploadFile = File(...), authorization: str = Header(None)): + global pdf_chunks, pdf_text + + # Extract API key from Authorization header + api_key = None + if authorization and authorization.startswith('Bearer '): + api_key = authorization[7:] # Remove 'Bearer ' prefix + + if not api_key: + raise HTTPException(status_code=400, detail="API key is required") + + if not file.filename.endswith('.pdf'): + raise HTTPException(status_code=400, detail="Only PDF files are allowed") + + try: + # Read PDF file + pdf_content = 
await file.read() + + # Extract text from PDF + pdf_text = extract_text_from_pdf(pdf_content) + + if not pdf_text.strip(): + raise HTTPException(status_code=400, detail="No text found in PDF") + + # Build simple RAG system + pdf_chunks = build_rag_system(pdf_text) + + return UploadResponse( + message=f"PDF uploaded successfully! Extracted {len(pdf_text)} characters and created {len(pdf_chunks)} chunks.", + success=True + ) + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) # Define the main chat endpoint that handles POST requests @app.post("/api/chat") -async def chat(request: ChatRequest): +async def chat(request: ChatRequest, authorization: str = Header(None)): + global pdf_chunks + + # Extract API key from Authorization header + api_key = None + if authorization and authorization.startswith('Bearer '): + api_key = authorization[7:] # Remove 'Bearer ' prefix + + if not api_key: + raise HTTPException(status_code=400, detail="API key is required") + try: # Initialize OpenAI client with the provided API key - client = OpenAI(api_key=request.api_key) + client = OpenAI(api_key=api_key) # Create an async generator function for streaming responses async def generate(): + # If we have PDF chunks (PDF uploaded), use RAG + if pdf_chunks: + # Search for relevant context using simple keyword matching + relevant_chunks = find_relevant_chunks(request.user_message, pdf_chunks, k=3) + context = "\n\n".join(relevant_chunks) + + # Create enhanced system message with context + enhanced_system_message = f"""{request.developer_message} + +IMPORTANT: You must ONLY answer questions using information from the provided context below. If the answer is not in the context, say "I don't have enough information in the provided document to answer that question." + +When the user asks to focus quizzes on a section/topic, begin your reply with a single control line: +CONTROL: {{"action":"set_topic","topic":""}} +After that line, provide your normal, helpful answer. + +If there is no topic change, do not output a CONTROL line. + +Context from uploaded document: +{context}""" + + messages = [ + {"role": "system", "content": enhanced_system_message}, + {"role": "user", "content": request.user_message} + ] + else: + # No PDF uploaded, use original behavior + messages = [ + {"role": "system", "content": request.developer_message}, + {"role": "user", "content": request.user_message} + ] + # Create a streaming chat completion request stream = client.chat.completions.create( model=request.model, - messages=[ - {"role": "developer", "content": request.developer_message}, - {"role": "user", "content": request.user_message} - ], + messages=messages, stream=True # Enable streaming response ) @@ -66,8 +198,108 @@ async def generate(): async def health_check(): return {"status": "ok"} +# Flashcard generation endpoint +@app.post("/api/flashcards", response_model=FlashcardResponse) +async def generate_flashcards(authorization: str = Header(None)): + global pdf_text, pdf_chunks + + # Extract API key from Authorization header + api_key = None + if authorization and authorization.startswith('Bearer '): + api_key = authorization[7:] # Remove 'Bearer ' prefix + + if not api_key: + raise HTTPException(status_code=400, detail="API key is required") + + if not pdf_text or not pdf_text.strip(): + raise HTTPException(status_code=400, detail="No PDF has been uploaded yet. 
Please upload a PDF first.") + + try: + # Initialize OpenAI client + client = OpenAI(api_key=api_key) + + # Create a prompt for flashcard generation + flashcard_prompt = f"""Based on the following document content, generate 8-10 educational flashcards in Q&A format. Each flashcard should have a clear, specific question and a comprehensive answer. + +Document content: +{pdf_text[:4000]} # Limit to first 4000 characters to avoid token limits + +Generate flashcards that: +1. Cover the main topics and concepts from the document +2. Have clear, specific questions +3. Provide detailed, accurate answers +4. Are educational and useful for studying + +Return the flashcards in this exact JSON format: +[ + {{"question": "What is...?", "answer": "The answer is..."}}, + {{"question": "How does...?", "answer": "The process involves..."}} +] + +Only return the JSON array, no other text.""" + + # Generate flashcards using OpenAI + response = client.chat.completions.create( + model="gpt-4.1-mini", + messages=[ + {"role": "system", "content": "You are an educational assistant that creates high-quality flashcards from document content. Always respond with valid JSON only."}, + {"role": "user", "content": flashcard_prompt} + ], + temperature=0.7, + max_tokens=2000 + ) + + # Parse the response + flashcard_text = response.choices[0].message.content.strip() + + # Clean up the response (remove any markdown formatting) + if flashcard_text.startswith("```json"): + flashcard_text = flashcard_text[7:] + if flashcard_text.endswith("```"): + flashcard_text = flashcard_text[:-3] + + # Parse JSON + import json + try: + flashcard_data = json.loads(flashcard_text) + + # Validate and format the flashcards + flashcards = [] + for item in flashcard_data: + if isinstance(item, dict) and "question" in item and "answer" in item: + flashcards.append(Flashcard( + question=item["question"].strip(), + answer=item["answer"].strip() + )) + + if len(flashcards) < 3: + raise ValueError("Not enough valid flashcards generated") + + return FlashcardResponse( + flashcards=flashcards, + success=True + ) + + except (json.JSONDecodeError, ValueError) as e: + # Fallback: create simple flashcards from document chunks + flashcards = [] + for i, chunk in enumerate(pdf_chunks[:8]): + if len(chunk.strip()) > 50: # Only use substantial chunks + flashcards.append(Flashcard( + question=f"What is mentioned about: {chunk[:100]}...?", + answer=chunk[:300] + "..." 
if len(chunk) > 300 else chunk + )) + + return FlashcardResponse( + flashcards=flashcards, + success=True + ) + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error generating flashcards: {str(e)}") + # Entry point for running the application directly if __name__ == "__main__": import uvicorn # Start the server on all network interfaces (0.0.0.0) on port 8000 - uvicorn.run(app, host="0.0.0.0", port=8000) + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/api/requirements.txt b/api/requirements.txt index f2d9a1cbc..0ba22ca23 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -2,4 +2,5 @@ fastapi==0.115.12 uvicorn==0.34.2 openai==1.77.0 pydantic==2.11.4 -python-multipart==0.0.18 \ No newline at end of file +python-multipart==0.0.18 +PyPDF2==3.0.1 \ No newline at end of file diff --git a/frontend/README.md b/frontend/README.md index 56347bab6..fa5d2a833 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -1,3 +1,91 @@ -### Front End +# AI Engineer Challenge Frontend -Please populate this README with instructions on how to run the application! \ No newline at end of file +A beautiful, modern chat interface for your LLM application built with Next.js, TypeScript, and Tailwind CSS. + +## 🚀 Quick Start + +### Prerequisites + +- Node.js (version 18 or higher) +- npm or yarn +- The FastAPI backend running on `http://localhost:8000` + +### Installation + +1. Navigate to the frontend directory: + ```bash + cd frontend + ``` + +2. Install dependencies: + ```bash + npm install + ``` + +3. Start the development server: + ```bash + npm run dev + ``` + +4. Open your browser and navigate to `http://localhost:3000` + +## 🎨 Features + +- **Modern UI**: Beautiful glass-morphism design with smooth animations +- **Real-time Streaming**: Watch AI responses stream in real-time +- **Settings Panel**: Configure your OpenAI API key, system prompt, and model +- **Responsive Design**: Works perfectly on desktop and mobile devices +- **TypeScript**: Full type safety for better development experience +- **Tailwind CSS**: Utility-first CSS framework for rapid styling + +## 🔧 Configuration + +Before using the chat, you need to: + +1. Click the settings icon (⚙️) in the top-right corner +2. Enter your OpenAI API key +3. Optionally customize the system prompt and model +4. Start chatting! + +## 🏗️ Project Structure + +``` +frontend/ +├── app/ +│ ├── globals.css # Global styles and Tailwind imports +│ ├── layout.tsx # Root layout component +│ └── page.tsx # Main chat interface +├── package.json # Dependencies and scripts +├── tailwind.config.js # Tailwind CSS configuration +├── tsconfig.json # TypeScript configuration +└── README.md # This file +``` + +## 🚀 Deployment + +This frontend is ready to be deployed to Vercel: + +1. Push your code to GitHub +2. Connect your repository to Vercel +3. Deploy with one click! + +## 🔗 Backend Integration + +This frontend integrates with the FastAPI backend located in the `/api` directory. Make sure the backend is running on `http://localhost:8000` before using the frontend. + +The frontend communicates with the following backend endpoints: +- `POST /api/chat` - Send messages and receive streaming responses +- `GET /api/health` - Health check endpoint + +## 🎯 Next Steps + +1. Start the backend server (see `/api/README.md`) +2. Install frontend dependencies and start the dev server +3. Configure your OpenAI API key +4. Start building amazing AI applications! 
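+
+## 🔌 Quick API Check
+
+If you want to sanity-check the backend before using the UI, the calls below are a minimal sketch (assuming the backend is running locally on port 8000 and `sk-...` stands in for your real OpenAI API key):
+
+```bash
+# Health check
+curl http://localhost:8000/api/health
+
+# Chat request (the response streams back as plain text)
+curl -N -X POST http://localhost:8000/api/chat \
+  -H "Authorization: Bearer sk-..." \
+  -H "Content-Type: application/json" \
+  -d '{"developer_message": "You are a helpful AI assistant.", "user_message": "Hello!", "model": "gpt-4.1-mini"}'
+```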
+ +## 🐛 Troubleshooting + +- **CORS Errors**: Make sure the backend CORS settings allow requests from `http://localhost:3000` +- **API Key Issues**: Verify your OpenAI API key is valid and has sufficient credits +- **Build Errors**: Ensure all dependencies are installed with `npm install` \ No newline at end of file diff --git a/frontend/app/globals.css b/frontend/app/globals.css new file mode 100644 index 000000000..7d30b2028 --- /dev/null +++ b/frontend/app/globals.css @@ -0,0 +1,129 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +@layer base { + html { + font-family: 'Inter', system-ui, sans-serif; + } + + body { + @apply bg-black-950 text-white min-h-screen; + } +} + +@layer components { + .glass-effect { + @apply bg-black-900/80 backdrop-blur-sm border border-black-800/20; + } + + /* 3D Flip Card Styles */ + .perspective-1000 { + perspective: 1000px; + } + + .transform-style-preserve-3d { + transform-style: preserve-3d; + } + + .backface-hidden { + backface-visibility: hidden; + } + + .rotate-y-180 { + transform: rotateY(180deg); + } + + /* Flashcard hover effects */ + .flashcard-container:hover { + transform: translateY(-2px); + box-shadow: 0 10px 25px rgba(0, 0, 0, 0.3); + } + + /* Mobile touch optimization */ + @media (max-width: 768px) { + .perspective-1000 { + perspective: 800px; + } + } + + /* Text truncation utility */ + .line-clamp-6 { + display: -webkit-box; + -webkit-line-clamp: 6; + -webkit-box-orient: vertical; + overflow: hidden; + } + + .line-clamp-4 { + display: -webkit-box; + -webkit-line-clamp: 4; + -webkit-box-orient: vertical; + overflow: hidden; + } + + /* Modal animations */ + .modal-enter { + opacity: 0; + transform: scale(0.9); + } + + .modal-enter-active { + opacity: 1; + transform: scale(1); + transition: opacity 200ms ease-out, transform 200ms ease-out; + } + + .gradient-text { + @apply bg-gradient-to-r from-cookie-500 to-cookie-700 bg-clip-text text-transparent; + } + + .chat-bubble { + @apply max-w-3xl mx-auto p-4 rounded-2xl shadow-lg; + } + + .user-bubble { + @apply bg-cookie-600 text-black-950 ml-auto; + } + + .ai-bubble { + @apply bg-black-900 text-white mr-auto border border-black-800; + } + + .typing-indicator { + @apply flex space-x-1 p-4; + } + + .typing-dot { + @apply w-2 h-2 bg-cookie-400 rounded-full animate-pulse; + } + + /* Study card flip animation */ + .study-card { + transform-style: preserve-3d; + transition: transform 0.6s; + } + + .study-card.flipped { + transform: rotateY(180deg); + } + + .backface-hidden { + backface-visibility: hidden; + } + + .rotate-y-180 { + transform: rotateY(180deg); + } +} + +@layer utilities { + .scrollbar-hide { + -ms-overflow-style: none; + scrollbar-width: none; + } + + .scrollbar-hide::-webkit-scrollbar { + display: none; + } +} \ No newline at end of file diff --git a/frontend/app/layout.tsx b/frontend/app/layout.tsx new file mode 100644 index 000000000..645451970 --- /dev/null +++ b/frontend/app/layout.tsx @@ -0,0 +1,27 @@ +import React from 'react' +import type { Metadata } from 'next' +import { Inter } from 'next/font/google' +import './globals.css' + +const inter = Inter({ subsets: ['latin'] }) + +export const metadata: Metadata = { + title: 'CookiesPDF - Smart PDF Learning Assistant', + description: 'AI-powered PDF chat and flashcard generation for smart learning', +} + +export default function RootLayout({ + children, +}: { + children: React.ReactNode +}) { + return ( + + +
+ {children} +
+ + + ) +} \ No newline at end of file diff --git a/frontend/app/page.tsx b/frontend/app/page.tsx new file mode 100644 index 000000000..03d559ab3 --- /dev/null +++ b/frontend/app/page.tsx @@ -0,0 +1,1062 @@ +'use client' + +import React, { useState, useRef, useEffect } from 'react' +import { Send, Bot, User, Settings, Sparkles, Loader2, Upload, FileText, X, Plus, CheckCircle, XCircle, HelpCircle } from 'lucide-react' + +interface Message { + id: string + content: string + role: 'user' | 'assistant' + timestamp: Date + citations?: Array<{ + docName: string + page: number + snippet: string + }> +} + +interface Flashcard { + question: string + answer: string +} + +// Flashcard Component +const FlashcardComponent: React.FC<{ + card: Flashcard, + onViewFull: (card: Flashcard) => void +}> = ({ card, onViewFull }) => { + const [isFlipped, setIsFlipped] = useState(false) + + const handleCardClick = (e: React.MouseEvent) => { + e.stopPropagation() + setIsFlipped(!isFlipped) + } + + const handleViewFullClick = (e: React.MouseEvent) => { + e.stopPropagation() + onViewFull(card) + } + + return ( +
+
+ {/* Front of card (Question) */} +
+
+
Q:
+
+ {card.question} +
+
+ Click to reveal answer +
+
+
+ + {/* Back of card (Answer Preview) */} +
+
+
A:
+
+ {card.answer} +
+ +
+ Click to see question +
+
+
+
+
+ ) +} + +// Get API URL from environment or default to localhost +const getApiUrl = () => { + if (typeof window !== 'undefined') { + // Client-side: use current origin for API calls + return `${window.location.origin}/api` + } + // Server-side: use environment variable or default + return process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000/api' +} + +export default function Home() { + const [messages, setMessages] = useState([]) + const [inputMessage, setInputMessage] = useState('') + const [isLoading, setIsLoading] = useState(false) + const [apiKey, setApiKey] = useState('') + const [developerMessage, setDeveloperMessage] = useState('You are a helpful AI assistant.') + const [model, setModel] = useState('gpt-4.1-mini') + const [showSettings, setShowSettings] = useState(false) + const [uploadedFile, setUploadedFile] = useState(null) + const [isUploading, setIsUploading] = useState(false) + const [uploadStatus, setUploadStatus] = useState('') + const [flashcards, setFlashcards] = useState([]) + const [isGeneratingFlashcards, setIsGeneratingFlashcards] = useState(false) + const [showFlashcards, setShowFlashcards] = useState(false) + const [selectedFlashcard, setSelectedFlashcard] = useState(null) + const [showModal, setShowModal] = useState(false) + const [currentCardIndex, setCurrentCardIndex] = useState(0) + const [studyMode, setStudyMode] = useState(true) + const [selectedText, setSelectedText] = useState('') + const [showAddFlashcard, setShowAddFlashcard] = useState(false) + const [addFlashcardPosition, setAddFlashcardPosition] = useState({ x: 0, y: 0 }) + const [studyProgress, setStudyProgress] = useState(0) + const [activeTopic, setActiveTopic] = useState(null) + const [isStudyPanelOpen, setIsStudyPanelOpen] = useState(false) + const [sessionCards, setSessionCards] = useState([]) + const [gradedCount, setGradedCount] = useState(0) + const [showDone, setShowDone] = useState(false) + const messagesEndRef = useRef(null) + const fileInputRef = useRef(null) + const textSelectionRef = useRef(null) + + const scrollToBottom = () => { + messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }) + } + + useEffect(() => { + scrollToBottom() + }, [messages]) + + useEffect(() => { + const handleClickOutside = () => { + setShowAddFlashcard(false) + } + + if (showAddFlashcard) { + document.addEventListener('click', handleClickOutside) + return () => document.removeEventListener('click', handleClickOutside) + } + }, [showAddFlashcard]) + + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === 'Escape' && isStudyPanelOpen) { + setIsStudyPanelOpen(false) + } + } + + document.addEventListener('keydown', handleKeyDown) + return () => document.removeEventListener('keydown', handleKeyDown) + }, [isStudyPanelOpen]) + + // Handle topic changes - reset session + useEffect(() => { + if (isStudyPanelOpen && flashcards.length > 0) { + createSessionSnapshot() + if (activeTopic) { + // Simple toast notification + const toast = document.createElement('div') + toast.className = 'fixed top-4 right-4 bg-blue-600 text-white px-4 py-2 rounded-lg shadow-lg z-50' + toast.textContent = `Topic changed—session reset` + document.body.appendChild(toast) + setTimeout(() => { + document.body.removeChild(toast) + }, 3000) + } + } + }, [activeTopic, isStudyPanelOpen, flashcards]) + + // Handle panel opening - create new session + useEffect(() => { + if (isStudyPanelOpen && flashcards.length > 0) { + createSessionSnapshot() + } + }, [isStudyPanelOpen]) + + const handleFileUpload = async (file: 
File) => { + if (!file.name.endsWith('.pdf')) { + setUploadStatus('Please select a PDF file') + return + } + + if (!apiKey.trim()) { + setUploadStatus('Please set your API key first') + return + } + + setIsUploading(true) + setUploadStatus('Uploading and processing PDF...') + + try { + const formData = new FormData() + formData.append('file', file) + + const apiUrl = getApiUrl() + const response = await fetch(`${apiUrl}/upload-pdf`, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${apiKey}`, + }, + body: formData, + }) + + if (!response.ok) { + const errorData = await response.json() + throw new Error(errorData.detail || 'Upload failed') + } + + const result = await response.json() + setUploadedFile(file) + setUploadStatus(result.message) + // Clear previous flashcards when new PDF is uploaded + setFlashcards([]) + setShowFlashcards(false) + } catch (error) { + console.error('Upload error:', error) + setUploadStatus(`Upload failed: ${error instanceof Error ? error.message : 'Unknown error'}`) + } finally { + setIsUploading(false) + } + } + + const handleFileSelect = (e: React.ChangeEvent) => { + const file = e.target.files?.[0] + if (file) { + handleFileUpload(file) + } + } + + const removeUploadedFile = () => { + setUploadedFile(null) + setUploadStatus('') + setFlashcards([]) + setShowFlashcards(false) + if (fileInputRef.current) { + fileInputRef.current.value = '' + } + } + + const generateFlashcards = async () => { + if (!apiKey.trim()) { + alert('Please set your API key first') + return + } + + setIsGeneratingFlashcards(true) + try { + const apiUrl = getApiUrl() + const response = await fetch(`${apiUrl}/flashcards`, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + }, + }) + + if (!response.ok) { + const errorData = await response.json() + throw new Error(errorData.detail || 'Failed to generate flashcards') + } + + const result = await response.json() + setFlashcards(result.flashcards) + setShowFlashcards(true) + setIsStudyPanelOpen(true) + // Session will be created by useEffect when panel opens + } catch (error) { + console.error('Flashcard generation error:', error) + alert(`Error generating flashcards: ${error instanceof Error ? error.message : 'Unknown error'}`) + } finally { + setIsGeneratingFlashcards(false) + } + } + + const handleViewFull = (card: Flashcard) => { + setSelectedFlashcard(card) + setShowModal(true) + } + + const closeModal = () => { + setShowModal(false) + setSelectedFlashcard(null) + } + + const handleTextSelection = (e: React.MouseEvent) => { + e.stopPropagation() + const selection = window.getSelection() + if (selection && selection.toString().trim()) { + const rect = selection.getRangeAt(0).getBoundingClientRect() + setSelectedText(selection.toString()) + setAddFlashcardPosition({ + x: rect.left + rect.width / 2, + y: rect.top - 10 + }) + setShowAddFlashcard(true) + } else { + setShowAddFlashcard(false) + } + } + + const handleAddFlashcard = (text: string) => { + // Create a simple flashcard from selected text + const newFlashcard: Flashcard = { + question: `What does this mean: "${text.substring(0, 100)}${text.length > 100 ? '...' 
: ''}"?`, + answer: text + } + setFlashcards(prev => [...prev, newFlashcard]) + setShowAddFlashcard(false) + setSelectedText('') + } + + const handleCardGrading = (grade: 1 | 2 | 3) => { + if (showDone) return // Don't allow grading when done view is showing + + const currentSession = getCurrentSessionCards() + const newGradedCount = gradedCount + 1 + + setGradedCount(newGradedCount) + + if (newGradedCount >= currentSession.length) { + // Session completed - show done view + setShowDone(true) + } else { + // Move to next card + setCurrentCardIndex(prev => Math.min(prev + 1, currentSession.length - 1)) + } + } + + const handleKeyPress = (e: React.KeyboardEvent) => { + if (showDone) return // Don't allow grading when done view is showing + if (e.key === '1') handleCardGrading(1) + if (e.key === '2') handleCardGrading(2) + if (e.key === '3') handleCardGrading(3) + } + + // Helper function to check if a card matches the active topic + const matchesTopic = (card: Flashcard, topic: string) => { + const bag = [ + card.question.toLowerCase(), + card.answer.toLowerCase() + ] + return bag.some(text => text.includes(topic.toLowerCase())) + } + + // Get filtered flashcards based on active topic + const getFilteredFlashcards = () => { + if (!activeTopic) return flashcards + return flashcards.filter(card => matchesTopic(card, activeTopic)) + } + + // Create a new session snapshot + const createSessionSnapshot = () => { + const filtered = getFilteredFlashcards() + const snapshot = filtered.slice(0, 9) + setSessionCards(snapshot) + setCurrentCardIndex(0) + setGradedCount(0) + setShowDone(false) + return snapshot + } + + // Ensure current card index is within bounds + const getCurrentCardIndex = () => { + const session = getCurrentSessionCards() + return Math.max(0, Math.min(currentCardIndex, session.length - 1)) + } + + // Get current session cards (frozen snapshot) + const getCurrentSessionCards = () => { + return sessionCards + } + + // Handle slash commands + const handleSlashCommand = (input: string) => { + if (input.startsWith('/topic ')) { + const topic = input.replace('/topic ', '').trim() + setActiveTopic(topic) + return true + } + if (input === '/clear-topic') { + setActiveTopic(null) + return true + } + return false + } + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault() + if (!inputMessage.trim() || !apiKey.trim()) return + + // Handle slash commands + if (handleSlashCommand(inputMessage)) { + setInputMessage('') + return + } + + const userMessage: Message = { + id: Date.now().toString(), + content: inputMessage, + role: 'user', + timestamp: new Date() + } + + setMessages(prev => [...prev, userMessage]) + setInputMessage('') + setIsLoading(true) + + try { + const apiUrl = getApiUrl() + const response = await fetch(`${apiUrl}/chat`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${apiKey}`, + }, + body: JSON.stringify({ + developer_message: developerMessage, + user_message: inputMessage, + model: model, + }), + }) + + if (!response.ok) { + throw new Error('Failed to get response') + } + + const reader = response.body?.getReader() + if (!reader) throw new Error('No reader available') + + let aiResponse = '' + let controlParsed = false + const aiMessage: Message = { + id: (Date.now() + 1).toString(), + content: '', + role: 'assistant', + timestamp: new Date() + } + + setMessages(prev => [...prev, aiMessage]) + + while (true) { + const { done, value } = await reader.read() + if (done) break + + const chunk = new 
TextDecoder().decode(value) + aiResponse += chunk + + // Parse CONTROL line if not already parsed + if (!controlParsed && aiResponse.includes('\n')) { + const lines = aiResponse.split('\n') + if (lines[0].startsWith('CONTROL:')) { + try { + const controlText = lines[0].replace('CONTROL:', '').trim() + const control = JSON.parse(controlText) + if (control?.action === 'set_topic' && control?.topic) { + setActiveTopic(control.topic) + // Remove control line from visible content + aiResponse = lines.slice(1).join('\n') + } + } catch (e) { + // Ignore parsing errors + } + controlParsed = true + } + } + + setMessages(prev => + prev.map(msg => + msg.id === aiMessage.id + ? { ...msg, content: aiResponse } + : msg + ) + ) + } + } catch (error) { + console.error('Error:', error) + const errorMessage: Message = { + id: (Date.now() + 1).toString(), + content: 'Sorry, there was an error processing your request. Please check your API key and try again.', + role: 'assistant', + timestamp: new Date() + } + setMessages(prev => [...prev, errorMessage]) + } finally { + setIsLoading(false) + } + } + + return ( +
+ {/* Header */} +
+
+
+
+
+
+
+
+
+
+
+

CookiesPDF

+
+
+ + +
+
+
+ + {/* Settings Modal */} + {showSettings && ( +
+
+
+

Settings

+ +
+
+
+ + setApiKey(e.target.value)} + placeholder="sk-..." + className="w-full px-3 py-2 bg-black-800 border border-black-700 rounded-lg focus:ring-2 focus:ring-cookie-500 focus:border-transparent text-white placeholder-cookie-400" + /> +
+
+ +