Skip to content

Commit

Permalink
Fix sending file attachments in save_to_conversation method
Browse files Browse the repository at this point in the history
- When files are attached but the upload fails, don't update the state variables
- Make removing null characters in PDF extraction more space-efficient
  • Loading branch information
sabaimran committed Nov 11, 2024
1 parent ba2471d commit dd36303
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 25 deletions.
34 changes: 18 additions & 16 deletions src/interface/web/app/components/chatInputArea/chatInputArea.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -237,25 +237,27 @@ export const ChatInputArea = forwardRef<HTMLTextAreaElement, ChatInputProps>((pr
? Array.from(nonImageFiles).concat(Array.from(attachedFiles || []))
: Array.from(attachedFiles || []);

// Ensure files are below size limit (10 MB)
for (let i = 0; i < newFiles.length; i++) {
if (newFiles[i].size > 10 * 1024 * 1024) {
setWarning(
`File ${newFiles[i].name} is too large. Please upload files smaller than 10 MB.`,
);
return;
if (newFiles.length > 0) {
// Ensure files are below size limit (10 MB)
for (let i = 0; i < newFiles.length; i++) {
if (newFiles[i].size > 10 * 1024 * 1024) {
setWarning(
`File ${newFiles[i].name} is too large. Please upload files smaller than 10 MB.`,
);
return;
}
}
}

const dataTransfer = new DataTransfer();
newFiles.forEach((file) => dataTransfer.items.add(file));
setAttachedFiles(dataTransfer.files);
const dataTransfer = new DataTransfer();
newFiles.forEach((file) => dataTransfer.items.add(file));

// Extract text from files
extractTextFromFiles(dataTransfer.files).then((data) => {
props.setUploadedFiles(data);
setConvertedAttachedFiles(data);
});
// Extract text from files
extractTextFromFiles(dataTransfer.files).then((data) => {
props.setUploadedFiles(data);
setAttachedFiles(dataTransfer.files);
setConvertedAttachedFiles(data);
});
}

// Set focus to the input for user message after uploading files
chatInputRef?.current?.focus();
Expand Down
14 changes: 7 additions & 7 deletions src/khoj/processor/content/pdf/pdf_to_entries.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
import tempfile
from io import BytesIO
from typing import Dict, List, Tuple
from typing import Dict, Final, List, Tuple

from langchain_community.document_loaders import PyMuPDFLoader

Expand All @@ -15,6 +14,9 @@


class PdfToEntries(TextToEntries):
# Class-level constant translation table
NULL_TRANSLATOR: Final = str.maketrans("", "", "\x00")

def __init__(self):
super().__init__()

Expand Down Expand Up @@ -112,8 +114,6 @@ def extract_text(pdf_file):

@staticmethod
def clean_text(text: str) -> str:
# Remove null bytes
text = text.replace("\x00", "")
# Replace invalid Unicode
text = text.encode("utf-8", errors="ignore").decode("utf-8")
return text
"""Clean PDF text by removing null bytes and invalid Unicode characters."""
# Use faster translation table instead of replace
return text.translate(PdfToEntries.NULL_TRANSLATOR)
2 changes: 0 additions & 2 deletions src/khoj/routers/api_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1133,7 +1133,6 @@ def collect_telemetry():
online_results=online_results,
query_images=uploaded_images,
train_of_thought=train_of_thought,
attached_file_context=attached_file_context,
raw_query_files=raw_query_files,
tracer=tracer,
)
Expand Down Expand Up @@ -1194,7 +1193,6 @@ def collect_telemetry():
online_results=online_results,
query_images=uploaded_images,
train_of_thought=train_of_thought,
attached_file_context=attached_file_context,
raw_query_files=raw_query_files,
tracer=tracer,
)
Expand Down

0 comments on commit dd36303

Please sign in to comment.