
Commit 91689b6

Integrated ParsedMedia into evidence gathering, with tests
1 parent 8a409a8 commit 91689b6

File tree

8 files changed: +8413 -8246 lines


src/paperqa/core.py

Lines changed: 19 additions & 7 deletions
@@ -9,6 +9,7 @@
 from aviary.core import Message
 from lmi import LLMModel
 
+from paperqa.prompts import text_with_tables_prompt_template
 from paperqa.types import Context, LLMResult, Text
 from paperqa.utils import extract_score, strip_citations
 
@@ -164,18 +165,31 @@ async def map_fxn_summary(
     citation = text.name + ": " + text.doc.formatted_citation
     success = False
 
+    # Strip newlines in case chunking led to blank lines,
+    # but not spaces, to preserve text alignment
+    cleaned_text = text.text.strip("\n")
     if summary_llm_model and prompt_templates:
+        media_text: list[str] = [m.text for m in text.media if m.text]
         data = {
             "question": question,
             "citation": citation,
-            # Strip newlines in case chunking led to blank lines,
-            # but not spaces, to preserve text alignment
-            "text": text.text.strip("\n"),
+            "text": (
+                text_with_tables_prompt_template.format(
+                    text=cleaned_text,
+                    citation=citation,
+                    tables="\n\n----\n\n".join(media_text),
+                )
+                if media_text
+                else cleaned_text
+            ),
         } | (extra_prompt_data or {})
         message_prompt, system_prompt = prompt_templates
         messages = [
             Message(role="system", content=system_prompt.format(**data)),
-            Message(role="user", content=message_prompt.format(**data)),
+            Message.create_message(
+                text=message_prompt.format(**data),
+                images=[i.to_image_url() for i in text.media] if text.media else None,
+            ),
         ]
         llm_result = await summary_llm_model.call_single(
            messages=messages,
@@ -199,9 +213,7 @@ async def map_fxn_summary(
         except KeyError:
             success = False
     else:
-        # Strip newlines in case chunking led to blank lines,
-        # but not spaces, to preserve text alignment
-        context = text.text.strip("\n")
+        context = cleaned_text
     # If we don't assign scores, just default to 5.
     # why 5? Because we filter out 0s in another place
     # and 5/10 is the other default I could come up with
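
Note on the change above: when a chunk carries parsed media, its markdown tables are appended to the excerpt via the new text_with_tables_prompt_template, and the images themselves ride along in a multimodal user message. Below is a rough standalone sketch of the text-assembly half only (not paperqa's actual code; build_excerpt, chunk_text, table_markdown, and citation are hypothetical stand-ins for the Text.text, ParsedMedia.text, and formatted-citation values used in the diff). The template string is the one added in prompts.py further down.

# Sketch only: stand-in for the text-assembly logic in map_fxn_summary above.
text_with_tables_prompt_template = (
    "{text}\n\n------------\n\nMarkdown tables from {citation}."
    " If the markdown is garbled, refer to the images"
    "\n\n------------\n\n{tables}"
)


def build_excerpt(chunk_text: str, table_markdown: list[str], citation: str) -> str:
    # Strip newlines left over from chunking, but keep spaces for alignment
    cleaned_text = chunk_text.strip("\n")
    if not table_markdown:
        # No parsed media on this chunk: behave exactly as before
        return cleaned_text
    # Append the chunk's markdown tables after a separator
    return text_with_tables_prompt_template.format(
        text=cleaned_text,
        citation=citation,
        tables="\n\n----\n\n".join(table_markdown),
    )


print(build_excerpt("Results...\n", ["| a | b |\n|---|---|\n| 1 | 2 |"], "Doe 2024"))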

src/paperqa/docs.py

Lines changed: 3 additions & 2 deletions
@@ -380,17 +380,18 @@ async def aadd(  # noqa: PLR0912
                 doc, **(query_kwargs | kwargs)
             )
 
-        texts = await read_doc(
+        texts, metadata = await read_doc(
             path,
             doc,
             chunk_chars=parse_config.chunk_size,
             overlap=parse_config.overlap,
             page_size_limit=parse_config.page_size_limit,
             use_block_parsing=parse_config.pdfs_use_block_parsing,
             parse_pdf=parse_config.parse_pdf,
+            include_metadata=True,
         )
         # loose check to see if document was loaded
-        if (
+        if metadata.parse_type != "image" and (
             not texts
             or len(texts[0].text) < 10  # noqa: PLR2004
             or (
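
The reason for the loosened check: an image-only parse legitimately yields little or no extractable text, so the "was the document actually loaded?" heuristic should not reject it. A simplified sketch of that predicate (hypothetical helper, not the real Docs.aadd):

def looks_unparsed(texts: list[str], parse_type: str) -> bool:
    # Image parses are allowed to have no text; everything else must
    # produce at least a minimally sized first chunk.
    return parse_type != "image" and (not texts or len(texts[0]) < 10)


print(looks_unparsed([], parse_type="image"))    # False: image-only docs pass
print(looks_unparsed([], parse_type="pdf"))      # True: likely a failed parse
print(looks_unparsed(["ok"], parse_type="pdf"))  # True: suspiciously little text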

src/paperqa/prompts.py

Lines changed: 24 additions & 14 deletions
@@ -1,20 +1,26 @@
 from datetime import datetime
 
-# ruff: noqa: E501
-
 summary_prompt = (
     "Summarize the excerpt below to help answer a question.\n\nExcerpt from"
-    " {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\nDo not directly"
+    " {citation}\n\n------------\n\n{text}\n\n------------"
+    "\n\nQuestion: {question}\n\nDo not directly"
     " answer the question, instead summarize to give evidence to help answer the"
     " question. Stay detailed; report specific numbers, equations, or direct quotes"
     ' (marked with quotation marks). Reply "Not applicable" if the excerpt is'
     " irrelevant. At the end of your response, provide an integer score from 1-10 on a"
     " newline indicating relevance to question. Do not explain your score.\n\nRelevant"
     " Information Summary ({summary_length}):"
 )
+# This prompt template integrates with `text` variable of the above `summary_prompt`
+text_with_tables_prompt_template = (
+    "{text}\n\n------------\n\nMarkdown tables from {citation}."
+    " If the markdown is garbled, refer to the images"
+    "\n\n------------\n\n{tables}"
+)
 
 summary_json_prompt = (
-    "Excerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\n"
+    "Excerpt from {citation}\n\n------------\n\n{text}\n\n------------"
+    "\n\nQuestion: {question}\n\n"
 )
 
 # The below "cannot answer" sentinel phrase should:
@@ -45,7 +51,7 @@
 
 qa_prompt = (
     "Answer the question below with the context.\n\n"
-    "Context:\n\n{context}\n\n----\n\n"
+    "Context:\n\n{context}\n\n------------\n\n"
     "Question: {question}\n\n"
     "Write an answer based on the context. "
     "If the context provides insufficient information reply "
@@ -99,15 +105,19 @@
 )
 
 # NOTE: we use double curly braces here so it's not considered an f-string template
-summary_json_system_prompt = """\
-Provide a summary of the relevant information that could help answer the question based on the excerpt. Respond with the following JSON format:
-
-{{
-  "summary": "...",
-  "relevance_score": "..."
-}}
-
-where `summary` is relevant information from the text - {summary_length} words. `relevance_score` is an integer 1-10 for the relevance of `summary` to the question."""
+summary_json_system_prompt = (
+    "Provide a summary of the relevant information"
+    " that could help answer the question based on the excerpt."
+    " Your summary, combined with many others,"
+    " will be given to the model to generate an answer."
+    " Respond with the following JSON format:"
+    '\n\n{{\n  "summary": "...",\n  "relevance_score": "..."\n  "used_images"\n}}'
+    "\n\nwhere `summary` is relevant information from the text - {summary_length} words."
+    " `relevance_score` is an integer 1-10 for the relevance of `summary` to the question."
+    " `used_images` is a boolean flag indicating"
+    " if any images present in a multimodal message were used,"
+    " and if no images were present it should be false."
+)
 
 env_system_prompt = (
     # Matching https://github.com/langchain-ai/langchain/blob/langchain%3D%3D0.2.3/libs/langchain/langchain/agents/openai_functions_agent/base.py#L213-L215
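
As the NOTE above says, the double curly braces keep the JSON skeleton intact when the prompt is run through str.format, so only {summary_length} is substituted. A quick illustration with an abbreviated template and a made-up word count:

# Abbreviated template: shows only the brace/placeholder behavior, not the full prompt
template = (
    "Respond with the following JSON format:"
    '\n\n{{\n  "summary": "...",\n  "relevance_score": "..."\n}}'
    "\n\nwhere `summary` is {summary_length} words."
)
print(template.format(summary_length="about 100"))
# Respond with the following JSON format:
#
# {
#   "summary": "...",
#   "relevance_score": "..."
# }
#
# where `summary` is about 100 words.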

src/paperqa/settings.py

Lines changed: 5 additions & 1 deletion
@@ -523,7 +523,11 @@ class IndexSettings(BaseModel):
         ),
     )
     files_filter: Callable[[anyio.Path | pathlib.Path], bool] = Field(
-        default=lambda f: f.suffix in {".txt", ".pdf", ".html", ".md"},
+        default=lambda f: (
+            f.suffix
+            # TODO: add images after embeddings are supported
+            in {".txt", ".pdf", ".html", ".md"}
+        ),
         exclude=True,
         description=(
             "Filter function to apply to files in the paper directory."
