From dff2f8afd1395acffebe194ade284ea474ffacc4 Mon Sep 17 00:00:00 2001
From: Ivaylo Gochkov <ivaylo@gochkov.com>
Date: Mon, 28 Oct 2024 09:40:26 +0100
Subject: [PATCH 1/4] SearxNG API Retriever implementation

---
 gpt_researcher/retrievers/searx/searx.py | 86 +++++++++++++++++-------
 1 file changed, 60 insertions(+), 26 deletions(-)

diff --git a/gpt_researcher/retrievers/searx/searx.py b/gpt_researcher/retrievers/searx/searx.py
index 46bc10cb6..852bf3e33 100644
--- a/gpt_researcher/retrievers/searx/searx.py
+++ b/gpt_researcher/retrievers/searx/searx.py
@@ -1,45 +1,79 @@
-# Tavily API Retriever
-
-# libraries
 import os
-from langchain_community.utilities import SearxSearchWrapper
+import json
+import requests
+from typing import List, Dict
+from urllib.parse import urljoin
 
 
 class SearxSearch():
     """
-    Tavily API Retriever
+    SearxNG API Retriever
     """
-    def __init__(self, query):
+    def __init__(self, query: str):
         """
-        Initializes the TavilySearch object
+        Initializes the SearxSearch object
         Args:
-            query:
+            query: Search query string
         """
         self.query = query
-        self.api_key = self.get_api_key()
+        self.base_url = self.get_searxng_url()
 
-    def get_api_key(self):
+    def get_searxng_url(self) -> str:
         """
-        Gets the Tavily API key
+        Gets the SearxNG instance URL from environment variables
         Returns:
-
+            str: Base URL of SearxNG instance
         """
-        # Get the API key
         try:
-            api_key = os.environ["SEARX_URL"]
-        except:
-            raise Exception("Searx URL key not found. Please set the SEARX_URL environment variable. "
-                            "You can get your key from https://searx.space/")
-        return api_key
+            base_url = os.environ["SEARX_URL"]
+            if not base_url.endswith('/'):
+                base_url += '/'
+            return base_url
+        except KeyError:
+            raise Exception(
+                "SearxNG URL not found. Please set the SEARX_URL environment variable. "
+                "You can find public instances at https://searx.space/"
+            )
 
-    def search(self, max_results=7):
+    def search(self, max_results: int = 7) -> List[Dict[str, str]]:
         """
-        Searches the query
+        Searches the query using SearxNG API
+        Args:
+            max_results: Maximum number of results to return
         Returns:
-
+            List of dictionaries containing search results
         """
-        searx = SearxSearchWrapper(searx_host=os.environ["SEARX_URL"])
-        results = searx.results(self.query, max_results)
-        # Normalizing results to match the format of the other search APIs
-        search_response = [{"href": obj["link"], "body": obj["snippet"]} for obj in results]
-        return search_response
+        search_url = urljoin(self.base_url, "search")
+        
+        params = {
+            'q': self.query,
+            'format': 'json',
+            'pageno': 1,
+            'categories': 'general',
+            'engines': 'google,bing,duckduckgo',  # TODO: Add environment variable to customize the engines
+            'results': max_results
+        }
+
+        try:
+            response = requests.get(
+                search_url,
+                params=params,
+                headers={'Accept': 'application/json'}
+            )
+            response.raise_for_status()
+            results = response.json()
+
+            # Normalize results to match the expected format
+            search_response = []
+            for result in results.get('results', [])[:max_results]:
+                search_response.append({
+                    "href": result.get('url', ''),
+                    "body": result.get('content', '')
+                })
+
+            return search_response
+
+        except requests.exceptions.RequestException as e:
+            raise Exception(f"Error querying SearxNG: {str(e)}")
+        except json.JSONDecodeError:
+            raise Exception("Error parsing SearxNG response")

From c54003dada9a2ba7c194729065cf3807a39a78c5 Mon Sep 17 00:00:00 2001
From: Ivaylo Gochkov <ivaylo@gochkov.com>
Date: Mon, 28 Oct 2024 21:05:43 +0100
Subject: [PATCH 2/4] Simplified parameters

---
 gpt_researcher/retrievers/searx/searx.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/gpt_researcher/retrievers/searx/searx.py b/gpt_researcher/retrievers/searx/searx.py
index 852bf3e33..912ea27d0 100644
--- a/gpt_researcher/retrievers/searx/searx.py
+++ b/gpt_researcher/retrievers/searx/searx.py
@@ -35,7 +35,7 @@ def get_searxng_url(self) -> str:
                 "You can find public instances at https://searx.space/"
             )
 
-    def search(self, max_results: int = 7) -> List[Dict[str, str]]:
+    def search(self, max_results: int = 10) -> List[Dict[str, str]]:
         """
         Searches the query using SearxNG API
         Args:
@@ -46,12 +46,10 @@ def search(self, max_results: int = 7) -> List[Dict[str, str]]:
         search_url = urljoin(self.base_url, "search")
         
         params = {
-            'q': self.query,
-            'format': 'json',
-            'pageno': 1,
-            'categories': 'general',
-            'engines': 'google,bing,duckduckgo',  # TODO: Add environment variable to customize the engines
-            'results': max_results
+            # The search query.
+            'q': self.query, 
+            # Output format of results. Format needs to be activated in searxng config.
+            'format': 'json'
         }
 
         try:

From e5df45ac0f7a3f333db0d63487a81eb574500357 Mon Sep 17 00:00:00 2001
From: Ivaylo Gochkov <ivaylo@gochkov.com>
Date: Mon, 28 Oct 2024 22:09:34 +0100
Subject: [PATCH 3/4] Add langchain-ollama to requirements

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index c7575b04f..626f683ce 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,6 +12,7 @@ markdown
 langchain
 langchain_community
 langchain-openai
+langchain-ollama
 langgraph
 tiktoken
 gpt-researcher

From cbb986b5a2b65fed8ee257722eb86a088c3b7ff1 Mon Sep 17 00:00:00 2001
From: Ivaylo Gochkov <ivaylo@gochkov.com>
Date: Mon, 28 Oct 2024 22:19:10 +0100
Subject: [PATCH 4/4] fixed warnings

---
 Dockerfile                                  | 7 -------
 gpt_researcher/llm_provider/generic/base.py | 2 +-
 gpt_researcher/memory/embeddings.py         | 2 +-
 gpt_researcher/scraper/arxiv/arxiv.py       | 2 +-
 4 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index ba4e29a32..9cb4fba85 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -29,13 +29,6 @@ RUN pip install --no-cache-dir -r requirements.txt && \
 # Stage 3: Final stage with non-root user and app
 FROM gpt-researcher-install AS gpt-researcher
 
-# Use environment variables for API keys (defaults can be overridden at runtime)
-ARG OPENAI_API_KEY
-ARG TAVILY_API_KEY
-
-ENV OPENAI_API_KEY=${OPENAI_API_KEY}
-ENV TAVILY_API_KEY=${TAVILY_API_KEY}
-
 # Create a non-root user for security
 RUN useradd -ms /bin/bash gpt-researcher && \
     chown -R gpt-researcher:gpt-researcher /usr/src/app
diff --git a/gpt_researcher/llm_provider/generic/base.py b/gpt_researcher/llm_provider/generic/base.py
index 7e64a8254..e7c8e2814 100644
--- a/gpt_researcher/llm_provider/generic/base.py
+++ b/gpt_researcher/llm_provider/generic/base.py
@@ -68,7 +68,7 @@ def from_provider(cls, provider: str, **kwargs: Any):
             llm = ChatFireworks(**kwargs)
         elif provider == "ollama":
             _check_pkg("langchain_community")
-            from langchain_community.chat_models import ChatOllama
+            from langchain_ollama import ChatOllama
             
             llm = ChatOllama(base_url=os.environ["OLLAMA_BASE_URL"], **kwargs)
         elif provider == "together":
diff --git a/gpt_researcher/memory/embeddings.py b/gpt_researcher/memory/embeddings.py
index 0e917ed4e..5bfcc8cb5 100644
--- a/gpt_researcher/memory/embeddings.py
+++ b/gpt_researcher/memory/embeddings.py
@@ -17,7 +17,7 @@ def __init__(self, embedding_provider: str, model: str, **embdding_kwargs: Any):
         _embeddings = None
         match embedding_provider:
             case "ollama":
-                from langchain_community.embeddings import OllamaEmbeddings
+                from langchain_ollama import OllamaEmbeddings
 
                 _embeddings = OllamaEmbeddings(
                     model=model,
diff --git a/gpt_researcher/scraper/arxiv/arxiv.py b/gpt_researcher/scraper/arxiv/arxiv.py
index e3c902830..2af550417 100644
--- a/gpt_researcher/scraper/arxiv/arxiv.py
+++ b/gpt_researcher/scraper/arxiv/arxiv.py
@@ -18,5 +18,5 @@ def scrape(self):
         """
         query = self.link.split("/")[-1]
         retriever = ArxivRetriever(load_max_docs=2, doc_content_chars_max=None)
-        docs = retriever.get_relevant_documents(query=query)
+        docs = retriever.invoke(query=query)
         return docs[0].page_content