Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mocking Pinecone tests #2778

Merged
merged 15 commits into from
Jul 14, 2022
98 changes: 47 additions & 51 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -428,68 +428,64 @@ jobs:
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate


# FIXME: This block should be uncommented as soon as Pinecone tests are fixed
# as part of the fixes discussed in #2644.
# Check locally for these tests to pass before uncommenting.
#
# pinecone-tests-linux:
# needs:
# - mypy
# - pylint
# runs-on: ubuntu-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') || !github.event.pull_request.draft
pinecone-tests-linux:
needs:
- mypy
- pylint
runs-on: ubuntu-latest
if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') || !github.event.pull_request.draft

# steps:
# - uses: actions/checkout@v2
steps:
- uses: actions/checkout@v2

# - name: Setup Python
# uses: ./.github/actions/python_cache/
- name: Setup Python
uses: ./.github/actions/python_cache/

# # TODO Let's try to remove this one from the unit tests
# - name: Install pdftotext
# run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
# TODO Let's try to remove this one from the unit tests
- name: Install pdftotext
run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

# - name: Install Haystack
# run: pip install .[pinecone]
- name: Install Haystack
run: pip install .[pinecone]

# - name: Run tests
# env:
# PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone
- name: Run tests
env:
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone

# FIXME: very slow and has very little platform dependency, so evaluate whether this job is worth keeping
# pinecone-tests-windows:
# needs:
# - mypy
# - pylint
# runs-on: windows-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft

# steps:
# - uses: actions/checkout@v2
pinecone-tests-windows:
needs:
- mypy
- pylint
runs-on: windows-latest
if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft

# - name: Setup Python
# uses: ./.github/actions/python_cache/
# with:
# prefix: windows
steps:
- uses: actions/checkout@v2

# - name: Install pdftotext
# run: |
# choco install xpdf-utils
# choco install openjdk11
# refreshenv
- name: Setup Python
uses: ./.github/actions/python_cache/
with:
prefix: windows

# - name: Install Haystack
# run: pip install .[pinecone]
- name: Install pdftotext
run: |
choco install xpdf-utils
choco install openjdk11
refreshenv

# - name: Run tests
# env:
# TOKENIZERS_PARALLELISM: 'false'
# PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
- name: Install Haystack
run: pip install .[pinecone]

- name: Run tests
env:
TOKENIZERS_PARALLELISM: 'false'
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone


rest-and-ui-tests-linux:
Expand Down
51 changes: 41 additions & 10 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@

from haystack.schema import Document

from .mocks import pinecone as pinecone_mock


# To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below
SQL_TYPE = "sqlite"
Expand Down Expand Up @@ -159,9 +161,9 @@ def pytest_collection_modifyitems(config, items):
item.add_marker(skip_milvus)

# Skip PineconeDocumentStore if PINECONE_API_KEY not in environment variables
if not os.environ.get("PINECONE_API_KEY", False) and "pinecone" in keywords:
skip_pinecone = pytest.mark.skip(reason="PINECONE_API_KEY not in environment variables.")
item.add_marker(skip_pinecone)
# if not os.environ.get("PINECONE_API_KEY", False) and "pinecone" in keywords:
# skip_pinecone = pytest.mark.skip(reason="PINECONE_API_KEY not in environment variables.")
# item.add_marker(skip_pinecone)


#
Expand Down Expand Up @@ -742,8 +744,22 @@ def ensure_ids_are_correct_uuids(docs: list, document_store: object) -> None:
d["id"] = str(uuid.uuid4())


# FIXME Fix this in the docstore tests refactoring
from inspect import getmembers, isclass, isfunction


def mock_pinecone(monkeypatch):
    """Patch the ``pinecone`` package with the members of the local mock module.

    Every function, then every class, found in ``pinecone_mock`` is installed
    under the same name on ``pinecone`` through ``monkeypatch``.

    :param monkeypatch: the pytest ``monkeypatch`` fixture used to apply the patches.
    """
    # Functions are patched first, then classes, one attribute at a time.
    for predicate in (isfunction, isclass):
        for member_name, mock_member in getmembers(pinecone_mock, predicate):
            # raising=False: do not fail when ``pinecone`` lacks an attribute with this name.
            monkeypatch.setattr(f"pinecone.{member_name}", mock_member, raising=False)


@pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"])
def document_store_with_docs(request, docs, tmp_path):
def document_store_with_docs(request, docs, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store(
document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path
Expand All @@ -754,7 +770,10 @@ def document_store_with_docs(request, docs, tmp_path):


@pytest.fixture
def document_store(request, tmp_path):
def document_store(request, tmp_path, monkeypatch: pytest.MonkeyPatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store(
document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path
Expand All @@ -764,7 +783,10 @@ def document_store(request, tmp_path):


@pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"])
def document_store_dot_product(request, tmp_path):
def document_store_dot_product(request, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store(
document_store_type=request.param,
Expand All @@ -777,7 +799,10 @@ def document_store_dot_product(request, tmp_path):


@pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"])
def document_store_dot_product_with_docs(request, docs, tmp_path):
def document_store_dot_product_with_docs(request, docs, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store(
document_store_type=request.param,
Expand All @@ -791,7 +816,10 @@ def document_store_dot_product_with_docs(request, docs, tmp_path):


@pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "pinecone"])
def document_store_dot_product_small(request, tmp_path):
def document_store_dot_product_small(request, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
document_store = get_document_store(
document_store_type=request.param,
Expand All @@ -804,7 +832,10 @@ def document_store_dot_product_small(request, tmp_path):


@pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"])
def document_store_small(request, tmp_path):
def document_store_small(request, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
document_store = get_document_store(
document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path
Expand Down Expand Up @@ -931,7 +962,7 @@ def get_document_store(

elif document_store_type == "pinecone":
document_store = PineconeDocumentStore(
api_key=os.environ["PINECONE_API_KEY"],
api_key=os.environ.get("PINECONE_API_KEY"),
embedding_dim=embedding_dim,
embedding_field=embedding_field,
index=index,
Expand Down
20 changes: 18 additions & 2 deletions test/mocks/pinecone_mock.py → test/mocks/pinecone.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from typing import Optional, List

import logging

logger = logging.getLogger(__name__)


# Mock Pinecone instance
CONFIG: dict = {"api_key": None, "environment": None, "indexes": {}}

Expand Down Expand Up @@ -87,7 +92,9 @@ def query(
def fetch(self, ids: List[str], namespace: str = ""):
response: dict = {"namespace": namespace, "vectors": {}}
if namespace not in self.index_config.namespaces:
raise ValueError("Namespace not found")
# If we query an empty/non-existent namespace, Pinecone will just return an empty response
logger.warning(f"No namespace called '{namespace}'")
return response
records = self.index_config.namespaces[namespace]
for record in records:
if record["id"] in ids.copy():
Expand All @@ -98,7 +105,16 @@ def fetch(self, ids: List[str], namespace: str = ""):
}
return response

def delete(self, ids: Optional[List[str]] = None, namespace: str = "", filters: Optional[dict] = None):
def delete(
self,
ids: Optional[List[str]] = None,
namespace: str = "",
filters: Optional[dict] = None,
delete_all: bool = False,
):
if delete_all:
self.index_config.namespaces[namespace] = []

if namespace not in self.index_config.namespaces:
pass
elif ids is not None:
Expand Down