Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions meilisearch/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,44 @@ def add_documents(self, documents, primary_key=None):
url = f'{self.config.paths.index}/{self.uid}/{self.config.paths.document}?{primary_key}'
return self.http.post(url, documents)

def add_documents_in_batches(self, documents, batch_size=1000, primary_key=None):
    """Add documents to the index in batches.

    The documents are split into consecutive slices of at most ``batch_size``
    items, and each slice is sent with its own call to ``add_documents``.

    Parameters
    ----------
    documents: list
        List of documents. Each document should be a dictionary.
    batch_size (optional): int
        The number of documents that should be included in each batch. Default = 1000
        Must be at least 1.
    primary_key (optional): string
        The primary-key used in index. Ignored if already set up.

    Returns
    -------
    update: list[dict]
        List of dictionaries, one per batch and in batch order, each containing
        an update id to track the action:
        https://docs.meilisearch.com/reference/api/updates.html#get-an-update-status
        An empty ``documents`` list yields an empty list.

    Raises
    ------
    ValueError
        If ``batch_size`` is lower than 1.
    MeiliSearchApiError
        An error containing details about why MeiliSearch can't process your request.
        MeiliSearch error codes are described here: https://docs.meilisearch.com/errors/#meilisearch-errors
    """
    # A negative step would make the range below empty and silently drop every
    # document; a zero step would fail with an unrelated-looking range() error.
    # Reject both up front with a clear message.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    return [
        self.add_documents(documents[start : start + batch_size], primary_key)
        for start in range(0, len(documents), batch_size)
    ]

def update_documents(self, documents, primary_key=None):
"""Update documents in the index.

Expand Down
26 changes: 26 additions & 0 deletions meilisearch/tests/index/test_index_document_meilisearch.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# pylint: disable=invalid-name

from math import ceil

import pytest

def test_get_documents_default(empty_index):
Expand All @@ -18,6 +20,30 @@ def test_add_documents(empty_index, small_movies):
assert index.get_primary_key() == 'id'
assert update['status'] == 'processed'


@pytest.mark.parametrize("batch_size", [2, 3, 1000])
@pytest.mark.parametrize(
    "primary_key, expected_primary_key", [("release_date", "release_date"), (None, "id")]
)
def test_add_documents_in_batches(
    batch_size,
    primary_key,
    expected_primary_key,
    empty_index,
    small_movies,
):
    """Tests adding documents in batches, with and without an explicit primary key."""
    index = empty_index()
    # One update is expected per batch; the last batch may be partially filled.
    expected_batches = ceil(len(small_movies) / batch_size)

    updates = index.add_documents_in_batches(small_movies, batch_size, primary_key)
    assert expected_batches == len(updates)

    # Every batch must have been queued and then processed successfully.
    for queued in updates:
        assert "updateId" in queued
        finished = index.wait_for_pending_update(queued["updateId"])
        assert finished["status"] == "processed"

    assert index.get_primary_key() == expected_primary_key


def test_get_document(index_with_documents):
"""Tests getting one document from a populated index."""
response = index_with_documents().get_document('500682')
Expand Down