Skip to content

Commit 9b5b6ba

Browse files
Merge #166
166: Add methods to automatically add/update documents in batches r=curquiza a=alejandrocq Hi, I have added the methods to add/update documents in batches (#157) in `Index.cs`. I have not been able to run the tests correctly because I get the following error in most of them: ``` Meilisearch.MeilisearchApiError MeilisearchApiError, Message: The Content-Type "application/json; charset=utf-8" is invalid. Accepted values for the Content-Type header are: "application/json", "application/x-ndjson", "application/csv" ``` It seems to be an error with the `PostJsonAsync` method adding the charset parameter, but I suppose that has been working correctly for you. Let me know if the code is okay and what I should do to fix that error. I will keep investigating if I get some time. Co-authored-by: Alejandro Castilla Quesada <[email protected]>
2 parents 14f1895 + 571497e commit 9b5b6ba

File tree

3 files changed

+126
-1
lines changed

3 files changed

+126
-1
lines changed

src/Meilisearch/Index.cs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,25 @@ public async Task<UpdateStatus> AddDocuments<T>(IEnumerable<T> documents, string
133133
return await responseMessage.Content.ReadFromJsonAsync<UpdateStatus>();
134134
}
135135

136+
/// <summary>
/// Adds the given documents by splitting them into batches of <paramref name="batchSize"/> and adding each batch in turn.
/// </summary>
/// <param name="documents">Documents to add.</param>
/// <param name="batchSize">Number of documents per batch.</param>
/// <param name="primaryKey">Primary key for the documents, forwarded to every batch.</param>
/// <typeparam name="T">Type of the document. Even though documents are schemaless in MeiliSearch, making it typed helps in compile time.</typeparam>
/// <returns>Returns the update statuses collected from the batches, one per batch, in the order the batches were sent.</returns>
public async Task<IEnumerable<UpdateStatus>> AddDocumentsInBatches<T>(IEnumerable<T> documents, int batchSize = 1000, string primaryKey = default)
{
    // Each batch goes through the regular AddDocuments call; its status is appended to the shared list.
    return await BatchOperation<T>(
        documents,
        batchSize,
        async (batch, statuses) => statuses.Add(await this.AddDocuments(batch, primaryKey)));
}
154+
136155
/// <summary>
137156
/// Update documents.
138157
/// </summary>
@@ -155,6 +174,25 @@ public async Task<UpdateStatus> UpdateDocuments<T>(IEnumerable<T> documents, str
155174
return await responseMessage.Content.ReadFromJsonAsync<UpdateStatus>();
156175
}
157176

177+
/// <summary>
/// Updates the given documents by splitting them into batches of <paramref name="batchSize"/> and updating each batch in turn.
/// </summary>
/// <param name="documents">Documents to update.</param>
/// <param name="batchSize">Number of documents per batch.</param>
/// <param name="primaryKey">Primary key for the documents, forwarded to every batch.</param>
/// <typeparam name="T">Type of the document. Even though documents are schemaless in MeiliSearch, making it typed helps in compile time.</typeparam>
/// <returns>Returns the update statuses collected from the batches, one per batch, in the order the batches were sent.</returns>
public async Task<IEnumerable<UpdateStatus>> UpdateDocumentsInBatches<T>(IEnumerable<T> documents, int batchSize = 1000, string primaryKey = default)
{
    // Each batch goes through the regular UpdateDocuments call; its status is appended to the shared list.
    return await BatchOperation<T>(
        documents,
        batchSize,
        async (batch, statuses) => statuses.Add(await this.UpdateDocuments(batch, primaryKey)));
}
195+
158196
/// <summary>
159197
/// Get document by its ID.
160198
/// </summary>
@@ -621,5 +659,19 @@ internal Index WithHttpClient(HttpRequest http)
621659
this.http = http;
622660
return this;
623661
}
662+
663+
/// <summary>
/// Splits <paramref name="items"/> into consecutive batches of at most <paramref name="batchSize"/> elements
/// and awaits <paramref name="action"/> for each batch, one after another.
/// </summary>
/// <param name="items">Items to split. The sequence is copied into a list once before batching.</param>
/// <param name="batchSize">Maximum number of items per batch. Must be greater than zero.</param>
/// <param name="action">Callback invoked with each batch and the shared result list it may append to.</param>
/// <typeparam name="T">Type of the items.</typeparam>
/// <returns>Returns the list that was passed to every <paramref name="action"/> invocation.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="items"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="batchSize"/> is zero or negative.</exception>
private static async Task<List<UpdateStatus>> BatchOperation<T>(IEnumerable<T> items, int batchSize, Func<List<T>, List<UpdateStatus>, Task> action)
{
    if (items == null)
    {
        throw new ArgumentNullException(nameof(items));
    }

    // A zero batch size would never make progress and a negative one would silently send nothing.
    if (batchSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be greater than zero.");
    }

    var itemsList = new List<T>(items);
    var result = new List<UpdateStatus>();
    var offset = 0;
    while (offset < itemsList.Count)
    {
        // The last batch may hold fewer than batchSize items; clamp the count so GetRange stays in bounds.
        var count = Math.Min(batchSize, itemsList.Count - offset);
        await action.Invoke(itemsList.GetRange(offset, count), result);

        // Advancing by the clamped count keeps offset <= Count, so it cannot overflow.
        offset += count;
    }

    return result;
}
624676
}
625677
}

tests/Meilisearch.Tests/DocumentTests.cs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,35 @@ public async Task BasicDocumentsAddition()
3636
docs.First().Genre.Should().BeNull();
3737
}
3838

39+
[Fact]
public async Task BasicDocumentsAdditionInBatches()
{
    Index index = this.client.Index("BasicDocumentsAdditionInBatchesTest");

    // Four movies sent with a batch size of two.
    var movies = new[]
    {
        new Movie { Id = "1", Name = "Batman" },
        new Movie { Id = "2", Name = "Reservoir Dogs" },
        new Movie { Id = "3", Name = "Taxi Driver" },
        new Movie { Id = "4", Name = "Interstellar" },
    };

    // Check every returned update and wait on it before reading the documents back.
    foreach (var update in await index.AddDocumentsInBatches(movies, 2))
    {
        update.UpdateId.Should().BeGreaterOrEqualTo(0);
        await index.WaitForPendingUpdate(update.UpdateId);
    }

    // Check the documents have been added (one movie from each batch)
    var stored = (await index.GetDocuments<Movie>()).ToList();
    var fromFirstBatch = stored[0];
    var fromSecondBatch = stored[2];
    Assert.Equal("1", fromFirstBatch.Id);
    Assert.Equal("Batman", fromFirstBatch.Name);
    Assert.Equal("3", fromSecondBatch.Id);
    Assert.Equal("Taxi Driver", fromSecondBatch.Name);
}
67+
3968
[Fact]
4069
public async Task BasicDocumentsAdditionWithCreateIndex()
4170
{
@@ -124,6 +153,50 @@ public async Task BasicDocumentsUpdate()
124153
docs.ElementAt(1).Genre.Should().BeNull();
125154
}
126155

156+
[Fact]
public async Task BasicDocumentsUpdateInBatches()
{
    Index index = this.client.Index("BasicDocumentsUpdateInBatchesTest");

    // Seed the index with four movies that have no genre, in batches of two.
    var initialMovies = new[]
    {
        new Movie { Id = "1", Name = "Batman" },
        new Movie { Id = "2", Name = "Reservoir Dogs" },
        new Movie { Id = "3", Name = "Taxi Driver" },
        new Movie { Id = "4", Name = "Interstellar" },
    };
    foreach (var addition in await index.AddDocumentsInBatches(initialMovies, 2))
    {
        addition.UpdateId.Should().BeGreaterOrEqualTo(0);
        await index.WaitForPendingUpdate(addition.UpdateId);
    }

    // Send the same movies again through the batched update, this time with a genre.
    var moviesWithGenre = new[]
    {
        new Movie { Id = "1", Name = "Batman", Genre = "Action" },
        new Movie { Id = "2", Name = "Reservoir Dogs", Genre = "Drama" },
        new Movie { Id = "3", Name = "Taxi Driver", Genre = "Drama" },
        new Movie { Id = "4", Name = "Interstellar", Genre = "Sci-Fi" },
    };
    foreach (var change in await index.UpdateDocumentsInBatches(moviesWithGenre, 2))
    {
        change.UpdateId.Should().BeGreaterOrEqualTo(0);
        await index.WaitForPendingUpdate(change.UpdateId);
    }

    // Assert movies have genre after update
    var stored = (await index.GetDocuments<Movie>()).ToList();
    foreach (var movie in stored)
    {
        movie.Genre.Should().NotBeNull().And.NotBeEmpty();
    }
}
199+
127200
[Fact]
128201
public async Task DocumentsUpdateWithPrimaryKey()
129202
{

tests/Meilisearch.Tests/SearchTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ public async Task CustomSearchWithFilterWithSpaces()
158158
});
159159
movies.Hits.Should().NotBeEmpty();
160160
movies.FacetsDistribution.Should().BeNull();
161-
Assert.Equal(1, movies.Hits.Count());
161+
Assert.Single(movies.Hits);
162162
Assert.Equal("1344", movies.Hits.First().Id);
163163
Assert.Equal("The Hobbit", movies.Hits.First().Name);
164164
}

0 commit comments

Comments
 (0)