Skip to content

Commit 13b7190

Browse files
authored
Download the geoip databases only when needed (#92335)
This commit changes the geoip downloader so that we only download the geoip databases if you have at least one geoip processor in your cluster, or when you add a new geoip processor (or if `ingest.geoip.downloader.eager.download` is explicitly set to true).
1 parent 2cf70ae commit 13b7190

File tree

12 files changed

+684
-60
lines changed

12 files changed

+684
-60
lines changed

docs/changelog/92335.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 92335
2+
summary: Download the geoip databases only when needed
3+
area: Ingest Node
4+
type: bug
5+
issues:
6+
- 90673

docs/reference/ingest/processors/geoip.asciidoc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@ IPv4 or IPv6 address.
1111
By default, the processor uses the GeoLite2 City, GeoLite2 Country, and GeoLite2
1212
ASN GeoIP2 databases from
1313
http://dev.maxmind.com/geoip/geoip2/geolite2/[MaxMind], shared under the
14-
CC BY-SA 4.0 license. {es} automatically downloads updates for
14+
CC BY-SA 4.0 license. {es} automatically downloads these databases if either
15+
`ingest.geoip.downloader.eager.download` is set to true, or your cluster
16+
has at least one pipeline with a `geoip` processor. {es}
17+
automatically downloads updates for
1518
these databases from the Elastic GeoIP endpoint:
1619
https://geoip.elastic.co/v1/database. To get download statistics for these
1720
updates, use the <<geoip-stats-api,GeoIP stats API>>.
@@ -412,6 +415,13 @@ If `true`, {es} automatically downloads and manages updates for GeoIP2 databases
412415
from the `ingest.geoip.downloader.endpoint`. If `false`, {es} does not download
413416
updates and deletes all downloaded databases. Defaults to `true`.
414417

418+
[[ingest-geoip-downloader-eager-download]]
419+
`ingest.geoip.downloader.eager.download`::
(<<dynamic-cluster-setting,Dynamic>>, Boolean)
420+
If `true`, {es} downloads GeoIP2 databases immediately, regardless of whether a
421+
pipeline exists with a geoip processor. If `false`, {es} only begins downloading
422+
the databases if a pipeline with a geoip processor exists or is added. Defaults
423+
to `false`.
424+
415425
[[ingest-geoip-downloader-endpoint]]
416426
`ingest.geoip.downloader.endpoint`::
417427
(<<static-cluster-setting,Static>>, string)

modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java

Lines changed: 128 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,13 @@
2727
import org.elasticsearch.index.query.BoolQueryBuilder;
2828
import org.elasticsearch.index.query.MatchQueryBuilder;
2929
import org.elasticsearch.index.query.RangeQueryBuilder;
30+
import org.elasticsearch.ingest.AbstractProcessor;
3031
import org.elasticsearch.ingest.IngestDocument;
32+
import org.elasticsearch.ingest.Processor;
3133
import org.elasticsearch.ingest.geoip.stats.GeoIpDownloaderStatsAction;
3234
import org.elasticsearch.persistent.PersistentTaskParams;
3335
import org.elasticsearch.persistent.PersistentTasksCustomMetadata;
36+
import org.elasticsearch.plugins.IngestPlugin;
3437
import org.elasticsearch.plugins.Plugin;
3538
import org.elasticsearch.reindex.ReindexPlugin;
3639
import org.elasticsearch.search.SearchHit;
@@ -51,11 +54,13 @@
5154
import java.util.ArrayList;
5255
import java.util.Arrays;
5356
import java.util.Collection;
57+
import java.util.HashMap;
5458
import java.util.Iterator;
5559
import java.util.List;
5660
import java.util.Map;
5761
import java.util.Set;
5862
import java.util.concurrent.TimeUnit;
63+
import java.util.function.BiConsumer;
5964
import java.util.stream.Collectors;
6065
import java.util.stream.Stream;
6166
import java.util.stream.StreamSupport;
@@ -82,7 +87,12 @@ public class GeoIpDownloaderIT extends AbstractGeoIpIT {
8287

8388
@Override
8489
protected Collection<Class<? extends Plugin>> nodePlugins() {
85-
return Arrays.asList(ReindexPlugin.class, IngestGeoIpPlugin.class, GeoIpProcessorNonIngestNodeIT.IngestGeoIpSettingsPlugin.class);
90+
return Arrays.asList(
91+
ReindexPlugin.class,
92+
IngestGeoIpPlugin.class,
93+
GeoIpProcessorNonIngestNodeIT.IngestGeoIpSettingsPlugin.class,
94+
NonGeoProcessorsPlugin.class
95+
);
8696
}
8797

8898
@Override
@@ -104,7 +114,7 @@ public void cleanUp() throws Exception {
104114
.setPersistentSettings(
105115
Settings.builder()
106116
.putNull(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey())
107-
.putNull(GeoIpDownloader.POLL_INTERVAL_SETTING.getKey())
117+
.putNull(GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getKey())
108118
.putNull("ingest.geoip.database_validity")
109119
)
110120
.get();
@@ -149,6 +159,7 @@ public void cleanUp() throws Exception {
149159
@TestLogging(value = "org.elasticsearch.ingest.geoip:TRACE", reason = "https://github.com/elastic/elasticsearch/issues/75221")
150160
public void testInvalidTimestamp() throws Exception {
151161
assumeTrue("only test with fixture to have stable results", ENDPOINT != null);
162+
putGeoIpPipeline();
152163
ClusterUpdateSettingsResponse settingsResponse = client().admin()
153164
.cluster()
154165
.prepareUpdateSettings()
@@ -160,7 +171,7 @@ public void testInvalidTimestamp() throws Exception {
160171
assertEquals(Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb"), state.getDatabases().keySet());
161172
}, 2, TimeUnit.MINUTES);
162173

163-
putPipeline();
174+
putGeoIpPipeline();
164175
verifyUpdatedDatabase();
165176

166177
settingsResponse = client().admin()
@@ -172,7 +183,9 @@ public void testInvalidTimestamp() throws Exception {
172183
settingsResponse = client().admin()
173184
.cluster()
174185
.prepareUpdateSettings()
175-
.setPersistentSettings(Settings.builder().put(GeoIpDownloader.POLL_INTERVAL_SETTING.getKey(), TimeValue.timeValueDays(2)))
186+
.setPersistentSettings(
187+
Settings.builder().put(GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getKey(), TimeValue.timeValueDays(2))
188+
)
176189
.get();
177190
assertTrue(settingsResponse.isAcknowledged());
178191
List<Path> geoIpTmpDirs = getGeoIpTmpDirs();
@@ -186,7 +199,7 @@ public void testInvalidTimestamp() throws Exception {
186199
}
187200
}
188201
});
189-
putPipeline();
202+
putGeoIpPipeline();
190203
assertBusy(() -> {
191204
SimulateDocumentBaseResult result = simulatePipeline();
192205
assertThat(result.getFailure(), nullValue());
@@ -221,14 +234,17 @@ public void testUpdatedTimestamp() throws Exception {
221234
ClusterUpdateSettingsResponse settingsResponse = client().admin()
222235
.cluster()
223236
.prepareUpdateSettings()
224-
.setPersistentSettings(Settings.builder().put(GeoIpDownloader.POLL_INTERVAL_SETTING.getKey(), TimeValue.timeValueDays(2)))
237+
.setPersistentSettings(
238+
Settings.builder().put(GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getKey(), TimeValue.timeValueDays(2))
239+
)
225240
.get();
226241
assertTrue(settingsResponse.isAcknowledged());
227242
assertBusy(() -> assertNotEquals(lastCheck, getGeoIpTaskState().getDatabases().get("GeoLite2-ASN.mmdb").lastCheck()));
228243
testGeoIpDatabasesDownload();
229244
}
230245

231246
public void testGeoIpDatabasesDownload() throws Exception {
247+
putGeoIpPipeline();
232248
ClusterUpdateSettingsResponse settingsResponse = client().admin()
233249
.cluster()
234250
.prepareUpdateSettings()
@@ -283,12 +299,34 @@ public void testGeoIpDatabasesDownload() throws Exception {
283299
}
284300
}
285301

302+
public void testGeoIpDatabasesDownloadNoGeoipProcessors() throws Exception {
303+
assumeTrue("only test with fixture to have stable results", ENDPOINT != null);
304+
String pipelineId = randomAlphaOfLength(10);
305+
putGeoIpPipeline(pipelineId);
306+
ClusterUpdateSettingsResponse settingsResponse = client().admin()
307+
.cluster()
308+
.prepareUpdateSettings()
309+
.setPersistentSettings(Settings.builder().put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), true))
310+
.get();
311+
assertTrue(settingsResponse.isAcknowledged());
312+
assertBusy(() -> { assertNull(getTask().getState()); });
313+
putNonGeoipPipeline(pipelineId);
314+
assertBusy(() -> { assertNull(getTask().getState()); });
315+
putNonGeoipPipeline(pipelineId);
316+
assertNull(getTask().getState());
317+
putGeoIpPipeline();
318+
assertBusy(() -> {
319+
GeoIpTaskState state = getGeoIpTaskState();
320+
assertEquals(Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb"), state.getDatabases().keySet());
321+
}, 2, TimeUnit.MINUTES);
322+
}
323+
286324
@TestLogging(value = "org.elasticsearch.ingest.geoip:TRACE", reason = "https://github.com/elastic/elasticsearch/issues/69972")
287325
public void testUseGeoIpProcessorWithDownloadedDBs() throws Exception {
288326
assumeTrue("only test with fixture to have stable results", ENDPOINT != null);
289327
setupDatabasesInConfigDirectory();
290328
// setup:
291-
putPipeline();
329+
putGeoIpPipeline();
292330

293331
// verify before updating dbs
294332
{
@@ -355,7 +393,7 @@ public void testUseGeoIpProcessorWithDownloadedDBs() throws Exception {
355393
@TestLogging(value = "org.elasticsearch.ingest.geoip:TRACE", reason = "https://github.com/elastic/elasticsearch/issues/79074")
356394
public void testStartWithNoDatabases() throws Exception {
357395
assumeTrue("only test with fixture to have stable results", ENDPOINT != null);
358-
putPipeline();
396+
putGeoIpPipeline();
359397

360398
// Behaviour without any databases loaded:
361399
{
@@ -438,7 +476,21 @@ private SimulateDocumentBaseResult simulatePipeline() throws IOException {
438476
return (SimulateDocumentBaseResult) simulateResponse.getResults().get(0);
439477
}
440478

441-
private void putPipeline() throws IOException {
479+
/**
480+
* This creates a pipeline with a geoip processor, which ought to cause the geoip downloader to begin (assuming it is enabled).
481+
* @throws IOException
482+
*/
483+
private void putGeoIpPipeline() throws IOException {
484+
putGeoIpPipeline("_id");
485+
}
486+
487+
/**
488+
* This creates a pipeline named pipelineId with a geoip processor, which ought to cause the geoip downloader to begin (assuming it is
489+
* enabled).
490+
* @param pipelineId The name of the new pipeline with a geoip processor
491+
* @throws IOException
492+
*/
493+
private void putGeoIpPipeline(String pipelineId) throws IOException {
442494
BytesReference bytes;
443495
try (XContentBuilder builder = JsonXContent.contentBuilder()) {
444496
builder.startObject();
@@ -484,7 +536,45 @@ private void putPipeline() throws IOException {
484536
builder.endObject();
485537
bytes = BytesReference.bytes(builder);
486538
}
487-
assertAcked(client().admin().cluster().preparePutPipeline("_id", bytes, XContentType.JSON).get());
539+
assertAcked(client().admin().cluster().preparePutPipeline(pipelineId, bytes, XContentType.JSON).get());
540+
}
541+
542+
/**
543+
* This creates a pipeline named pipelineId that does _not_ have a geoip processor.
544+
* @throws IOException
545+
*/
546+
private void putNonGeoipPipeline(String pipelineId) throws IOException {
547+
BytesReference bytes;
548+
try (XContentBuilder builder = JsonXContent.contentBuilder()) {
549+
builder.startObject();
550+
{
551+
builder.startArray("processors");
552+
{
553+
builder.startObject();
554+
{
555+
builder.startObject(NonGeoProcessorsPlugin.NON_GEO_PROCESSOR_TYPE);
556+
builder.endObject();
557+
}
558+
builder.endObject();
559+
builder.startObject();
560+
{
561+
builder.startObject(NonGeoProcessorsPlugin.NON_GEO_PROCESSOR_TYPE);
562+
builder.endObject();
563+
}
564+
builder.endObject();
565+
builder.startObject();
566+
{
567+
builder.startObject(NonGeoProcessorsPlugin.NON_GEO_PROCESSOR_TYPE);
568+
builder.endObject();
569+
}
570+
builder.endObject();
571+
}
572+
builder.endArray();
573+
}
574+
builder.endObject();
575+
bytes = BytesReference.bytes(builder);
576+
}
577+
assertAcked(client().admin().cluster().preparePutPipeline(pipelineId, bytes, XContentType.JSON).get());
488578
}
489579

490580
private List<Path> getGeoIpTmpDirs() throws IOException {
@@ -624,4 +714,32 @@ public int read(byte[] b, int off, int len) throws IOException {
624714
return read;
625715
}
626716
}
717+
718+
/**
719+
* This class defines a processor of type "test".
720+
*/
721+
public static final class NonGeoProcessorsPlugin extends Plugin implements IngestPlugin {
722+
public static final String NON_GEO_PROCESSOR_TYPE = "test";
723+
724+
@Override
725+
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
726+
Map<String, Processor.Factory> procMap = new HashMap<>();
727+
procMap.put(NON_GEO_PROCESSOR_TYPE, (factories, tag, description, config) -> new AbstractProcessor(tag, description) {
728+
@Override
729+
public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {}
730+
731+
@Override
732+
public String getType() {
733+
return NON_GEO_PROCESSOR_TYPE;
734+
}
735+
736+
@Override
737+
public boolean isAsync() {
738+
return false;
739+
}
740+
741+
});
742+
return procMap;
743+
}
744+
}
627745
}

modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderStatsIT.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import org.elasticsearch.xcontent.ToXContent;
2121
import org.elasticsearch.xcontent.XContentBuilder;
2222
import org.elasticsearch.xcontent.XContentFactory;
23+
import org.elasticsearch.xcontent.XContentType;
24+
import org.elasticsearch.xcontent.json.JsonXContent;
2325
import org.junit.After;
2426

2527
import java.io.IOException;
@@ -29,6 +31,7 @@
2931
import java.util.Map;
3032
import java.util.stream.Collectors;
3133

34+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
3235
import static org.elasticsearch.xcontent.ToXContent.EMPTY_PARAMS;
3336
import static org.hamcrest.Matchers.containsInAnyOrder;
3437
import static org.hamcrest.Matchers.equalTo;
@@ -80,7 +83,7 @@ public void testStats() throws Exception {
8083
assertThat(jsonMapView.get("stats.databases_count"), equalTo(0));
8184
assertThat(jsonMapView.get("stats.total_download_time"), equalTo(0));
8285
assertEquals(0, jsonMapView.<Map<String, Object>>get("nodes").size());
83-
86+
putPipeline();
8487
ClusterUpdateSettingsResponse settingsResponse = client().admin()
8588
.cluster()
8689
.prepareUpdateSettings()
@@ -108,6 +111,33 @@ public void testStats() throws Exception {
108111
});
109112
}
110113

114+
private void putPipeline() throws IOException {
115+
BytesReference bytes;
116+
try (XContentBuilder builder = JsonXContent.contentBuilder()) {
117+
builder.startObject();
118+
{
119+
builder.startArray("processors");
120+
{
121+
builder.startObject();
122+
{
123+
builder.startObject("geoip");
124+
{
125+
builder.field("field", "ip");
126+
builder.field("target_field", "ip-city");
127+
builder.field("database_file", "GeoLite2-City.mmdb");
128+
}
129+
builder.endObject();
130+
}
131+
builder.endObject();
132+
}
133+
builder.endArray();
134+
}
135+
builder.endObject();
136+
bytes = BytesReference.bytes(builder);
137+
}
138+
assertAcked(client().admin().cluster().preparePutPipeline("_id", bytes, XContentType.JSON).get());
139+
}
140+
111141
public static Map<String, Object> convertToMap(ToXContent part) throws IOException {
112142
XContentBuilder builder = XContentFactory.jsonBuilder();
113143
part.toXContent(builder, EMPTY_PARAMS);

modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public void cleanUp() throws Exception {
4848
.setPersistentSettings(
4949
Settings.builder()
5050
.putNull(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey())
51-
.putNull(GeoIpDownloader.POLL_INTERVAL_SETTING.getKey())
51+
.putNull(GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getKey())
5252
.putNull("ingest.geoip.database_validity")
5353
)
5454
.get()

0 commit comments

Comments
 (0)