Skip to content

Commit ca47cd2

Browse files
p365labs authored and
ruflin committed
Removed mapper-attachments plugin. Now use the ingest-attachment plugin (#1375)
[Mapper Attachment plugin has been removed](elastic/elasticsearch#20416). Use the ingest-attachment plugin and attachment processors with a pipeline to ingest new documents. The flow for ingesting a file into Elasticsearch has changed a bit: - you should create a Pipeline with an *Attachment Processor* - add a file to the document - add the document to the index using a query string parameter of this format: **pipeline=name_of_the_pipeline**
1 parent 6a696f0 commit ca47cd2

File tree

3 files changed

+195
-166
lines changed

3 files changed

+195
-166
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ All notable changes to this project will be documented in this file based on the
2828
- The disable_coord parameter of the bool and common_terms queries has been removed. If provided, it will be ignored and issue a deprecation warning. [#1369](https://github.com/ruflin/Elastica/pull/1369)
2929
- [Unfiltered nested source](https://github.com/elastic/elasticsearch/pull/26102) should keep its full path [#1366](https://github.com/ruflin/Elastica/pull/1366)
3030
- [Analyze Explain](https://www.elastic.co/guide/en/elasticsearch/reference/6.0/_explain_analyze.html) no longer supports [request parameters](https://www.elastic.co/guide/en/elasticsearch/reference/5.5/indices-analyze.html); use the request body instead. [#1370](https://github.com/ruflin/Elastica/pull/1370)
31+
- [Mapper Attachment plugin has been removed](https://github.com/elastic/elasticsearch/pull/20416). Use the ingest-attachment plugin and attachment processors with a pipeline to ingest new documents. [#1375](https://github.com/ruflin/Elastica/pull/1375)
3132

3233
### Bugfixes
3334
- Enforce [Content-Type requirement on the layer Rest](https://github.com/elastic/elasticsearch/pull/23146), a [PR on Elastica #1301](https://github.com/ruflin/Elastica/issues/1301) solved it (it has been implemented only in the HTTP Transport), but it was not implemented in the Guzzle Transport. [#1349](https://github.com/ruflin/Elastica/pull/1349)

test/Elastica/IndexTest.php

-166
Original file line numberDiff line numberDiff line change
@@ -118,172 +118,6 @@ public function testParent()
118118
$this->assertEquals(['title' => 'Foo bar'], $resultSet->current()->getData());
119119
}
120120

121-
/**
122-
* @group functional
123-
*/
124-
public function testAddPdfFile()
125-
{
126-
$this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]');
127-
$indexMapping = ['file' => ['type' => 'attachment'], 'text' => ['type' => 'text']];
128-
129-
$indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];
130-
131-
$index = $this->_createIndex();
132-
$type = new Type($index, 'test');
133-
134-
$index->create($indexParams, true);
135-
$type->setMapping($indexMapping);
136-
137-
$doc1 = new Document(1);
138-
$doc1->addFile('file', BASE_PATH.'/data/test.pdf', 'application/pdf');
139-
$doc1->set('text', 'basel world');
140-
$type->addDocument($doc1);
141-
142-
$doc2 = new Document(2);
143-
$doc2->set('text', 'running in basel');
144-
$type->addDocument($doc2);
145-
146-
$index->forcemerge();
147-
148-
$resultSet = $type->search('xodoa');
149-
$this->assertEquals(1, $resultSet->count());
150-
151-
$resultSet = $type->search('basel');
152-
$this->assertEquals(2, $resultSet->count());
153-
154-
// Author is ruflin
155-
$resultSet = $type->search('ruflin');
156-
$this->assertEquals(1, $resultSet->count());
157-
158-
// String does not exist in file
159-
$resultSet = $type->search('guschti');
160-
$this->assertEquals(0, $resultSet->count());
161-
}
162-
163-
/**
164-
* @group functional
165-
*/
166-
public function testAddPdfFileContent()
167-
{
168-
$this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]');
169-
$indexMapping = ['file' => ['type' => 'attachment'], 'text' => ['type' => 'text']];
170-
171-
$indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];
172-
173-
$index = $this->_createIndex();
174-
$type = new Type($index, 'test');
175-
176-
$index->create($indexParams, true);
177-
$type->setMapping($indexMapping);
178-
179-
$doc1 = new Document(1);
180-
$doc1->addFileContent('file', file_get_contents(BASE_PATH.'/data/test.pdf'));
181-
$doc1->set('text', 'basel world');
182-
$type->addDocument($doc1);
183-
184-
$doc2 = new Document(2);
185-
$doc2->set('text', 'running in basel');
186-
$type->addDocument($doc2);
187-
188-
$index->forcemerge();
189-
190-
$resultSet = $type->search('xodoa');
191-
$this->assertEquals(1, $resultSet->count());
192-
193-
$resultSet = $type->search('basel');
194-
$this->assertEquals(2, $resultSet->count());
195-
196-
// Author is ruflin
197-
$resultSet = $type->search('ruflin');
198-
$this->assertEquals(1, $resultSet->count());
199-
200-
// String does not exist in file
201-
$resultSet = $type->search('guschti');
202-
$this->assertEquals(0, $resultSet->count());
203-
}
204-
205-
/**
206-
* @group functional
207-
*/
208-
public function testAddWordxFile()
209-
{
210-
$this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]');
211-
$indexMapping = ['file' => ['type' => 'attachment'], 'text' => ['type' => 'text']];
212-
213-
$indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];
214-
215-
$index = $this->_createIndex();
216-
$type = new Type($index, 'content');
217-
218-
$index->create($indexParams, true);
219-
$type->setMapping($indexMapping);
220-
221-
$doc1 = new Document(1);
222-
$doc1->addFile('file', BASE_PATH.'/data/test.docx');
223-
$doc1->set('text', 'basel world');
224-
$type->addDocument($doc1);
225-
226-
$index->forcemerge();
227-
$index->refresh();
228-
229-
$doc2 = new Document(2);
230-
$doc2->set('text', 'running in basel');
231-
$type->addDocument($doc2);
232-
233-
$index->forcemerge();
234-
$index->refresh();
235-
236-
$resultSet = $type->search('basel');
237-
$this->assertEquals(2, $resultSet->count());
238-
239-
$resultSet = $type->search('ruflin');
240-
$this->assertEquals(0, $resultSet->count());
241-
242-
$resultSet = $type->search('Xodoa');
243-
$this->assertEquals(1, $resultSet->count());
244-
}
245-
246-
/**
247-
* @group functional
248-
*/
249-
public function testExcludeFileSource()
250-
{
251-
$this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]');
252-
$indexMapping = ['file' => ['type' => 'attachment'], 'text' => ['type' => 'text', 'store' => true],
253-
'title' => ['type' => 'text', 'store' => true], ];
254-
255-
$indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];
256-
257-
$index = $this->_createIndex();
258-
$type = new Type($index, 'content');
259-
260-
$mapping = Mapping::create($indexMapping);
261-
$mapping->setSource(['excludes' => ['file']]);
262-
263-
$mapping->setType($type);
264-
265-
$index->create($indexParams, true);
266-
$type->setMapping($mapping);
267-
268-
$docId = 1;
269-
$text = 'Basel World';
270-
$title = 'No Title';
271-
272-
$doc1 = new Document($docId);
273-
$doc1->addFile('file', BASE_PATH.'/data/test.docx');
274-
$doc1->set('text', $text);
275-
$doc1->set('title', $title);
276-
$type->addDocument($doc1);
277-
278-
// Optimization necessary, as otherwise source still in realtime get
279-
$index->forcemerge();
280-
281-
$data = $type->getDocument($docId)->getData();
282-
$this->assertEquals($data['title'], $title);
283-
$this->assertEquals($data['text'], $text);
284-
$this->assertFalse(isset($data['file']));
285-
}
286-
287121
/**
288122
* @group functional
289123
* @expectedException \Elastica\Exception\ResponseException

test/Elastica/Processor/AttachmentTest.php

+194
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
<?php
22
namespace Elastica\Test\Processor;
33

4+
use Elastica\Bulk;
5+
use Elastica\Document;
46
use Elastica\Processor\Attachment;
57
use Elastica\Test\BasePipeline as BasePipelineTest;
8+
use Elastica\Type;
69

710
class AttachmentTest extends BasePipelineTest
811
{
@@ -45,4 +48,195 @@ public function testAttachmentWithNonDefaultOptions()
4548

4649
$this->assertEquals($expected, $processor->toArray());
4750
}
51+
52+
/**
53+
* @group functional
54+
*/
55+
public function testAttachmentAddPdf()
{
    // Register a pipeline whose attachment processor is configured with the "data" field.
    $pipeline = $this->_createPipeline('my_custom_pipeline_attachment', 'pipeline for Attachment');
    $pipeline->addProcessor(new Attachment('data'));
    $pipeline->create();

    $index = $this->_createIndex();
    $type = $index->getType('bulk_test');

    // First document: no explicit id, PDF attached under "data".
    $pdfDocument = new Document(null);
    $pdfDocument->addFile('data', BASE_PATH.'/data/test.pdf');

    // Second document: plain text only, with an empty "data" field.
    $textDocument = new Document(2, ['data' => '', 'text' => 'test running in basel']);

    // Index both documents in one bulk request, passing the pipeline name as a request parameter.
    $bulk = new Bulk($index->getClient());
    $bulk->setIndex($index);
    $bulk->setType($type);
    $bulk->addDocuments([$pdfDocument, $textDocument]);
    $bulk->setRequestParam('pipeline', 'my_custom_pipeline_attachment');
    $bulk->send();

    $index->refresh();

    $matches = $type->search('xodoa');
    $this->assertEquals(1, $matches->count());

    $matches = $type->search('test');
    $this->assertEquals(2, $matches->count());

    // Author is ruflin
    $matches = $type->search('ruflin');
    $this->assertEquals(1, $matches->count());

    // String does not exist in file
    $matches = $type->search('guschti');
    $this->assertEquals(0, $matches->count());
}
96+
97+
/**
98+
* @group functional
99+
*/
100+
public function testAttachmentAddPdfFileContent()
{
    // Pipeline with an attachment processor configured for the "data" field.
    $attachment = new Attachment('data');
    $pipeline = $this->_createPipeline('my_custom_pipeline_attachment', 'pipeline for Attachment');
    $pipeline->addProcessor($attachment);
    $pipeline->create();

    $index = $this->_createIndex();
    $type = $index->getType('bulk_test');

    $bulk = new Bulk($index->getClient());
    $bulk->setIndex($index);
    $bulk->setType($type);

    // Pass the raw file bytes through addFileContent(); the path-based addFile()
    // variant is already covered by testAttachmentAddPdf.
    $doc1 = new Document(null);
    $doc1->addFileContent('data', file_get_contents(BASE_PATH.'/data/test.pdf'));
    $doc1->set('text', 'basel world');

    // Plain-text document with an empty "data" field.
    $doc2 = new Document(2, ['data' => '', 'text' => 'running in basel']);

    $bulk->addDocuments([
        $doc1, $doc2,
    ]);
    // The pipeline is selected through the "pipeline" request parameter.
    $bulk->setRequestParam('pipeline', 'my_custom_pipeline_attachment');

    $bulk->send();
    // Refresh (as the sibling tests do) so the bulk-indexed documents are searchable.
    $index->refresh();

    $resultSet = $type->search('xodoa');
    $this->assertEquals(1, $resultSet->count());

    $resultSet = $type->search('basel');
    $this->assertEquals(2, $resultSet->count());

    // Author is ruflin
    $resultSet = $type->search('ruflin');
    $this->assertEquals(1, $resultSet->count());

    // String does not exist in file
    $resultSet = $type->search('guschti');
    $this->assertEquals(0, $resultSet->count());
}
143+
144+
/**
145+
* @group functional
146+
*/
147+
public function testAddWordxFile()
{
    // Register a pipeline whose attachment processor is configured with the "data" field.
    $pipeline = $this->_createPipeline('my_custom_pipeline_attachment', 'pipeline for Attachment');
    $pipeline->addProcessor(new Attachment('data'));
    $pipeline->create();

    $index = $this->_createIndex();
    $type = $index->getType('bulk_test');

    // First document: no explicit id, .docx attached under "data" plus a text field.
    $wordDocument = new Document(null);
    $wordDocument->addFile('data', BASE_PATH.'/data/test.docx');
    $wordDocument->set('text', 'basel world');

    // Second document: plain text only, with an empty "data" field.
    $textDocument = new Document(2, ['data' => '', 'text' => 'test running in basel']);

    // Index both documents in one bulk request, passing the pipeline name as a request parameter.
    $bulk = new Bulk($index->getClient());
    $bulk->setIndex($index);
    $bulk->setType($type);
    $bulk->addDocuments([$wordDocument, $textDocument]);
    $bulk->setRequestParam('pipeline', 'my_custom_pipeline_attachment');
    $bulk->send();

    $index->refresh();

    $matches = $type->search('basel');
    $this->assertEquals(2, $matches->count());

    $matches = $type->search('ruflin');
    $this->assertEquals(0, $matches->count());

    $matches = $type->search('Xodoa');
    $this->assertEquals(1, $matches->count());

    // String does not exist in file
    $matches = $type->search('guschti');
    $this->assertEquals(0, $matches->count());
}
188+
189+
/**
190+
* @group functional
191+
*/
192+
public function testExcludeFileSource()
{
    // Pipeline with an attachment processor configured for the "data" field.
    $attachment = new Attachment('data');
    $pipeline = $this->_createPipeline('my_custom_pipeline_attachment', 'pipeline for Attachment');
    $pipeline->addProcessor($attachment);
    $pipeline->create();

    $indexMapping = [
        'data' => ['type' => 'text'],
        'text' => ['type' => 'text', 'store' => true],
        'title' => ['type' => 'text', 'store' => true],
    ];

    $indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];

    $index = $this->_createIndex();
    $type = new Type($index, 'content');

    // Exclude the "data" field (the file payload) from the returned source.
    $mapping = Type\Mapping::create($indexMapping);
    $mapping->setSource(['excludes' => ['data']]);

    $mapping->setType($type);

    $index->create($indexParams, true);
    $type->setMapping($mapping);

    $docId = 1;
    $text = 'Basel World';
    $title = 'No Title';

    $doc1 = new Document($docId);
    $doc1->set('text', $text);
    $doc1->set('title', $title);
    $doc1->addFile('data', BASE_PATH.'/data/test.docx');

    $bulk = new Bulk($index->getClient());
    $bulk->setIndex($index);
    $bulk->setType($type);

    $bulk->addDocuments([
        $doc1,
    ]);
    // The pipeline is selected through the "pipeline" request parameter.
    $bulk->setRequestParam('pipeline', 'my_custom_pipeline_attachment');

    $bulk->send();

    // Optimization necessary, as otherwise source still in realtime get
    $index->forcemerge();

    $data = $type->getDocument($docId)->getData();

    // Expected value first, so a failure message reports expected/actual correctly.
    $this->assertEquals($title, $data['title']);
    $this->assertEquals($text, $data['text']);

    // The file is stored under "data" in this test; checking the old "file" key
    // would pass even if the source exclusion did not work.
    $this->assertArrayNotHasKey('data', $data);
}
48242
}

0 commit comments

Comments
 (0)