Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@

package org.elasticsearch.client;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.client.RequestConverters.EndpointBuilder;
import org.elasticsearch.client.ml.CloseJobRequest;
import org.elasticsearch.client.ml.DeleteJobRequest;
Expand All @@ -34,12 +37,15 @@
import org.elasticsearch.client.ml.GetOverallBucketsRequest;
import org.elasticsearch.client.ml.GetRecordsRequest;
import org.elasticsearch.client.ml.OpenJobRequest;
import org.elasticsearch.client.ml.PostDataRequest;
import org.elasticsearch.client.ml.PutJobRequest;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;

import java.io.IOException;

import static org.elasticsearch.client.RequestConverters.REQUEST_BODY_CONTENT_TYPE;
import static org.elasticsearch.client.RequestConverters.createContentType;
import static org.elasticsearch.client.RequestConverters.createEntity;

final class MLRequestConverters {
Expand Down Expand Up @@ -188,6 +194,35 @@ static Request getRecords(GetRecordsRequest getRecordsRequest) throws IOExceptio
return request;
}

/**
 * Converts a {@link PostDataRequest} into the low-level POST request sent to the
 * {@code _xpack/ml/anomaly_detectors/<job_id>/_data} endpoint.
 *
 * The optional {@code reset_start} and {@code reset_end} values are forwarded as URL parameters
 * only when they have been set; the request content, if any, becomes the HTTP entity with a
 * content type derived from the request's XContentType.
 *
 * @param postDataRequest the request holding the job id, reset range and data to post
 * @return the low-level request to execute
 * @throws IOException if the request content cannot be serialized
 */
static Request postData(PostDataRequest postDataRequest) throws IOException {
    String endpoint = new EndpointBuilder()
        .addPathPartAsIs("_xpack")
        .addPathPartAsIs("ml")
        .addPathPartAsIs("anomaly_detectors")
        .addPathPart(postDataRequest.getJobId())
        .addPathPartAsIs("_data")
        .build();
    Request request = new Request(HttpPost.METHOD_NAME, endpoint);

    RequestConverters.Params params = new RequestConverters.Params(request);
    if (postDataRequest.getResetStart() != null) {
        params.putParam(PostDataRequest.RESET_START.getPreferredName(), postDataRequest.getResetStart());
    }
    if (postDataRequest.getResetEnd() != null) {
        params.putParam(PostDataRequest.RESET_END.getPreferredName(), postDataRequest.getResetEnd());
    }
    // getContent() may re-serialize every added document on each call, so fetch it once and reuse it.
    BytesReference content = postDataRequest.getContent();
    if (content != null) {
        BytesRef source = content.toBytesRef();
        HttpEntity byteEntity = new ByteArrayEntity(source.bytes,
            source.offset,
            source.length,
            createContentType(postDataRequest.getXContentType()));
        request.setEntity(byteEntity);
    }
    return request;
}

static Request getInfluencers(GetInfluencersRequest getInfluencersRequest) throws IOException {
String endpoint = new EndpointBuilder()
.addPathPartAsIs("_xpack")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
package org.elasticsearch.client;

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.client.ml.PostDataRequest;
import org.elasticsearch.client.ml.PostDataResponse;
import org.elasticsearch.client.ml.CloseJobRequest;
import org.elasticsearch.client.ml.CloseJobResponse;
import org.elasticsearch.client.ml.DeleteJobRequest;
Expand Down Expand Up @@ -467,6 +469,52 @@ public void getRecordsAsync(GetRecordsRequest request, RequestOptions options, A
Collections.emptySet());
}

/**
 * Posts data to an anomaly detection job so that it can be analysed.
 *
 * NOTE: the job has to be in the open state in order to receive and process the data.
 *
 * <p>
 * For additional info
 * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-post-data.html">ML POST Data documentation</a>
 * </p>
 *
 * @param request PostDataRequest holding the data to post together with its additional options
 * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
 * @return response describing the operational progress of the job
 * @throws IOException when there is a serialization issue sending the request or receiving the response
 */
public PostDataResponse postData(PostDataRequest request, RequestOptions options) throws IOException {
    final PostDataResponse response = restHighLevelClient.performRequestAndParseEntity(
        request,
        MLRequestConverters::postData,
        options,
        PostDataResponse::fromXContent,
        Collections.emptySet());
    return response;
}

/**
 * Asynchronously posts data to an anomaly detection job so that it can be analysed.
 *
 * NOTE: the job has to be in the open state in order to receive and process the data.
 *
 * <p>
 * For additional info
 * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-post-data.html">ML POST Data documentation</a>
 * </p>
 *
 * @param request PostDataRequest holding the data to post together with its additional options
 * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
 * @param listener Listener that is notified once the request has completed
 */
public void postDataAsync(PostDataRequest request, RequestOptions options, ActionListener<PostDataResponse> listener) {
    restHighLevelClient.performRequestAsyncAndParseEntity(
        request,
        MLRequestConverters::postData,
        options,
        PostDataResponse::fromXContent,
        listener,
        Collections.emptySet());
}

/**
* Gets the influencers for a Machine Learning Job.
* <p>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml;

import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.client.ml.job.config.Job;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
* POJO for posting data to a Machine Learning job
*/
public class PostDataRequest extends ActionRequest implements ToXContentObject {

public static final ParseField RESET_START = new ParseField("reset_start");
public static final ParseField RESET_END = new ParseField("reset_end");
public static final ParseField CONTENT_TYPE = new ParseField("content_type");

public static final ConstructingObjectParser<PostDataRequest, Void> PARSER =
new ConstructingObjectParser<>("post_data_request",
(a) -> new PostDataRequest((String)a[0], XContentType.fromMediaTypeOrFormat((String)a[1])));

static {
PARSER.declareString(ConstructingObjectParser.constructorArg(), Job.ID);
PARSER.declareString(ConstructingObjectParser.constructorArg(), CONTENT_TYPE);
PARSER.declareString(PostDataRequest::setResetEnd, RESET_END);
PARSER.declareString(PostDataRequest::setResetStart, RESET_START);
}

private final String jobId;
private final XContentType xContentType;
private final List<BytesReference> bytesReferences;
private final List<Map<String, Object>> objectMaps;
private String resetStart;
private String resetEnd;
private BytesReference content;

/**
* PostDataRequest for sending data in JSON format
* @param jobId non-null jobId of the job to post data to
*/
public static PostDataRequest postJsonDataRequest(String jobId) {
return new PostDataRequest(jobId, XContentType.JSON);
}

/**
* PostDataRequest for sending data in SMILE format
* @param jobId non-null jobId of the job to post data to
*/
public static PostDataRequest postSmileDataRequest(String jobId) {
return new PostDataRequest(jobId, XContentType.SMILE);
}

/**
* Create a new PostDataRequest object
*
* @param jobId non-null jobId of the job to post data to
* @param xContentType content type of the data to post. Only {@link XContentType#JSON} {@link XContentType#SMILE} are supported
*/
public PostDataRequest(String jobId, XContentType xContentType) {
this.jobId = Objects.requireNonNull(jobId, "job_id must not be null");
this.xContentType = Objects.requireNonNull(xContentType, "content_type must not be null");
this.bytesReferences = new ArrayList<>();
this.objectMaps = new ArrayList<>();
}

public String getJobId() {
return jobId;
}

public String getResetStart() {
return resetStart;
}

/**
* Specifies the start of the bucket resetting range
*
* @param resetStart String representation of a timestamp; may be an epoch seconds, epoch millis or an ISO string
*/
public void setResetStart(String resetStart) {
this.resetStart = resetStart;
}

public String getResetEnd() {
return resetEnd;
}

/**
* Specifies the end of the bucket resetting range
*
* @param resetEnd String representation of a timestamp; may be an epoch seconds, epoch millis or an ISO string

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be best to say ISO 8601 instead of just ISO.

Also, this can be set to the string "now".

*/
public void setResetEnd(String resetEnd) {
this.resetEnd = resetEnd;
}

/**
* Gets the transformed content to post.
*
* This combines both documents added through {@link PostDataRequest#addDoc(BytesReference)} and {@link PostDataRequest#addDoc(Map)}
* into a single BytesReference object according to the set XContentType for bulk submission
*
* If content was set via {@link PostDataRequest#setContent(BytesReference)}, then simply that content is returned.
*
* @throws IOException on parsing/serialization errors
*/
public BytesReference getContent() throws IOException {
if (content != null) {
return content;
}
try (XContentBuilder builder = XContentBuilder.builder(xContentType.xContent())) {
for (BytesReference bytesReference : bytesReferences) {
try (StreamInput streamInput = bytesReference.streamInput()) {
builder.rawValue(streamInput, xContentType);
}
}
for (Map<String, Object> objectMap : objectMaps) {
builder.map(objectMap);
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a problem here because if data has been added as a mixture of bytes references and maps then all the maps will be sent after all the bytes references, and that could mean the data is not sent in ascending time order.

Two possible solutions I can think of are:

  1. Document that all data must be supplied in the same format - either bytes references or maps - and enforce this in the addDoc() methods.
  2. Convert the maps to bytes references in addDoc() so that there are only bytes references being stored.

This also shows that we should say somewhere in the Javadocs that docs will be processed by the job in the order they're added to the request and that therefore they should be added to the request in ascending time order.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking of doing option 2 and simply calling the bytesReferences overload of addDoc from the object map one.

The downside of serializing down to simply use the BytesReference overload is performance. Though, I may be preoptimizing here.

return BytesReference.bytes(builder);
}
}

public XContentType getXContentType() {
return xContentType;
}

/**
* Set the total content to post.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add again that this takes precedence over any individual docs already added.

It could also .clear() the lists of individual docs.

An alternative to consider would be to throw an exception if individual docs have already been added. It seems like client code is badly designed if it adds individual docs then wipes them out by overriding with externally formatted content.

*
* @param content BytesReference content to set, format must match the set XContentType
*/
public void setContent(BytesReference content) {
this.content = content;
}

/**
* Add a document via a ByteReference.
*
* Ignored if total content is set via {@link PostDataRequest#setContent(BytesReference)}
*
* @param bytesReference document to add to bulk request, format must match the set XContentType
*/
public void addDoc(BytesReference bytesReference) {
this.bytesReferences.add(Objects.requireNonNull(bytesReference, "bytesReferences must not be null"));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we throw an exception if content != null? Or otherwise return early to ignore as the Javadoc says.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good thought, Since they set the the whole bulk content earlier, there is no need to continue to collect the individual docs.

}

/**
* Add a document via an object map
*
* Ignored if total content is set via {@link PostDataRequest#setContent(BytesReference)}
*
* @param objectMap document object to add to bulk request
*/
public void addDoc(Map<String, Object> objectMap) {
this.objectMaps.add(Objects.requireNonNull(objectMap, "objectMap must not be null"));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we throw an exception if content != null? Or otherwise return early to ignore as the Javadoc says.

}

@Override
public int hashCode() {
return Objects.hash(jobId, resetStart, resetEnd, xContentType);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be good to add a comment that content is deliberately left out as it is on the server side too. (I'm not convinced that was a good decision as it means two radically different posts can be the equal, but we are where we are.) But at least by having a comment it avoids someone adding it in in one place but not the other in the future.

Same for the comparison in equals() below.

}

@Override
public boolean equals(Object obj) {
if(obj == this) {
return true;
}

if (obj == null || getClass() != obj.getClass()) {
return false;
}

PostDataRequest other = (PostDataRequest) obj;
return Objects.equals(jobId, other.jobId) &&
Objects.equals(resetStart, other.resetStart) &&
Objects.equals(resetEnd, other.resetEnd) &&
Objects.equals(xContentType, other.xContentType);
}

@Override
public ActionRequestValidationException validate() {
return null;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(Job.ID.getPreferredName(), jobId);
builder.field(CONTENT_TYPE.getPreferredName(), xContentType.mediaType());
builder.field(RESET_END.getPreferredName(), resetEnd);
builder.field(RESET_START.getPreferredName(), resetStart);
builder.endObject();
return builder;
}
}
Loading