Skip to content

Commit c22d8c2

Browse files
Complete rework on serialization and deserialization.
Signed-off-by: Yury-Fridlyand <[email protected]>
1 parent 37e7ebf commit c22d8c2

File tree

45 files changed

+608
-686
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+608
-686
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.exception;
7+
8+
/**
9+
* This should be thrown on serialization of a PhysicalPlan tree if paging is finished.
10+
* Handling this exception should result in responding to the user without a cursor.
11+
*/
12+
public class NoCursorException extends RuntimeException {
13+
}

core/src/main/java/org/opensearch/sql/executor/pagination/Cursor.java

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,17 @@
77

88
import lombok.EqualsAndHashCode;
99
import lombok.Getter;
10+
import lombok.RequiredArgsConstructor;
1011

1112
/**
 * Holds the cursor payload as a string ({@code data}); this change replaces the previous
 * raw {@code byte[]} representation.
 */
@EqualsAndHashCode
13+
@RequiredArgsConstructor
1214
public class Cursor {
13-
public static final Cursor None = new Cursor();
15+
// NOTE(review): None is now constructed with null data (previously an empty byte array),
// so None.toString() and None.getData() return null - confirm callers handle that.
public static final Cursor None = new Cursor(null);
1416

1517
@Getter
16-
private final byte[] raw;
17-
18-
private Cursor() {
19-
raw = new byte[] {};
20-
}
21-
22-
public Cursor(byte[] raw) {
23-
this.raw = raw;
24-
}
18+
private final String data;
2519

2620
public String toString() {
27-
return new String(raw);
21+
// Returns the payload as-is, without copying or decoding.
return data;
2822
}
2923
}

core/src/main/java/org/opensearch/sql/executor/pagination/PaginatedPlanCache.java

Lines changed: 73 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,20 @@
99
import java.io.ByteArrayInputStream;
1010
import java.io.ByteArrayOutputStream;
1111
import java.io.IOException;
12-
import java.util.ArrayList;
13-
import java.util.List;
12+
import java.io.InputStream;
13+
import java.io.NotSerializableException;
14+
import java.io.ObjectInputStream;
15+
import java.io.ObjectOutputStream;
16+
import java.io.Serializable;
17+
import java.util.zip.Deflater;
1418
import java.util.zip.GZIPInputStream;
1519
import java.util.zip.GZIPOutputStream;
1620
import lombok.RequiredArgsConstructor;
1721
import org.opensearch.sql.ast.tree.UnresolvedPlan;
18-
import org.opensearch.sql.expression.NamedExpression;
19-
import org.opensearch.sql.expression.serialization.DefaultExpressionSerializer;
20-
import org.opensearch.sql.planner.physical.PaginateOperator;
22+
import org.opensearch.sql.exception.NoCursorException;
23+
import org.opensearch.sql.planner.SerializablePlan;
2124
import org.opensearch.sql.planner.physical.PhysicalPlan;
22-
import org.opensearch.sql.planner.physical.ProjectOperator;
2325
import org.opensearch.sql.storage.StorageEngine;
24-
import org.opensearch.sql.storage.TableScanOperator;
2526

2627
/**
2728
* This class is the entry point to paged requests. It is responsible for cursor serialization
@@ -30,132 +31,101 @@
3031
@RequiredArgsConstructor
3132
public class PaginatedPlanCache {
3233
public static final String CURSOR_PREFIX = "n:";
33-
private final StorageEngine storageEngine;
34+
35+
private final StorageEngine engine;
3436

3537
public boolean canConvertToCursor(UnresolvedPlan plan) {
3638
return plan.accept(new CanPaginateVisitor(), null);
3739
}
3840

3941
/**
40-
* Converts a physical plan tree to a cursor. May cache plan related data somewhere.
42+
* Converts a physical plan tree to a cursor.
4143
*/
42-
public Cursor convertToCursor(PhysicalPlan plan) throws IOException {
43-
if (plan instanceof PaginateOperator) {
44-
var cursor = plan.toCursor();
45-
if (cursor == null) {
46-
return Cursor.None;
47-
}
48-
var raw = CURSOR_PREFIX + compress(cursor);
49-
return new Cursor(raw.getBytes());
44+
public Cursor convertToCursor(PhysicalPlan plan) {
45+
try {
46+
return new Cursor(CURSOR_PREFIX
47+
+ serialize(((SerializablePlan) plan).getPlanForSerialization()));
48+
// ClassCastException thrown when a plan in the tree doesn't implement SerializablePlan
49+
} catch (NotSerializableException | ClassCastException | NoCursorException e) {
50+
return Cursor.None;
5051
}
51-
return Cursor.None;
5252
}
5353

5454
/**
55-
* Compress serialized query plan.
56-
* @param str string representing a query plan
57-
* @return str compressed with gzip.
55+
* Serializes and compresses the object.
56+
* @param object The object.
57+
* @return Encoded binary data.
5858
*/
59-
String compress(String str) throws IOException {
60-
if (str == null || str.length() == 0) {
61-
return "";
59+
protected String serialize(Serializable object) throws NotSerializableException {
60+
try {
61+
ByteArrayOutputStream output = new ByteArrayOutputStream();
62+
ObjectOutputStream objectOutput = new ObjectOutputStream(output);
63+
objectOutput.writeObject(object);
64+
objectOutput.flush();
65+
66+
ByteArrayOutputStream out = new ByteArrayOutputStream();
67+
// GZIP provides 35-45% compression; LZMA from Apache commons-compress is only a few % better
68+
GZIPOutputStream gzip = new GZIPOutputStream(out) { {
69+
this.def.setLevel(Deflater.BEST_COMPRESSION);
70+
} };
71+
gzip.write(output.toByteArray());
72+
gzip.close();
73+
74+
return HashCode.fromBytes(out.toByteArray()).toString();
75+
} catch (NotSerializableException e) {
76+
throw e;
77+
} catch (IOException e) {
78+
throw new IllegalStateException("Failed to serialize: " + object, e);
6279
}
63-
ByteArrayOutputStream out = new ByteArrayOutputStream();
64-
GZIPOutputStream gzip = new GZIPOutputStream(out);
65-
gzip.write(str.getBytes());
66-
gzip.close();
67-
return HashCode.fromBytes(out.toByteArray()).toString();
6880
}
6981

7082
/**
71-
* Decompresses a query plan that was compress with {@link PaginatedPlanCache#compress}.
72-
* @param input compressed query plan
73-
* @return decompressed string
83+
* Decompresses and deserializes the binary data.
84+
* @param code Encoded binary data.
85+
* @return An object.
7486
*/
75-
String decompress(String input) throws IOException {
76-
if (input == null || input.length() == 0) {
77-
return "";
87+
protected Serializable deserialize(String code) {
88+
try {
89+
GZIPInputStream gzip = new GZIPInputStream(
90+
new ByteArrayInputStream(HashCode.fromString(code).asBytes()));
91+
ObjectInputStream objectInput = new CursorDeserializationStream(
92+
new ByteArrayInputStream(gzip.readAllBytes()));
93+
return (Serializable) objectInput.readObject();
94+
} catch (Exception e) {
95+
throw new IllegalStateException("Failed to deserialize object", e);
7896
}
79-
GZIPInputStream gzip = new GZIPInputStream(new ByteArrayInputStream(
80-
HashCode.fromString(input).asBytes()));
81-
return new String(gzip.readAllBytes());
8297
}
8398

8499
/**
85-
* Parse `NamedExpression`s from cursor.
86-
* @param listToFill List to fill with data.
87-
* @param cursor Cursor to parse.
88-
* @return Remaining part of the cursor.
100+
* Converts a cursor to a physical plan tree.
89101
*/
90-
private String parseNamedExpressions(List<NamedExpression> listToFill, String cursor) {
91-
var serializer = new DefaultExpressionSerializer();
92-
if (cursor.startsWith(")")) { //empty list
93-
return cursor.substring(cursor.indexOf(',') + 1);
94-
}
95-
while (!cursor.startsWith("(")) {
96-
listToFill.add((NamedExpression)
97-
serializer.deserialize(cursor.substring(0,
98-
Math.min(cursor.indexOf(','), cursor.indexOf(')')))));
99-
cursor = cursor.substring(cursor.indexOf(',') + 1);
100-
}
101-
return cursor;
102-
}
103-
104-
/**
105-
* Converts a cursor to a physical plan tree.
106-
*/
107102
public PhysicalPlan convertToPlan(String cursor) {
108103
if (!cursor.startsWith(CURSOR_PREFIX)) {
109104
throw new UnsupportedOperationException("Unsupported cursor");
110105
}
111106
try {
112-
cursor = cursor.substring(CURSOR_PREFIX.length());
113-
cursor = decompress(cursor);
114-
115-
// TODO Parse with ANTLR or serialize as JSON/XML
116-
if (!cursor.startsWith("(Paginate,")) {
117-
throw new UnsupportedOperationException("Unsupported cursor");
118-
}
119-
// TODO add checks for > 0
120-
cursor = cursor.substring(cursor.indexOf(',') + 1);
121-
final int currentPageIndex = Integer.parseInt(cursor, 0, cursor.indexOf(','), 10);
122-
123-
cursor = cursor.substring(cursor.indexOf(',') + 1);
124-
final int pageSize = Integer.parseInt(cursor, 0, cursor.indexOf(','), 10);
125-
126-
cursor = cursor.substring(cursor.indexOf(',') + 1);
127-
if (!cursor.startsWith("(Project,")) {
128-
throw new UnsupportedOperationException("Unsupported cursor");
129-
}
130-
cursor = cursor.substring(cursor.indexOf(',') + 1);
131-
if (!cursor.startsWith("(namedParseExpressions,")) {
132-
throw new UnsupportedOperationException("Unsupported cursor");
133-
}
134-
135-
cursor = cursor.substring(cursor.indexOf(',') + 1);
136-
List<NamedExpression> namedParseExpressions = new ArrayList<>();
137-
cursor = parseNamedExpressions(namedParseExpressions, cursor);
107+
return (PhysicalPlan) deserialize(cursor.substring(CURSOR_PREFIX.length()));
108+
} catch (Exception e) {
109+
throw new UnsupportedOperationException("Unsupported cursor", e);
110+
}
111+
}
138112

139-
List<NamedExpression> projectList = new ArrayList<>();
140-
if (!cursor.startsWith("(projectList,")) {
141-
throw new UnsupportedOperationException("Unsupported cursor");
142-
}
143-
cursor = cursor.substring(cursor.indexOf(',') + 1);
144-
cursor = parseNamedExpressions(projectList, cursor);
113+
/**
114+
* This function is used in testing only, to get access to {@link CursorDeserializationStream}.
115+
*/
116+
public CursorDeserializationStream getCursorDeserializationStream(InputStream in)
117+
throws IOException {
118+
return new CursorDeserializationStream(in);
119+
}
145120

146-
if (!cursor.startsWith("(OpenSearchPagedIndexScan,")) {
147-
throw new UnsupportedOperationException("Unsupported cursor");
148-
}
149-
cursor = cursor.substring(cursor.indexOf(',') + 1);
150-
var indexName = cursor.substring(0, cursor.indexOf(','));
151-
cursor = cursor.substring(cursor.indexOf(',') + 1);
152-
var scrollId = cursor.substring(0, cursor.indexOf(')'));
153-
TableScanOperator scan = storageEngine.getTableScan(indexName, scrollId);
121+
public class CursorDeserializationStream extends ObjectInputStream {
122+
public CursorDeserializationStream(InputStream in) throws IOException {
123+
super(in);
124+
}
154125

155-
return new PaginateOperator(new ProjectOperator(scan, projectList, namedParseExpressions),
156-
pageSize, currentPageIndex);
157-
} catch (Exception e) {
158-
throw new UnsupportedOperationException("Unsupported cursor", e);
126+
@Override
127+
public Object resolveObject(Object obj) throws IOException {
128+
return obj.equals("engine") ? engine : obj;
159129
}
160130
}
161131
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.planner;
7+
8+
import java.io.Externalizable;
9+
import java.io.IOException;
10+
import java.io.ObjectInput;
11+
import java.io.ObjectInputStream;
12+
import java.io.ObjectOutput;
13+
import org.apache.commons.lang3.NotImplementedException;
14+
import org.opensearch.sql.executor.pagination.PaginatedPlanCache;
15+
16+
/**
17+
* All subtypes of PhysicalPlan which need to be serialized (in cursor, for pagination feature)
18+
* should follow one of the following options.
19+
* <ul>
20+
* <li>Both:
21+
* <ul>
22+
* <li>Override both methods from {@link Externalizable}.</li>
23+
* <li>Define a public no-arg constructor.</li>
24+
* </ul>
25+
* </li>
26+
* <li>
27+
* Override {@link #getPlanForSerialization} to return
28+
* another instance of {@link SerializablePlan}.
29+
* </li>
30+
* </ul>
31+
*/
32+
public interface SerializablePlan extends Externalizable {
33+
34+
/**
35+
* Argument is an instance of {@link PaginatedPlanCache.CursorDeserializationStream}.
36+
*/
37+
@Override
38+
default void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
39+
throw new NotImplementedException(String.format("`readExternal` is not implemented in %s",
40+
getClass().getSimpleName()));
41+
}
42+
43+
/**
44+
* Each plan which has a child plan should do the following.
45+
* <pre>{@code
46+
* out.writeObject(input.getPlanForSerialization());
47+
* }</pre>
48+
*/
49+
@Override
50+
default void writeExternal(ObjectOutput out) throws IOException {
51+
throw new NotImplementedException(String.format("`writeExternal` is not implemented in %s",
52+
getClass().getSimpleName()));
53+
}
54+
55+
/**
56+
* Override to return child or delegated plan, so parent plan should skip this one
57+
* for serialization, but it should try to serialize grandchild plan.
58+
* Imagine plan structure like this
59+
* <pre>
60+
* A -> this
61+
* `- B -> child
62+
* `- C -> this
63+
* </pre>
64+
* In that case, serialization should be attempted for plans A and C only.
65+
* Currently this is needed only to skip a `ResourceMonitorPlan` instance.
66+
* @return Next plan for serialization.
67+
*/
68+
default SerializablePlan getPlanForSerialization() {
69+
return this;
70+
}
71+
}

0 commit comments

Comments
 (0)