Skip to content

Commit 0eaabf4

Browse files
Yash Datta
Yash Datta
authored and
Yash Datta
committed
PARQUET-116: Move the config object from keep method to a configure method in udp predicate
1 parent f51a431 commit 0eaabf4

File tree

9 files changed

+32
-23
lines changed

9 files changed

+32
-23
lines changed

parquet-column/src/main/java/parquet/filter2/predicate/Operators.java

+6-8
Original file line numberDiff line numberDiff line change
@@ -344,16 +344,16 @@ public static final class UserDefined<T extends Comparable<T>, U extends UserDef
344344
private final Column<T> column;
345345
private final Class<U> udpClass;
346346
private final String toString;
347-
private final S o;
347+
private final S udpConfig;
348348
private static final String INSTANTIATION_ERROR_MESSAGE =
349349
"Could not instantiate custom filter: %s. User defined predicates must be static classes with a default constructor.";
350350

351-
UserDefined(Column<T> column, Class<U> udpClass, S o) {
351+
UserDefined(Column<T> column, Class<U> udpClass, S udpConfigParam) {
352352
this.column = checkNotNull(column, "column");
353353
this.udpClass = checkNotNull(udpClass, "udpClass");
354354
String name = getClass().getSimpleName().toLowerCase();
355355
this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + udpClass.getName() + ")";
356-
this.o = o;
356+
this.udpConfig = udpConfigParam;
357357

358358
// defensively try to instantiate the class early to make sure that it's possible
359359
getUserDefinedPredicate();
@@ -367,13 +367,11 @@ public Class<U> getUserDefinedPredicateClass() {
367367
return udpClass;
368368
}
369369

370-
public S getFilterObject() {
371-
return o;
372-
}
373-
374370
public U getUserDefinedPredicate() {
375371
try {
376-
return udpClass.newInstance();
372+
U udpInstance = udpClass.newInstance();
373+
udpInstance.configure(udpConfig);
374+
return udpInstance;
377375
} catch (InstantiationException e) {
378376
throw new RuntimeException(String.format(INSTANTIATION_ERROR_MESSAGE, udpClass), e);
379377
} catch (IllegalAccessException e) {

parquet-column/src/main/java/parquet/filter2/predicate/UserDefinedPredicate.java

+13-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
// TODO: downside is that's fairly unwieldy for users
1515
public abstract class UserDefinedPredicate<T extends Comparable<T>, S extends Serializable> {
1616

17+
/*
18+
* An object that can be used for filtering in the keep method
19+
*/
20+
protected S udpConfig;
1721
/**
1822
* A udp must have a default constructor.
1923
* The udp passed to {@link FilterApi} will not be serialized along with its state.
@@ -22,11 +26,19 @@ public abstract class UserDefinedPredicate<T extends Comparable<T>, S extends Se
2226
*/
2327
public UserDefinedPredicate() { }
2428

29+
/*
30+
* This method is used to set the object that is used in the keep method for filtering.
31+
* Called by Operators.UserDefined#getUserDefinedPredicate on each newly created instance before it is returned.
32+
*/
33+
public void configure(S udpConfigParam) {
34+
this.udpConfig = udpConfigParam;
35+
}
36+
2537
/**
2638
* Return true to keep the record with this value, false to drop it.
2739
* The filter object set through configure (the udpConfig field) can be used for filtering the value.
2840
*/
29-
public abstract boolean keep(T value, S o);
41+
public abstract boolean keep(T value);
3042

3143
/**
3244
* Given information about a group of records (eg, the min and max value)

parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
public class DummyUdp extends UserDefinedPredicate<Integer, Serializable> {
66

77
@Override
8-
public boolean keep(Integer value, Serializable o) {
8+
public boolean keep(Integer value) {
99
return false;
1010
}
1111

parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public class TestSchemaCompatibilityValidator {
5353

5454
static class LongDummyUdp extends UserDefinedPredicate<Long, Serializable> {
5555
@Override
56-
public boolean keep(Long value, Serializable o) {
56+
public boolean keep(Long value) {
5757
return false;
5858
}
5959

parquet-generator/src/main/java/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java

+2-4
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,6 @@ private void addUdpBegin() throws IOException {
222222
" ValueInspector valueInspector = null;\n" +
223223
"\n" +
224224
" final U udp = pred.getUserDefinedPredicate();\n" +
225-
"\n" +
226-
" final S o = pred.getFilterObject();\n" +
227225
"\n");
228226
}
229227

@@ -232,13 +230,13 @@ private void addUdpCase(TypeInfo info, boolean invert)throws IOException {
232230
" valueInspector = new ValueInspector() {\n" +
233231
" @Override\n" +
234232
" public void updateNull() {\n" +
235-
" setResult(" + (invert ? "!" : "") + "udp.keep(null, o));\n" +
233+
" setResult(" + (invert ? "!" : "") + "udp.keep(null));\n" +
236234
" }\n" +
237235
"\n" +
238236
" @SuppressWarnings(\"unchecked\")\n" +
239237
" @Override\n" +
240238
" public void update(" + info.primitiveName + " value) {\n" +
241-
" setResult(" + (invert ? "!" : "") + "udp.keep((T) (Object) value, o));\n" +
239+
" setResult(" + (invert ? "!" : "") + "udp.keep((T) (Object) value));\n" +
242240
" }\n" +
243241
" };\n" +
244242
" }\n\n");

parquet-hadoop/src/main/java/parquet/filter2/statisticslevel/StatisticsFilter.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package parquet.filter2.statisticslevel;
22

3+
import java.io.Serializable;
34
import java.util.HashMap;
45
import java.util.List;
56
import java.util.Map;
@@ -216,7 +217,7 @@ public Boolean visit(Not not) {
216217
"This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter? " + not);
217218
}
218219

219-
private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(UserDefined<T, U> ud, boolean inverted) {
220+
private <T extends Comparable<T>, U extends UserDefinedPredicate<T, S>, S extends Serializable> Boolean visit(UserDefined<T, U, S> ud, boolean inverted) {
220221
Column<T> filterColumn = ud.getColumn();
221222
ColumnChunkMetaData columnChunk = getColumnChunk(filterColumn.getColumnPath());
222223
U udp = ud.getUserDefinedPredicate();
@@ -232,12 +233,12 @@ private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean vis
232233
}
233234

234235
@Override
235-
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(UserDefined<T, U> ud) {
236+
public <T extends Comparable<T>, U extends UserDefinedPredicate<T, S>, S extends Serializable> Boolean visit(UserDefined<T, U, S> ud) {
236237
return visit(ud, false);
237238
}
238239

239240
@Override
240-
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(LogicalNotUserDefined<T, U> lnud) {
241+
public <T extends Comparable<T>, U extends UserDefinedPredicate<T, S>, S extends Serializable> Boolean visit(LogicalNotUserDefined<T, U, S> lnud) {
241242
return visit(lnud.getUserDefined(), true);
242243
}
243244

parquet-hadoop/src/test/java/parquet/filter2/recordlevel/TestRecordLevelFilters.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ public boolean keep(User u) {
148148
public static class StartWithP extends UserDefinedPredicate<Binary, Serializable> {
149149

150150
@Override
151-
public boolean keep(Binary value, Serializable o) {
151+
public boolean keep(Binary value) {
152152
if (value == null) {
153153
return false;
154154
}
@@ -169,12 +169,12 @@ public boolean inverseCanDrop(Statistics<Binary> statistics) {
169169
public static class SetInFilter extends UserDefinedPredicate<Long, HashSet<Long>> {
170170

171171
@Override
172-
public boolean keep(Long value, HashSet o) {
172+
public boolean keep(Long value) {
173173
if (value == null) {
174174
return false;
175175
}
176176

177-
return o.contains(value);
177+
return udpConfig.contains(value);
178178
}
179179

180180
@Override

parquet-hadoop/src/test/java/parquet/filter2/statisticslevel/TestStatisticsFilter.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ public void testOr() {
223223
public static class SevensAndEightsUdp extends UserDefinedPredicate<Integer, Serializable> {
224224

225225
@Override
226-
public boolean keep(Integer value, Serializable o) {
226+
public boolean keep(Integer value) {
227227
throw new RuntimeException("this method should not be called");
228228
}
229229

parquet-scala/src/test/scala/parquet/filter2/dsl/DslTest.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import parquet.filter2.predicate.Operators.{Or, UserDefined, DoubleColumn => JDo
1010
import parquet.filter2.predicate.{FilterApi, Statistics, UserDefinedPredicate}
1111

1212
class DummyFilter extends UserDefinedPredicate[JInt, java.io.Serializable] {
13-
override def keep(value: JInt, o: java.io.Serializable): Boolean = false
13+
override def keep(value: JInt): Boolean = false
1414

1515
override def canDrop(statistics: Statistics[JInt]): Boolean = false
1616

0 commit comments

Comments
 (0)