From 0eaabf45b91867e85e4d3aba9aee6aade3426a7d Mon Sep 17 00:00:00 2001 From: Yash Datta Date: Fri, 31 Oct 2014 12:21:49 +0530 Subject: [PATCH] PARQUET-116: Move the config object from keep method to a configure method in udp predicate --- .../java/parquet/filter2/predicate/Operators.java | 14 ++++++-------- .../filter2/predicate/UserDefinedPredicate.java | 14 +++++++++++++- .../java/parquet/filter2/predicate/DummyUdp.java | 2 +- .../TestSchemaCompatibilityValidator.java | 2 +- ...crementallyUpdatedFilterPredicateGenerator.java | 6 ++---- .../filter2/statisticslevel/StatisticsFilter.java | 7 ++++--- .../recordlevel/TestRecordLevelFilters.java | 6 +++--- .../statisticslevel/TestStatisticsFilter.java | 2 +- .../test/scala/parquet/filter2/dsl/DslTest.scala | 2 +- 9 files changed, 32 insertions(+), 23 deletions(-) diff --git a/parquet-column/src/main/java/parquet/filter2/predicate/Operators.java b/parquet-column/src/main/java/parquet/filter2/predicate/Operators.java index f65ffa1573..740c6ea48c 100644 --- a/parquet-column/src/main/java/parquet/filter2/predicate/Operators.java +++ b/parquet-column/src/main/java/parquet/filter2/predicate/Operators.java @@ -344,16 +344,16 @@ public static final class UserDefined, U extends UserDef private final Column column; private final Class udpClass; private final String toString; - private final S o; + private final S udpConfig; private static final String INSTANTIATION_ERROR_MESSAGE = "Could not instantiate custom filter: %s. User defined predicates must be static classes with a default constructor."; - UserDefined(Column column, Class udpClass, S o) { + UserDefined(Column column, Class udpClass, S udpConfigParam) { this.column = checkNotNull(column, "column"); this.udpClass = checkNotNull(udpClass, "udpClass"); String name = getClass().getSimpleName().toLowerCase(); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + udpClass.getName() + ")"; - this.o = o; + this.udpConfig = udpConfigParam; // defensively try to instantiate the class early to make sure that it's possible getUserDefinedPredicate(); @@ -367,13 +367,11 @@ public Class getUserDefinedPredicateClass() { return udpClass; } - public S getFilterObject() { - return o; - } - public U getUserDefinedPredicate() { try { - return udpClass.newInstance(); + U udpInstance = udpClass.newInstance(); + udpInstance.configure(udpConfig); + return udpInstance; } catch (InstantiationException e) { throw new RuntimeException(String.format(INSTANTIATION_ERROR_MESSAGE, udpClass), e); } catch (IllegalAccessException e) { diff --git a/parquet-column/src/main/java/parquet/filter2/predicate/UserDefinedPredicate.java b/parquet-column/src/main/java/parquet/filter2/predicate/UserDefinedPredicate.java index 2b6da4b56d..552363a6ff 100644 --- a/parquet-column/src/main/java/parquet/filter2/predicate/UserDefinedPredicate.java +++ b/parquet-column/src/main/java/parquet/filter2/predicate/UserDefinedPredicate.java @@ -14,6 +14,10 @@ // TODO: downside is that's fairly unwieldy for users public abstract class UserDefinedPredicate, S extends Serializable> { + /* + * An object that can be used for filtering in the keep method + */ + protected S udpConfig; /** * A udp must have a default constructor. * The udp passed to {@link FilterApi} will not be serialized along with its state. @@ -22,11 +26,19 @@ public abstract class UserDefinedPredicate, S extends Se */ public UserDefinedPredicate() { } + /* + * This method is used to set the object that is used in the keep method for filtering. + * Called before returning the new instance of this class. + */ + public void configure(S udpConfigParam) { + this.udpConfig = udpConfigParam; + } + /** * Return true to keep the record with this value, false to drop it. * o is a filter object that can be used for filtering the value. */ - public abstract boolean keep(T value, S o); + public abstract boolean keep(T value); /** * Given information about a group of records (eg, the min and max value) diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java b/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java index e746506598..f75e315f12 100644 --- a/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java +++ b/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java @@ -5,7 +5,7 @@ public class DummyUdp extends UserDefinedPredicate { @Override - public boolean keep(Integer value, Serializable o) { + public boolean keep(Integer value) { return false; } diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java index 48b409c2cb..b77bea45fe 100644 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java +++ b/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java @@ -53,7 +53,7 @@ public class TestSchemaCompatibilityValidator { static class LongDummyUdp extends UserDefinedPredicate { @Override - public boolean keep(Long value, Serializable o) { + public boolean keep(Long value) { return false; } diff --git a/parquet-generator/src/main/java/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java b/parquet-generator/src/main/java/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java index 2bc96ac0c7..1d96139b2b 100644 --- a/parquet-generator/src/main/java/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java +++ b/parquet-generator/src/main/java/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java @@ -222,8 +222,6 @@ private void addUdpBegin() throws IOException { " ValueInspector valueInspector = null;\n" + "\n" + " final U udp = pred.getUserDefinedPredicate();\n" + - "\n" + - " final S o = pred.getFilterObject();\n" + "\n"); } @@ -232,13 +230,13 @@ private void addUdpCase(TypeInfo info, boolean invert)throws IOException { " valueInspector = new ValueInspector() {\n" + " @Override\n" + " public void updateNull() {\n" + - " setResult(" + (invert ? "!" : "") + "udp.keep(null, o));\n" + + " setResult(" + (invert ? "!" : "") + "udp.keep(null));\n" + " }\n" + "\n" + " @SuppressWarnings(\"unchecked\")\n" + " @Override\n" + " public void update(" + info.primitiveName + " value) {\n" + - " setResult(" + (invert ? "!" : "") + "udp.keep((T) (Object) value, o));\n" + + " setResult(" + (invert ? "!" : "") + "udp.keep((T) (Object) value));\n" + " }\n" + " };\n" + " }\n\n"); diff --git a/parquet-hadoop/src/main/java/parquet/filter2/statisticslevel/StatisticsFilter.java b/parquet-hadoop/src/main/java/parquet/filter2/statisticslevel/StatisticsFilter.java index 4daed5a2e0..13be123d49 100644 --- a/parquet-hadoop/src/main/java/parquet/filter2/statisticslevel/StatisticsFilter.java +++ b/parquet-hadoop/src/main/java/parquet/filter2/statisticslevel/StatisticsFilter.java @@ -1,5 +1,6 @@ package parquet.filter2.statisticslevel; +import java.io.Serializable; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -216,7 +217,7 @@ public Boolean visit(Not not) { "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter? " + not); } - private , U extends UserDefinedPredicate> Boolean visit(UserDefined ud, boolean inverted) { + private , U extends UserDefinedPredicate, S extends Serializable> Boolean visit(UserDefined ud, boolean inverted) { Column filterColumn = ud.getColumn(); ColumnChunkMetaData columnChunk = getColumnChunk(filterColumn.getColumnPath()); U udp = ud.getUserDefinedPredicate(); @@ -232,12 +233,12 @@ private , U extends UserDefinedPredicate> Boolean vis } @Override - public , U extends UserDefinedPredicate> Boolean visit(UserDefined ud) { + public , U extends UserDefinedPredicate, S extends Serializable> Boolean visit(UserDefined ud) { return visit(ud, false); } @Override - public , U extends UserDefinedPredicate> Boolean visit(LogicalNotUserDefined lnud) { + public , U extends UserDefinedPredicate, S extends Serializable> Boolean visit(LogicalNotUserDefined lnud) { return visit(lnud.getUserDefined(), true); } diff --git a/parquet-hadoop/src/test/java/parquet/filter2/recordlevel/TestRecordLevelFilters.java b/parquet-hadoop/src/test/java/parquet/filter2/recordlevel/TestRecordLevelFilters.java index 8106b51f1d..64bd79a4fd 100644 --- a/parquet-hadoop/src/test/java/parquet/filter2/recordlevel/TestRecordLevelFilters.java +++ b/parquet-hadoop/src/test/java/parquet/filter2/recordlevel/TestRecordLevelFilters.java @@ -148,7 +148,7 @@ public boolean keep(User u) { public static class StartWithP extends UserDefinedPredicate { @Override - public boolean keep(Binary value, Serializable o) { + public boolean keep(Binary value) { if (value == null) { return false; } @@ -169,12 +169,12 @@ public boolean inverseCanDrop(Statistics statistics) { public static class SetInFilter extends UserDefinedPredicate> { @Override - public boolean keep(Long value, HashSet o) { + public boolean keep(Long value) { if (value == null) { return false; } - return o.contains(value); + return udpConfig.contains(value); } @Override diff --git a/parquet-hadoop/src/test/java/parquet/filter2/statisticslevel/TestStatisticsFilter.java b/parquet-hadoop/src/test/java/parquet/filter2/statisticslevel/TestStatisticsFilter.java index fc60df0044..b427c631e3 100644 --- a/parquet-hadoop/src/test/java/parquet/filter2/statisticslevel/TestStatisticsFilter.java +++ b/parquet-hadoop/src/test/java/parquet/filter2/statisticslevel/TestStatisticsFilter.java @@ -223,7 +223,7 @@ public void testOr() { public static class SevensAndEightsUdp extends UserDefinedPredicate { @Override - public boolean keep(Integer value, Serializable o) { + public boolean keep(Integer value) { throw new RuntimeException("this method should not be called"); } diff --git a/parquet-scala/src/test/scala/parquet/filter2/dsl/DslTest.scala b/parquet-scala/src/test/scala/parquet/filter2/dsl/DslTest.scala index 2791defe3a..5fe2010a67 100644 --- a/parquet-scala/src/test/scala/parquet/filter2/dsl/DslTest.scala +++ b/parquet-scala/src/test/scala/parquet/filter2/dsl/DslTest.scala @@ -10,7 +10,7 @@ import parquet.filter2.predicate.Operators.{Or, UserDefined, DoubleColumn => JDo import parquet.filter2.predicate.{FilterApi, Statistics, UserDefinedPredicate} class DummyFilter extends UserDefinedPredicate[JInt, java.io.Serializable] { - override def keep(value: JInt, o: java.io.Serializable): Boolean = false + override def keep(value: JInt): Boolean = false override def canDrop(statistics: Statistics[JInt]): Boolean = false