From 4ab46ecc34a45d923186af4869bad0102761da69 Mon Sep 17 00:00:00 2001 From: Yash Datta Date: Sat, 18 Oct 2014 12:34:33 +0530 Subject: [PATCH] PARQUET-116: Pass a filter object to user defined predicate in filter2 api --- .../main/java/parquet/filter2/predicate/FilterApi.java | 4 ++-- .../main/java/parquet/filter2/predicate/Operators.java | 8 +++++++- .../parquet/filter2/predicate/UserDefinedPredicate.java | 5 +++-- .../src/test/java/parquet/filter2/predicate/DummyUdp.java | 2 +- .../parquet/filter2/predicate/TestFilterApiMethods.java | 4 ++-- .../filter2/predicate/TestLogicalInverseRewriter.java | 6 +++--- .../parquet/filter2/predicate/TestLogicalInverter.java | 8 ++++---- .../predicate/TestSchemaCompatibilityValidator.java | 8 ++++---- .../IncrementallyUpdatedFilterPredicateGenerator.java | 6 ++++-- .../filter2/recordlevel/TestRecordLevelFilters.java | 4 ++-- .../filter2/statisticslevel/TestStatisticsFilter.java | 6 +++--- .../src/main/scala/parquet/filter2/dsl/Dsl.scala | 2 +- .../src/test/scala/parquet/filter2/dsl/DslTest.scala | 4 ++-- 13 files changed, 38 insertions(+), 29 deletions(-) diff --git a/parquet-column/src/main/java/parquet/filter2/predicate/FilterApi.java b/parquet-column/src/main/java/parquet/filter2/predicate/FilterApi.java index 1dd2bbc1c5..05b963843a 100644 --- a/parquet-column/src/main/java/parquet/filter2/predicate/FilterApi.java +++ b/parquet-column/src/main/java/parquet/filter2/predicate/FilterApi.java @@ -146,8 +146,8 @@ public static , C extends Column & SupportsLtGt> GtEq * Keeps records that pass the provided {@link UserDefinedPredicate} */ public static , U extends UserDefinedPredicate> - UserDefined userDefined(Column column, Class clazz) { - return new UserDefined(column, clazz); + UserDefined userDefined(Column column, Class clazz, Object o) { + return new UserDefined(column, clazz, o); } /** diff --git a/parquet-column/src/main/java/parquet/filter2/predicate/Operators.java b/parquet-column/src/main/java/parquet/filter2/predicate/Operators.java index 5d13f8c4ca..2a580585a9 100644 --- a/parquet-column/src/main/java/parquet/filter2/predicate/Operators.java +++ b/parquet-column/src/main/java/parquet/filter2/predicate/Operators.java @@ -344,14 +344,16 @@ public static final class UserDefined, U extends UserDef private final Column column; private final Class udpClass; private final String toString; + private final Object o; private static final String INSTANTIATION_ERROR_MESSAGE = "Could not instantiate custom filter: %s. User defined predicates must be static classes with a default constructor."; - UserDefined(Column column, Class udpClass) { + UserDefined(Column column, Class udpClass, Object o) { this.column = checkNotNull(column, "column"); this.udpClass = checkNotNull(udpClass, "udpClass"); String name = getClass().getSimpleName().toLowerCase(); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + udpClass.getName() + ")"; + this.o = o; // defensively try to instantiate the class early to make sure that it's possible getUserDefinedPredicate(); @@ -365,6 +367,10 @@ public Class getUserDefinedPredicateClass() { return udpClass; } + public Object getFilterObject() { + return o; + } + public U getUserDefinedPredicate() { try { return udpClass.newInstance(); diff --git a/parquet-column/src/main/java/parquet/filter2/predicate/UserDefinedPredicate.java b/parquet-column/src/main/java/parquet/filter2/predicate/UserDefinedPredicate.java index 99f6c76df2..48004bfc1c 100644 --- a/parquet-column/src/main/java/parquet/filter2/predicate/UserDefinedPredicate.java +++ b/parquet-column/src/main/java/parquet/filter2/predicate/UserDefinedPredicate.java @@ -22,8 +22,9 @@ public UserDefinedPredicate() { } /** * Return true to keep the record with this value, false to drop it. + * o is a filter object that can be used for filtering the value. */ - public abstract boolean keep(T value); + public abstract boolean keep(T value, Object o); /** * Given information about a group of records (eg, the min and max value) @@ -87,4 +88,4 @@ public UserDefinedPredicate() { } * } */ public abstract boolean inverseCanDrop(Statistics statistics); -} \ No newline at end of file +} diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java b/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java index 277fa43326..b1e530deef 100644 --- a/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java +++ b/parquet-column/src/test/java/parquet/filter2/predicate/DummyUdp.java @@ -3,7 +3,7 @@ public class DummyUdp extends UserDefinedPredicate { @Override - public boolean keep(Integer value) { + public boolean keep(Integer value, Object o) { return false; } diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestFilterApiMethods.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestFilterApiMethods.java index dafd7fd1f1..7c2a5451d7 100644 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestFilterApiMethods.java +++ b/parquet-column/src/test/java/parquet/filter2/predicate/TestFilterApiMethods.java @@ -79,7 +79,7 @@ public void testToString() { @Test public void testUdp() { - FilterPredicate predicate = or(eq(doubleColumn, 12.0), userDefined(intColumn, DummyUdp.class)); + FilterPredicate predicate = or(eq(doubleColumn, 12.0), userDefined(intColumn, DummyUdp.class, null)); assertTrue(predicate instanceof Or); FilterPredicate ud = ((Or) predicate).getRight(); assertTrue(ud instanceof UserDefined); @@ -90,7 +90,7 @@ public void testUdp() { @Test public void testSerializable() throws Exception { BinaryColumn binary = binaryColumn("foo"); - FilterPredicate p = or(and(userDefined(intColumn, DummyUdp.class), predicate), eq(binary, Binary.fromString("hi"))); + FilterPredicate p = or(and(userDefined(intColumn, DummyUdp.class, null), predicate), eq(binary, Binary.fromString("hi"))); ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos); oos.writeObject(p); diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverseRewriter.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverseRewriter.java index 0aa360b5ef..86fbd8414e 100644 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverseRewriter.java +++ b/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverseRewriter.java @@ -32,7 +32,7 @@ public class TestLogicalInverseRewriter { or(ltEq(doubleColumn, 12.0), and( not(or(eq(intColumn, 7), notEq(intColumn, 17))), - userDefined(intColumn, DummyUdp.class)))), + userDefined(intColumn, DummyUdp.class, null)))), or(gt(doubleColumn, 100.0), not(gtEq(intColumn, 77)))); private static final FilterPredicate complexCollapsed = @@ -40,7 +40,7 @@ public class TestLogicalInverseRewriter { and(gt(doubleColumn, 12.0), or( or(eq(intColumn, 7), notEq(intColumn, 17)), - new LogicalNotUserDefined(userDefined(intColumn, DummyUdp.class)))), + new LogicalNotUserDefined(userDefined(intColumn, DummyUdp.class, null)))), or(gt(doubleColumn, 100.0), lt(intColumn, 77))); private static void assertNoOp(FilterPredicate p) { @@ -49,7 +49,7 @@ private static void assertNoOp(FilterPredicate p) { @Test public void testBaseCases() { - UserDefined ud = userDefined(intColumn, DummyUdp.class); + UserDefined ud = userDefined(intColumn, DummyUdp.class, null); assertNoOp(eq(intColumn, 17)); assertNoOp(notEq(intColumn, 17)); diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverter.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverter.java index 19e6b68190..40d73871b0 100644 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverter.java +++ b/parquet-column/src/test/java/parquet/filter2/predicate/TestLogicalInverter.java @@ -26,14 +26,14 @@ public class TestLogicalInverter { private static final IntColumn intColumn = intColumn("a.b.c"); private static final DoubleColumn doubleColumn = doubleColumn("a.b.c"); - private static final UserDefined ud = userDefined(intColumn, DummyUdp.class); + private static final UserDefined ud = userDefined(intColumn, DummyUdp.class, null); private static final FilterPredicate complex = and( or(ltEq(doubleColumn, 12.0), and( not(or(eq(intColumn, 7), notEq(intColumn, 17))), - userDefined(intColumn, DummyUdp.class))), + userDefined(intColumn, DummyUdp.class, null))), or(gt(doubleColumn, 100.0), notEq(intColumn, 77))); private static final FilterPredicate complexInverse = @@ -41,7 +41,7 @@ public class TestLogicalInverter { and(gt(doubleColumn, 12.0), or( or(eq(intColumn, 7), notEq(intColumn, 17)), - new LogicalNotUserDefined(userDefined(intColumn, DummyUdp.class)))), + new LogicalNotUserDefined(userDefined(intColumn, DummyUdp.class, null)))), and(ltEq(doubleColumn, 100.0), eq(intColumn, 77))); @Test @@ -63,7 +63,7 @@ public void testBaseCases() { assertEquals(eq(intColumn, 17), invert(not(eq(intColumn, 17)))); - UserDefined ud = userDefined(intColumn, DummyUdp.class); + UserDefined ud = userDefined(intColumn, DummyUdp.class, null); assertEquals(new LogicalNotUserDefined(ud), invert(ud)); assertEquals(ud, invert(not(ud))); assertEquals(ud, invert(new LogicalNotUserDefined(ud))); diff --git a/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java b/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java index e9e745fb7c..4abdfb95f0 100644 --- a/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java +++ b/parquet-column/src/test/java/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java @@ -46,12 +46,12 @@ public class TestSchemaCompatibilityValidator { or(ltEq(stringC, Binary.fromString("foo")), and( not(or(eq(intBar, 17), notEq(intBar, 17))), - userDefined(intBar, DummyUdp.class))), + userDefined(intBar, DummyUdp.class, null))), or(gt(stringC, Binary.fromString("bar")), notEq(stringC, Binary.fromString("baz")))); static class LongDummyUdp extends UserDefinedPredicate { @Override - public boolean keep(Long value) { + public boolean keep(Long value, Object o) { return false; } @@ -71,7 +71,7 @@ public boolean inverseCanDrop(Statistics statistics) { or(ltEq(stringC, Binary.fromString("foo")), and( not(or(eq(longBar, 17L), notEq(longBar, 17L))), - userDefined(longBar, LongDummyUdp.class))), + userDefined(longBar, LongDummyUdp.class, null))), or(gt(stringC, Binary.fromString("bar")), notEq(stringC, Binary.fromString("baz")))); private static final FilterPredicate complexMixedType = @@ -79,7 +79,7 @@ public boolean inverseCanDrop(Statistics statistics) { or(ltEq(stringC, Binary.fromString("foo")), and( not(or(eq(intBar, 17), notEq(longBar, 17L))), - userDefined(longBar, LongDummyUdp.class))), + userDefined(longBar, LongDummyUdp.class, null))), or(gt(stringC, Binary.fromString("bar")), notEq(stringC, Binary.fromString("baz")))); @Test diff --git a/parquet-generator/src/main/java/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java b/parquet-generator/src/main/java/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java index e0f08e4989..b2ada4ee66 100644 --- a/parquet-generator/src/main/java/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java +++ b/parquet-generator/src/main/java/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java @@ -221,6 +221,8 @@ private void addUdpBegin() throws IOException { " ValueInspector valueInspector = null;\n" + "\n" + " final U udp = pred.getUserDefinedPredicate();\n" + + "\n" + + " final Object o = pred.getFilterObject();\n" + "\n"); } @@ -229,13 +231,13 @@ private void addUdpCase(TypeInfo info, boolean invert)throws IOException { " valueInspector = new ValueInspector() {\n" + " @Override\n" + " public void updateNull() {\n" + - " setResult(" + (invert ? "!" : "") + "udp.keep(null));\n" + + " setResult(" + (invert ? "!" : "") + "udp.keep(null, o));\n" + " }\n" + "\n" + " @SuppressWarnings(\"unchecked\")\n" + " @Override\n" + " public void update(" + info.primitiveName + " value) {\n" + - " setResult(" + (invert ? "!" : "") + "udp.keep((T) (Object) value));\n" + + " setResult(" + (invert ? "!" : "") + "udp.keep((T) (Object) value, o));\n" + " }\n" + " };\n" + " }\n\n"); diff --git a/parquet-hadoop/src/test/java/parquet/filter2/recordlevel/TestRecordLevelFilters.java b/parquet-hadoop/src/test/java/parquet/filter2/recordlevel/TestRecordLevelFilters.java index d771ead163..204639d59c 100644 --- a/parquet-hadoop/src/test/java/parquet/filter2/recordlevel/TestRecordLevelFilters.java +++ b/parquet-hadoop/src/test/java/parquet/filter2/recordlevel/TestRecordLevelFilters.java @@ -143,7 +143,7 @@ public boolean keep(User u) { public static class StartWithP extends UserDefinedPredicate { @Override - public boolean keep(Binary value) { + public boolean keep(Binary value, Object o) { if (value == null) { return false; } @@ -165,7 +165,7 @@ public boolean inverseCanDrop(Statistics statistics) { public void testNameNotStartWithP() throws Exception { BinaryColumn name = binaryColumn("name"); - FilterPredicate pred = not(userDefined(name, StartWithP.class)); + FilterPredicate pred = not(userDefined(name, StartWithP.class, null)); List found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred)); diff --git a/parquet-hadoop/src/test/java/parquet/filter2/statisticslevel/TestStatisticsFilter.java b/parquet-hadoop/src/test/java/parquet/filter2/statisticslevel/TestStatisticsFilter.java index 4e75b207ce..2cc018d090 100644 --- a/parquet-hadoop/src/test/java/parquet/filter2/statisticslevel/TestStatisticsFilter.java +++ b/parquet-hadoop/src/test/java/parquet/filter2/statisticslevel/TestStatisticsFilter.java @@ -222,7 +222,7 @@ public void testOr() { public static class SevensAndEightsUdp extends UserDefinedPredicate { @Override - public boolean keep(Integer value) { + public boolean keep(Integer value, Object o) { throw new RuntimeException("this method should not be called"); } @@ -239,8 +239,8 @@ public boolean inverseCanDrop(Statistics statistics) { @Test public void testUdp() { - FilterPredicate pred = userDefined(intColumn, SevensAndEightsUdp.class); - FilterPredicate invPred = LogicalInverseRewriter.rewrite(not(userDefined(intColumn, SevensAndEightsUdp.class))); + FilterPredicate pred = userDefined(intColumn, SevensAndEightsUdp.class, null); + FilterPredicate invPred = LogicalInverseRewriter.rewrite(not(userDefined(intColumn, SevensAndEightsUdp.class, null))); IntStatistics seven = new IntStatistics(); seven.setMinMax(7, 7); diff --git a/parquet-scala/src/main/scala/parquet/filter2/dsl/Dsl.scala b/parquet-scala/src/main/scala/parquet/filter2/dsl/Dsl.scala index 7e3997758a..78f6e2b371 100644 --- a/parquet-scala/src/main/scala/parquet/filter2/dsl/Dsl.scala +++ b/parquet-scala/src/main/scala/parquet/filter2/dsl/Dsl.scala @@ -29,7 +29,7 @@ object Dsl { private[Dsl] trait Column[T <: Comparable[T], C <: Operators.Column[T]] { val javaColumn: C - def filterBy[U <: UserDefinedPredicate[T]](clazz: Class[U]) = FilterApi.userDefined(javaColumn, clazz) + def filterBy[U <: UserDefinedPredicate[T]](clazz: Class[U]) = FilterApi.userDefined(javaColumn, clazz, null) // this is not supported because it allows for easy mistakes. For example: // val pred = IntColumn("foo") == "hello" diff --git a/parquet-scala/src/test/scala/parquet/filter2/dsl/DslTest.scala b/parquet-scala/src/test/scala/parquet/filter2/dsl/DslTest.scala index 23aa5377f2..a9ad7eb3d1 100644 --- a/parquet-scala/src/test/scala/parquet/filter2/dsl/DslTest.scala +++ b/parquet-scala/src/test/scala/parquet/filter2/dsl/DslTest.scala @@ -9,7 +9,7 @@ import parquet.filter2.predicate.Operators.{Or, UserDefined, DoubleColumn => JDo import parquet.filter2.predicate.{FilterApi, Statistics, UserDefinedPredicate} class DummyFilter extends UserDefinedPredicate[JInt] { - override def keep(value: JInt): Boolean = false + override def keep(value: JInt, o: Object): Boolean = false override def canDrop(statistics: Statistics[JInt]): Boolean = false @@ -39,7 +39,7 @@ class DslTest extends FlatSpec{ val abc = IntColumn("a.b.c") val pred = (abc > 10) || abc.filterBy(classOf[DummyFilter]) - val expected = FilterApi.or(FilterApi.gt[JInt, JIntColumn](abc.javaColumn, 10), FilterApi.userDefined(abc.javaColumn, classOf[DummyFilter])) + val expected = FilterApi.or(FilterApi.gt[JInt, JIntColumn](abc.javaColumn, 10), FilterApi.userDefined(abc.javaColumn, classOf[DummyFilter], null)) assert(pred === expected) val intUserDefined = pred.asInstanceOf[Or].getRight.asInstanceOf[UserDefined[JInt, DummyFilter]]