diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java index e983f04c68..ae9b276005 100644 --- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java @@ -677,7 +677,7 @@ public String getMinimum() { @Override public String getMaximum() { - /* if we have upper bound is set (in case of truncation) + /* if we have upper bound set (in case of truncation) getMaximum will be null */ if(isUpperBoundSet) { return null; diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 3c4342a423..df78eac34f 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -17,22 +17,18 @@ */ package org.apache.orc.impl; -import org.apache.orc.CompressionKind; - -import java.io.IOException; -import java.math.BigDecimal; -import java.sql.Date; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TimeZone; - -import org.apache.orc.OrcFile; -import org.apache.orc.util.BloomFilter; -import org.apache.orc.util.BloomFilterIO; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.io.DiskRangeList; +import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; +import org.apache.hadoop.hive.ql.util.TimestampUtils; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import 
org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.io.Text; import org.apache.orc.BooleanColumnStatistics; import org.apache.orc.ColumnStatistics; import org.apache.orc.CompressionCodec; @@ -42,6 +38,7 @@ import org.apache.orc.DoubleColumnStatistics; import org.apache.orc.IntegerColumnStatistics; import org.apache.orc.OrcConf; +import org.apache.orc.OrcFile; import org.apache.orc.OrcProto; import org.apache.orc.Reader; import org.apache.orc.RecordReader; @@ -49,21 +46,21 @@ import org.apache.orc.StripeInformation; import org.apache.orc.TimestampColumnStatistics; import org.apache.orc.TypeDescription; +import org.apache.orc.util.BloomFilter; +import org.apache.orc.util.BloomFilterIO; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.io.DiskRange; -import org.apache.hadoop.hive.common.io.DiskRangeList; -import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.ql.util.TimestampUtils; -import org.apache.hadoop.io.Text; + +import java.io.IOException; +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TimeZone; public class RecordReaderImpl implements RecordReader { static final Logger LOG = LoggerFactory.getLogger(RecordReaderImpl.class); @@ -318,16 +315,30 @@ enum Location { * @param the type of the comparision * @return the 
location of the point */ - static Location compareToRange(Comparable point, T min, T max) { - int minCompare = point.compareTo(min); + static Location compareToRange(Comparable point, T min, T max, T lowerBound, T upperBound) { + + final boolean isLowerBoundSet = min == null && lowerBound != null; + final boolean isUpperBoundSet = max == null && upperBound != null; + + final int minCompare = isLowerBoundSet ? point.compareTo(lowerBound) : point.compareTo(min); if (minCompare < 0) { return Location.BEFORE; + } + + /* the lower bound is a truncated min, so compare == 0 means the predicate string is BEFORE the min value */ + else if (minCompare == 0 && isLowerBoundSet) { + return Location.BEFORE; } else if (minCompare == 0) { return Location.MIN; } - int maxCompare = point.compareTo(max); + + int maxCompare = isUpperBoundSet ? point.compareTo(upperBound) : point.compareTo(max); if (maxCompare > 0) { return Location.AFTER; + } + /* when the upper bound is set (max was truncated), equality with it is treated as AFTER */ + else if (maxCompare == 0 && isUpperBoundSet) { + return Location.AFTER; } else if (maxCompare == 0) { return Location.MAX; } @@ -359,7 +370,7 @@ static Object getMax(ColumnStatistics index, boolean useUTCTimestamp) { } else if (index instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMaximum(); } else if (index instanceof StringColumnStatistics) { - return ((StringColumnStatistics) index).getMaximum(); + return ((StringColumnStatistics) index).getUpperBound(); } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMaximum(); } else if (index instanceof DecimalColumnStatistics) { @@ -406,7 +417,7 @@ static Object getMin(ColumnStatistics index, boolean useUTCTimestamp) { } else if (index instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMinimum(); } else if (index instanceof StringColumnStatistics) { - return ((StringColumnStatistics) 
index).getMinimum(); + return ((StringColumnStatistics) index).getLowerBound(); } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMinimum(); } else if (index instanceof DecimalColumnStatistics) { @@ -464,6 +475,7 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, * @return the set of truth values that may be returned for the given * predicate. */ + static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, PredicateLeaf predicate, OrcProto.Stream.Kind kind, @@ -490,9 +502,21 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, return TruthValue.YES_NO_NULL; } } + + String lowerBound = null; + String upperBound = null; + + if(cs instanceof StringColumnStatistics) { + lowerBound = ((StringColumnStatistics) cs).getLowerBound(); + minValue = ((StringColumnStatistics) cs).getMinimum(); + + upperBound = ((StringColumnStatistics) cs).getUpperBound(); + maxValue = ((StringColumnStatistics) cs).getMaximum(); + } + return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(), BloomFilterIO.deserialize(kind, encoding, writerVersion, type, bloomFilter), - useUTCTimestamp); + useUTCTimestamp, lowerBound, upperBound); } /** @@ -527,13 +551,26 @@ public static TruthValue evaluatePredicate(ColumnStatistics stats, boolean useUTCTimestamp) { Object minValue = getMin(stats, useUTCTimestamp); Object maxValue = getMax(stats, useUTCTimestamp); - return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp); + + String lowerBound = null; + String upperBound = null; + + if(stats instanceof StringColumnStatistics) { + lowerBound = ((StringColumnStatistics) stats).getLowerBound(); + minValue = ((StringColumnStatistics) stats).getMinimum(); + + upperBound = ((StringColumnStatistics) stats).getUpperBound(); + maxValue = ((StringColumnStatistics) stats).getMaximum(); + } + + return evaluatePredicateRange(predicate, 
minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp, lowerBound, upperBound); } static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, - Object max, boolean hasNull, BloomFilter bloomFilter, boolean useUTCTimestamp) { + Object max, boolean hasNull, BloomFilter bloomFilter, + boolean useUTCTimestamp, Object lowerBound, Object upperBound) { // if we didn't have any values, everything must have been null - if (min == null) { + if (min == null && lowerBound == null) { if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) { return TruthValue.YES; } else { @@ -543,6 +580,10 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, return TruthValue.YES_NO_NULL; } + if(max == UNKNOWN_VALUE) { + return TruthValue.YES_NO; + } + TruthValue result; Object baseObj = predicate.getLiteral(); // Predicate object and stats objects are converted to the type of the predicate object. @@ -550,7 +591,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, Object maxValue = getBaseObjectForComparison(predicate.getType(), max); Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj); - result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull); + result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull, lowerBound, upperBound); if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) { return evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull, useUTCTimestamp); } else { @@ -577,20 +618,22 @@ private static boolean shouldEvaluateBloomFilter(PredicateLeaf predicate, private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Object predObj, Object minValue, Object maxValue, - boolean hasNull) { + boolean hasNull, + Object lowerBound, + Object upperBound) { Location loc; switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: - loc = compareToRange((Comparable) predObj, minValue, maxValue); 
+ loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.BEFORE || loc == Location.AFTER) { return TruthValue.NO; } else { return TruthValue.YES_NO; } case EQUALS: - loc = compareToRange((Comparable) predObj, minValue, maxValue); - if (minValue.equals(maxValue) && loc == Location.MIN) { + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); + if (minValue != null && minValue.equals(maxValue) && loc == Location.MIN) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc == Location.BEFORE || loc == Location.AFTER) { return hasNull ? TruthValue.NO_NULL : TruthValue.NO; @@ -598,7 +641,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } case LESS_THAN: - loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.AFTER) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc == Location.BEFORE || loc == Location.MIN) { @@ -607,7 +650,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } case LESS_THAN_EQUALS: - loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.AFTER || loc == Location.MAX) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc == Location.BEFORE) { @@ -616,12 +659,17 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } case IN: - if (minValue.equals(maxValue)) { + // Bounds exist only for string statistics; never dereference a null bound or value. + boolean minEqualsMax = + (predicate.getType() == PredicateLeaf.Type.STRING && lowerBound != null) 
+ ? lowerBound.equals(upperBound) + : (minValue != null && minValue.equals(maxValue)); + if (minEqualsMax) { // for a single value, look through to see if that value is in the // set for (Object arg : predicate.getLiteralList()) { predObj = getBaseObjectForComparison(predicate.getType(), arg); - loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.MIN) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } @@ -631,7 +679,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec // are all of the values outside of the range? for (Object arg : predicate.getLiteralList()) { predObj = getBaseObjectForComparison(predicate.getType(), arg); - loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.MIN || loc == Location.MIDDLE || loc == Location.MAX) { return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; @@ -646,10 +694,10 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec } Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0)); - loc = compareToRange((Comparable) predObj1, minValue, maxValue); + loc = compareToRange((Comparable) predObj1, minValue, maxValue, lowerBound, upperBound); if (loc == Location.BEFORE || loc == Location.MIN) { Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1)); - Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue); + Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue, lowerBound, upperBound); if (loc2 == Location.AFTER || loc2 == Location.MAX) { return hasNull ? 
TruthValue.YES_NULL : TruthValue.YES; } else if (loc2 == Location.BEFORE) { diff --git a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java new file mode 100644 index 0000000000..d018efa3e4 --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java @@ -0,0 +1,331 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.orc.TypeDescription; +import org.apache.orc.util.BloomFilter; +import org.junit.Test; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.apache.orc.impl.TestRecordReaderImpl.createPredicateLeaf; + +public class TestPredicatePushDownBounds { + + /** + * This test case handles the Equals corner case where the predicate is equal + * to truncated upper and lower bounds. 
+ * + * @throws Exception + */ + @Test + public void testCornerCases() { + + int stringLength = 1100; + byte[] utf8F; + byte[] utf8P; + + final TypeDescription schema = TypeDescription.createString(); + final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema); + + BloomFilter bf = new BloomFilter(100); + // FFF... to PPP... + for (int i = 70; i <= 80; i++) { + final String inputString = StringUtils + .repeat(Character.toString((char) i), stringLength); + bf.addString(inputString); + } + + final String longStringF = StringUtils + .repeat(Character.toString('F'), stringLength); + final String longStringP = StringUtils + .repeat(Character.toString('P'), stringLength); + + /* String that matches the upperbound value after truncation */ + final String upperboundString = + StringUtils.repeat(Character.toString('P'), 1023) + "Q"; + /* String that matches the lower value after truncation */ + final String lowerboundString = StringUtils + .repeat(Character.toString('F'), 1024); + + final String shortStringF = StringUtils.repeat(Character.toString('F'), 50); + final String shortStringP = + StringUtils.repeat(Character.toString('P'), 50) + "Q"; + + /* Test for a case EQUALS where only upperbound is set */ + final PredicateLeaf predicateUpperBoundEquals = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.EQUALS, + PredicateLeaf.Type.STRING, "x", upperboundString, null); + + /* Test for a case LESS_THAN where only upperbound is set */ + final PredicateLeaf predicateUpperBoundLessThan = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN, + PredicateLeaf.Type.STRING, "x", upperboundString, null); + + /* Test for a case LESS_THAN_EQUALS where only upperbound is set */ + final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, + PredicateLeaf.Type.STRING, "x", upperboundString, null); + + utf8F = shortStringF.getBytes(StandardCharsets.UTF_8); + 
stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + utf8P = longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateUpperBoundEquals, null)); + + assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl + .evaluatePredicate(stat, predicateUpperBoundLessThan, null)); + + assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl + .evaluatePredicate(stat, predicateUpperBoundLessThanEquals, null)); + + stat.reset(); + + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + utf8P = shortStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + /* Test for a case Equals where only lowerbound is set */ + final PredicateLeaf predicateLowerBoundEquals = createPredicateLeaf( + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", + lowerboundString, null); + + /* Test for a case LESS_THAN where only lowerbound is set */ + final PredicateLeaf predicateLowerBoundLessThan = createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x", + lowerboundString, null); + + /* Test for a case LESS_THAN_EQUALS where only lowerbound is set */ + final PredicateLeaf predicateLowerBoundLessThanEquals = createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x", + lowerboundString, null); + + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateLowerBoundEquals, null)); + + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateLowerBoundLessThan, bf)); + + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateLowerBoundLessThanEquals, null)); + + } + + /** + * A case where the search values 
fall within the upperbound and lower bound + * range. + * + * @throws Exception + */ + @Test + public void testNormalCase() throws Exception { + + int stringLength = 1100; + /* length of string in BF */ + int bfStringLength = 50; + //int stringLength = 11; + byte[] utf8F; + byte[] utf8P; + + final TypeDescription schema = TypeDescription.createString(); + final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema); + + BloomFilter bf = new BloomFilter(100); + // FFF... to PPP... + for (int i = 70; i <= 80; i++) { + final String inputString = StringUtils + .repeat(Character.toString((char) i), bfStringLength); + bf.addString(inputString); + } + + final String longStringF = StringUtils + .repeat(Character.toString('F'), stringLength); + final String longStringP = StringUtils + .repeat(Character.toString('P'), stringLength); + final String predicateString = StringUtils + .repeat(Character.toString('I'), 50); + + + /* Test for a case where only upperbound is set */ + final PredicateLeaf predicateEquals = createPredicateLeaf( + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", + predicateString, null); + + /* trigger lower bound */ + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + /* trigger upper bound */ + utf8P = longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + assertEquals(SearchArgument.TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(stat, predicateEquals, bf)); + + } + + /** + * Test for IN search arg when upper and lower bounds are set. + * + * @throws Exception + */ + @Test + public void testIN() throws Exception { + int stringLength = 1100; + byte[] utf8F; + byte[] utf8P; + + final TypeDescription schema = TypeDescription.createString(); + final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema); + + final BloomFilter bf = new BloomFilter(100); + // FFF... to PPP... 
+ for (int i = 70; i <= 80; i++) { + final String inputString = StringUtils + .repeat(Character.toString((char) i), stringLength); + bf.addString(inputString); + } + + final String longStringF = StringUtils + .repeat(Character.toString('F'), stringLength); + final String longStringP = StringUtils + .repeat(Character.toString('P'), stringLength); + + /* String that matches the upperbound value after truncation */ + final String upperboundString = + StringUtils.repeat(Character.toString('P'), 1023) + "Q"; + /* String that matches the lower value after truncation */ + final String lowerboundString = StringUtils + .repeat(Character.toString('F'), 1024); + + final String shortStringF = StringUtils.repeat(Character.toString('F'), 50); + final String shortStringP = + StringUtils.repeat(Character.toString('P'), 50) + "Q"; + + final List args = new ArrayList(); + args.add(upperboundString); + + /* set upper bound */ + utf8F = shortStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + utf8P = longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + /* Test for a case IN where only upper bound is set and test literal is equal to upperbound */ + final PredicateLeaf predicateUpperBoundSet = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + + assertEquals(SearchArgument.TruthValue.NO, + RecordReaderImpl.evaluatePredicate(stat, predicateUpperBoundSet, null)); + + /* Test for lower bound set only */ + args.clear(); + args.add(lowerboundString); + + stat.reset(); + /* set lower bound */ + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + utf8P = shortStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + /* Test for a case IN where only lower bound is set and 
the test literal is lowerbound string */ + final PredicateLeaf predicateLowerBoundSet = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + + assertEquals(SearchArgument.TruthValue.NO, + RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundSet, null)); + + /* NOTE(review): this IN predicate is built here but never asserted on in this test */ + final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + + + /* Test the case where both upper and lower bounds are set */ + args.clear(); + args.add(lowerboundString); + args.add(upperboundString); + + stat.reset(); + /* set upper and lower bound */ + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + utf8P = longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + final PredicateLeaf predicateUpperLowerBoundSet = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateUpperLowerBoundSet, null)); + + /* test the boundary condition */ + args.clear(); + args.add(longStringF); + args.add(longStringP); + + stat.reset(); + /* set upper and lower bound */ + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + utf8P = longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + final PredicateLeaf predicateUpperLowerBoundSetBoundary = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + + assertEquals(SearchArgument.TruthValue.YES_NO, RecordReaderImpl 
.evaluatePredicate(stat, predicateUpperLowerBoundSetBoundary, null)); + + } + +} diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index 529a08b12c..37083ee2fb 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -36,6 +36,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; import java.text.DateFormat; @@ -47,6 +48,7 @@ import java.util.List; import java.util.TimeZone; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; @@ -221,61 +223,61 @@ public void testMaxLengthToReader() throws Exception { @Test public void testCompareToRangeInt() throws Exception { assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange(19L, 20L, 40L)); + RecordReaderImpl.compareToRange(19L, 20L, 40L, null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange(41L, 20L, 40L)); + RecordReaderImpl.compareToRange(41L, 20L, 40L, null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange(20L, 20L, 40L)); + RecordReaderImpl.compareToRange(20L, 20L, 40L, null, null)); assertEquals(Location.MIDDLE, - RecordReaderImpl.compareToRange(21L, 20L, 40L)); + RecordReaderImpl.compareToRange(21L, 20L, 40L, null, null)); assertEquals(Location.MAX, - RecordReaderImpl.compareToRange(40L, 20L, 40L)); + RecordReaderImpl.compareToRange(40L, 20L, 40L, null, null)); assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange(0L, 1L, 1L)); + RecordReaderImpl.compareToRange(0L, 1L, 1L, null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange(1L, 1L, 1L)); + RecordReaderImpl.compareToRange(1L, 1L, 1L, null, null)); 
assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange(2L, 1L, 1L)); + RecordReaderImpl.compareToRange(2L, 1L, 1L, null, null)); } @Test public void testCompareToRangeString() throws Exception { assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("a", "b", "c")); + RecordReaderImpl.compareToRange("a", "b", "c", null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("d", "b", "c")); + RecordReaderImpl.compareToRange("d", "b", "c", null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("b", "b", "c")); + RecordReaderImpl.compareToRange("b", "b", "c", null, null)); assertEquals(Location.MIDDLE, - RecordReaderImpl.compareToRange("bb", "b", "c")); + RecordReaderImpl.compareToRange("bb", "b", "c", null, null)); assertEquals(Location.MAX, - RecordReaderImpl.compareToRange("c", "b", "c")); + RecordReaderImpl.compareToRange("c", "b", "c", null, null)); assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("a", "b", "b")); + RecordReaderImpl.compareToRange("a", "b", "b", null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("b", "b", "b")); + RecordReaderImpl.compareToRange("b", "b", "b", null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("c", "b", "b")); + RecordReaderImpl.compareToRange("c", "b", "b", null, null)); } @Test public void testCompareToCharNeedConvert() throws Exception { assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("apple", "hello", "world")); + RecordReaderImpl.compareToRange("apple", "hello", "world", null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("zombie", "hello", "world")); + RecordReaderImpl.compareToRange("zombie", "hello", "world", null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("hello", "hello", "world")); + RecordReaderImpl.compareToRange("hello", "hello", "world", null, null)); assertEquals(Location.MIDDLE, - RecordReaderImpl.compareToRange("pilot", "hello", 
"world")); + RecordReaderImpl.compareToRange("pilot", "hello", "world", null, null)); assertEquals(Location.MAX, - RecordReaderImpl.compareToRange("world", "hello", "world")); + RecordReaderImpl.compareToRange("world", "hello", "world", null, null)); assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("apple", "hello", "hello")); + RecordReaderImpl.compareToRange("apple", "hello", "hello", null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("hello", "hello", "hello")); + RecordReaderImpl.compareToRange("hello", "hello", "hello", null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("zombie", "hello", "hello")); + RecordReaderImpl.compareToRange("zombie", "hello", "hello", null, null)); } @Test @@ -338,6 +340,7 @@ private static OrcProto.ColumnStatistics createDoubleStats(double min, double ma return OrcProto.ColumnStatistics.newBuilder().setDoubleStatistics(dblStats.build()).build(); } + //fixme private static OrcProto.ColumnStatistics createStringStats(String min, String max, boolean hasNull) { OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();