Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
01e4cdf
Merge remote-tracking branch 'upstream/master'
gatorsmile Nov 13, 2015
6835704
Merge remote-tracking branch 'upstream/master'
gatorsmile Nov 14, 2015
9180687
Merge remote-tracking branch 'upstream/master'
gatorsmile Nov 14, 2015
b38a21e
SPARK-11633
gatorsmile Nov 17, 2015
d2b84af
Merge remote-tracking branch 'upstream/master' into joinMakeCopy
gatorsmile Nov 17, 2015
fda8025
Merge remote-tracking branch 'upstream/master'
gatorspark Nov 17, 2015
ac0dccd
Merge branch 'master' of https://github.com/gatorsmile/spark
gatorspark Nov 17, 2015
6e0018b
Merge remote-tracking branch 'upstream/master'
Nov 20, 2015
0546772
converge
gatorsmile Nov 20, 2015
b37a64f
converge
gatorsmile Nov 20, 2015
661260b
Merge remote-tracking branch 'upstream/master'
gatorsmile Nov 23, 2015
2dfa0fd
Merge remote-tracking branch 'upstream/master'
gatorsmile Nov 25, 2015
d929d9b
Merge remote-tracking branch 'upstream/master'
gatorsmile Nov 25, 2015
4070d2f
Merge remote-tracking branch 'upstream/master'
gatorsmile Dec 7, 2015
38dcfb2
Merge remote-tracking branch 'upstream/master'
gatorsmile Dec 9, 2015
cb3fc83
Merge remote-tracking branch 'upstream/master'
gatorsmile Dec 12, 2015
79be2c3
added a condition for `Not` operator in ParquetFilter.
gatorsmile Dec 12, 2015
2ff70bf
added two more cases for BooleanSimplification
gatorsmile Dec 12, 2015
c9af771
improved the code comments.
gatorsmile Dec 13, 2015
50733c6
push down IN.
gatorsmile Dec 13, 2015
e219ac1
address comments.
gatorsmile Dec 17, 2015
df3d8ab
removed the fix about 'Not' filtering
gatorsmile Dec 18, 2015
8dbacc7
Merge remote-tracking branch 'upstream/master'
gatorsmile Dec 18, 2015
7678ef5
Merge branch 'parquetFilterNot' into pFilterIn
gatorsmile Dec 18, 2015
64cd5e6
Merge branch 'parquetFilterNot' into pFilterIn
gatorsmile Dec 18, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,23 +86,27 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {

checkCondition(
('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5),
('a === 'b || 'b > 3 && 'a > 3 && 'a < 5))
'a === 'b || 'b > 3 && 'a > 3 && 'a < 5)
}

// Exercises the pattern `a && (!a || b)` in all four operand orderings.
// Presumably `checkCondition(input, expected)` asserts that simplifying `input`
// yields `expected`; its definition is outside this view -- TODO confirm.
// NOTE(review): every expectation below appears twice, once with redundant
// parentheses and once without. This looks like the old and new lines of a diff
// hunk shown together; confirm which form is meant to remain.
test("a && (!a || b)") {
// a && (!a || b)  ==>  a && b
checkCondition(('a && (!('a) || 'b )), ('a && 'b))
checkCondition('a && (!'a || 'b ), 'a && 'b)

// a && (b || !a)  ==>  a && b
checkCondition(('a && ('b || !('a) )), ('a && 'b))
checkCondition('a && ('b || !'a ), 'a && 'b)

// (!a || b) && a  ==>  b && a
checkCondition(((!('a) || 'b ) && 'a), ('b && 'a))
checkCondition((!'a || 'b ) && 'a, 'b && 'a)

// (b || !a) && a  ==>  b && a
checkCondition((('b || !('a) ) && 'a), ('b && 'a))
checkCondition(('b || !'a ) && 'a, 'b && 'a)
}

test("!(a && b) , !(a || b)") {
checkCondition((!('a && 'b)), (!('a) || !('b)))
test("DeMorgan's law") {
checkCondition(!('a && 'b), !'a || !'b)

checkCondition(!('a || 'b), (!('a) && !('b)))
checkCondition(!('a || 'b), !'a && !'b)

checkCondition(!(('a && 'b) || ('c && 'd)), (!'a || !'b) && (!'c || !'d))

checkCondition(!(('a || 'b) && ('c || 'd)), (!'a && !'b) || (!'c && !'d))
}

private val caseInsensitiveAnalyzer =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ private[sql] object ParquetFilters {
case sources.GreaterThanOrEqual(name, value) =>
makeGtEq.lift(dataTypeOf(name)).map(_(name, value))

case sources.In(name, valueSet) =>
makeInSet.lift(dataTypeOf(name)).map(_(name, valueSet.toSet))

case sources.And(lhs, rhs) =>
// Here, it is not safe to just convert one side if we do not understand the
// other side. Here is an example used to explain the reason.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -381,4 +381,34 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
}
}
}

test("SPARK-11164: test the parquet filter in") {
  import testImplicits._

  // Each entry pairs a WHERE clause with the number of rows the scan is expected to
  // return once stripSparkFilter has been applied to the query. The table written
  // below holds (a, b) = (1.0, 1), (2.0, 2), (3.0, 0), (4.0, 1), (5.0, 2).
  val expectedRowCounts = Seq(
    "b in (0,2)" -> 3,
    "not (b in (1))" -> 3,
    "not (b in (1,3) or a <= 2)" -> 2,
    "not (b in (1,3) and a <= 2)" -> 4,
    "not (a <= 2)" -> 3)

  withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED.key -> "true") {
    withSQLConf(SQLConf.PARQUET_UNSAFE_ROW_RECORD_READER_ENABLED.key -> "false") {
      withTempPath { tempDir =>
        val tablePath = s"${tempDir.getCanonicalPath}/table1"
        val rows = (1 to 5).map(i => (i.toFloat, i % 3))
        rows.toDF("a", "b").write.parquet(tablePath)

        // A filter that is pushed down to Parquet can be applied to every row by
        // Parquet itself, so counting the rows that come back from the scan tells us
        // whether the pushdown actually took effect.
        expectedRowCounts.foreach { case (predicate, expected) =>
          val filtered = sqlContext.read.parquet(tablePath).where(predicate)
          assert(stripSparkFilter(filtered).count == expected)
        }
      }
    }
  }
}
}