Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -738,12 +738,8 @@ public void testIntegerNotIn() {
shouldRead = shouldRead(notIn("all_nulls", 1, 2));
Assert.assertTrue("Should read: notIn on all nulls column", shouldRead);

// ORC-623: ORC seems to incorrectly skip a row group for a notIn(column, {X, ...}) predicate on a column which
// has only 1 non-null value X but also has nulls
if (format != FileFormat.ORC) {
shouldRead = shouldRead(notIn("some_nulls", "aaa", "some"));
Assert.assertTrue("Should read: notIn on some nulls column", shouldRead);
}
shouldRead = shouldRead(notIn("some_nulls", "aaa", "some"));
Assert.assertTrue("Should read: notIn on some nulls column", shouldRead);

shouldRead = shouldRead(notIn("no_nulls", "aaa", ""));
if (format == FileFormat.PARQUET) {
Expand All @@ -755,6 +751,12 @@ public void testIntegerNotIn() {
}
}

@Test
public void testSomeNullsNotEq() {
boolean shouldRead = shouldRead(notEqual("some_nulls", "some"));
Assert.assertTrue("Should read: notEqual on some nulls column", shouldRead);
}

private boolean shouldRead(Expression expression) {
return shouldRead(expression, true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,19 @@ public <T> Action eq(Bound<T> expr, Literal<T> lit) {

@Override
public <T> Action notEq(Bound<T> expr, Literal<T> lit) {
return () -> this.builder.startNot()
.equals(idToColumnName.get(expr.ref().fieldId()),
type(expr.ref().type()),
literal(expr.ref().type(), lit.value()))
.end();
// NOTE: ORC uses SQL semantics for Search Arguments, so an expression like
// `col != 1` will exclude rows where col is NULL along with rows where col = 1
// In contrast, Iceberg's Expressions will keep rows with NULL values
// So the equivalent ORC Search Argument for an Iceberg Expression `col != x`
// is `col IS NULL OR col != x`
return () -> {
this.builder.startOr();
isNull(expr).invoke();
this.builder.startNot();
eq(expr, lit).invoke();
this.builder.end(); // end NOT
this.builder.end(); // end OR
};
}

@Override
Expand All @@ -185,10 +193,19 @@ public <T> Action in(Bound<T> expr, Set<T> literalSet) {

@Override
public <T> Action notIn(Bound<T> expr, Set<T> literalSet) {
return () -> this.builder.startNot()
.in(idToColumnName.get(expr.ref().fieldId()), type(expr.ref().type()),
literalSet.stream().map(lit -> literal(expr.ref().type(), lit)).toArray(Object[]::new))
.end();
// NOTE: ORC uses SQL semantics for Search Arguments, so an expression like
// `col NOT IN {1}` will exclude rows where col is NULL along with rows where col = 1
// In contrast, Iceberg's Expressions will keep rows with NULL values
// So the equivalent ORC Search Argument for an Iceberg Expression `col NOT IN {x}`
// is `col IS NULL OR col NOT IN {x}`
return () -> {
this.builder.startOr();
isNull(expr).invoke();
this.builder.startNot();
in(expr, literalSet).invoke();
this.builder.end(); // end NOT
this.builder.end(); // end OR
};
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ public void testPrimitiveTypes() {
.startNot().lessThanEquals("`float`", Type.FLOAT, 5.0).end()
.startNot().lessThan("`double`", Type.FLOAT, 500.0).end()
.equals("`boolean`", Type.BOOLEAN, true)
.startNot().equals("`string`", Type.STRING, "test").end()
.startOr().isNull("`string`", Type.STRING).startNot().equals("`string`", Type.STRING, "test").end().end()
.in("`decimal`", Type.DECIMAL, new HiveDecimalWritable("-123.45"), new HiveDecimalWritable("123.45"))
.startNot().in("`time`", Type.LONG, 100L, 200L).end()
.startOr().isNull("`time`", Type.LONG).startNot().in("`time`", Type.LONG, 100L, 200L).end().end()
.end()
.build();

Expand Down Expand Up @@ -382,8 +382,9 @@ public void testModifiedComplexSchemaNameMapping() {
.equals("`newMap_r5`.`_key`", Type.STRING, "country")
// Drops listOfStruct.long
.equals("`listOfStruct`.`_elem`.`newLong_r10`", Type.LONG, 100L)
.startNot()
.equals("`listOfPeople`.`_elem`.`name`", Type.STRING, "Bob")
.startOr()
.isNull("`listOfPeople`.`_elem`.`name`", Type.STRING)
.startNot().equals("`listOfPeople`.`_elem`.`name`", Type.STRING, "Bob").end()
.end()
.lessThan("`listOfPeople`.`_elem`.`age_r14`", Type.LONG, 30L)
.end()
Expand Down