Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,25 +53,25 @@ public List<SingleColumn> generateChecksumColumns(Column column)
Expression checksum;

if (elementType.isOrderable()) {
FunctionCall arraySort = new FunctionCall(QualifiedName.of("array_sort"), ImmutableList.of(column.getIdentifier()));
FunctionCall arraySort = new FunctionCall(QualifiedName.of("array_sort"), ImmutableList.of(column.getExpression()));

if (elementType instanceof ArrayType || elementType instanceof RowType) {
checksum = new CoalesceExpression(
new FunctionCall(QualifiedName.of("checksum"), ImmutableList.of(new TryExpression(arraySort))),
new FunctionCall(QualifiedName.of("checksum"), ImmutableList.of(column.getIdentifier())));
new FunctionCall(QualifiedName.of("checksum"), ImmutableList.of(column.getExpression())));
}
else {
checksum = new FunctionCall(QualifiedName.of("checksum"), ImmutableList.of(arraySort));
}
}
else {
checksum = new FunctionCall(QualifiedName.of("checksum"), ImmutableList.of(column.getIdentifier()));
checksum = new FunctionCall(QualifiedName.of("checksum"), ImmutableList.of(column.getExpression()));
}

Expression arrayCardinalitySum = new CoalesceExpression(
new FunctionCall(
QualifiedName.of("sum"),
ImmutableList.of(new FunctionCall(QualifiedName.of("cardinality"), ImmutableList.of(column.getIdentifier())))),
ImmutableList.of(new FunctionCall(QualifiedName.of("cardinality"), ImmutableList.of(column.getExpression())))),
new LongLiteral("0"));

return ImmutableList.of(
Expand All @@ -80,7 +80,7 @@ public List<SingleColumn> generateChecksumColumns(Column column)
}

@Override
public ColumnMatchResult validate(Column column, ChecksumResult controlResult, ChecksumResult testResult)
public List<ColumnMatchResult> validate(Column column, ChecksumResult controlResult, ChecksumResult testResult)
{
String checksumColumnAlias = getChecksumColumnAlias(column);
Object controlChecksum = controlResult.getChecksum(checksumColumnAlias);
Expand All @@ -90,14 +90,15 @@ public ColumnMatchResult validate(Column column, ChecksumResult controlResult, C
Object controlCardinalitySum = controlResult.getChecksum(cardinalitySumColumnAlias);
Object testCardinalitySum = testResult.getChecksum(cardinalitySumColumnAlias);

return new ColumnMatchResult(
return ImmutableList.of(new ColumnMatchResult(
Objects.equals(controlChecksum, testChecksum) && Objects.equals(controlCardinalitySum, testCardinalitySum),
column,
format(
"control(checksum: %s, cardinality_sum: %s) test(checksum: %s, cardinality_sum: %s)",
controlChecksum,
controlCardinalitySum,
testChecksum,
testCardinalitySum));
testCardinalitySum)));
}

private static String getChecksumColumnAlias(Column column)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,54 +23,43 @@
import com.facebook.presto.verifier.framework.Column;
import com.facebook.presto.verifier.framework.Column.Category;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import javax.inject.Inject;
import javax.inject.Provider;

import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import static com.facebook.presto.sql.QueryUtil.simpleQuery;
import static com.facebook.presto.verifier.framework.Column.Category.ARRAY;
import static com.facebook.presto.verifier.framework.Column.Category.FLOATING_POINT;
import static com.facebook.presto.verifier.framework.Column.Category.ROW;
import static com.facebook.presto.verifier.framework.Column.Category.SIMPLE;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.util.function.Function.identity;

public class ChecksumValidator
{
private final Map<Category, ColumnValidator> columnValidators;
private final Map<Category, Provider<ColumnValidator>> columnValidators;

@Inject
public ChecksumValidator(
SimpleColumnValidator simpleColumnValidator,
FloatingPointColumnValidator floatingPointColumnValidator,
ArrayColumnValidator arrayColumnValidator,
RowColumnValidator rowColumnValidator)
public ChecksumValidator(Map<Category, Provider<ColumnValidator>> columnValidators)
{
this.columnValidators = ImmutableMap.of(
SIMPLE, simpleColumnValidator,
FLOATING_POINT, floatingPointColumnValidator,
ARRAY, arrayColumnValidator,
ROW, rowColumnValidator);
this.columnValidators = columnValidators;
}

public Query generateChecksumQuery(QualifiedName tableName, List<Column> columns)
{
ImmutableList.Builder<SelectItem> selectItems = ImmutableList.builder();
selectItems.add(new SingleColumn(new FunctionCall(QualifiedName.of("count"), ImmutableList.of())));
for (Column column : columns) {
selectItems.addAll(columnValidators.get(column.getCategory()).generateChecksumColumns(column));
selectItems.addAll(columnValidators.get(column.getCategory()).get().generateChecksumColumns(column));
}
return simpleQuery(new Select(false, selectItems.build()), new Table(tableName));
}

public Map<Column, ColumnMatchResult> getMismatchedColumns(List<Column> columns, ChecksumResult controlChecksum, ChecksumResult testChecksum)
{
return columns.stream()
.collect(toImmutableMap(identity(), column -> columnValidators.get(column.getCategory()).validate(column, controlChecksum, testChecksum)))
.flatMap(column -> columnValidators.get(column.getCategory()).get().validate(column, controlChecksum, testChecksum).stream())
.collect(toImmutableMap(ColumnMatchResult::getColumn, identity()))
.entrySet()
.stream()
.filter(entry -> !entry.getValue().isMatched())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
*/
package com.facebook.presto.verifier.checksum;

import com.facebook.presto.verifier.framework.Column;

import java.util.Objects;

import static com.google.common.base.MoreObjects.toStringHelper;
Expand All @@ -21,11 +23,13 @@
public class ColumnMatchResult
{
private final boolean matched;
private final Column column;
private final String message;

public ColumnMatchResult(boolean matched, String message)
public ColumnMatchResult(boolean matched, Column column, String message)
{
this.matched = matched;
this.column = requireNonNull(column, "column is null");
this.message = requireNonNull(message, "message is null");
}

Expand All @@ -34,6 +38,11 @@ public boolean isMatched()
return matched;
}

/**
 * Returns the column this match result was produced for.
 *
 * @return the column passed to the constructor
 */
public Column getColumn()
{
    return this.column;
}

public String getMessage()
{
return message;
Expand All @@ -50,20 +59,22 @@ public boolean equals(Object obj)
}
ColumnMatchResult o = (ColumnMatchResult) obj;
return Objects.equals(matched, o.matched) &&
Objects.equals(column, o.column) &&
Objects.equals(message, o.message);
}

@Override
public int hashCode()
{
return Objects.hash(matched, message);
return Objects.hash(matched, column, message);
}

/**
 * Renders this result for logs and debugging, listing the {@code matched},
 * {@code column} and {@code message} fields in that order.
 *
 * @return the string produced by the {@code toStringHelper} chain
 */
@Override
public String toString()
{
    return toStringHelper(this)
            .add("matched", this.matched)
            .add("column", this.column)
            .add("message", this.message)
            .toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

import java.util.List;

interface ColumnValidator
public interface ColumnValidator
{
List<SingleColumn> generateChecksumColumns(Column column);

ColumnMatchResult validate(Column column, ChecksumResult controlResult, ChecksumResult testResult);
List<ColumnMatchResult> validate(Column column, ChecksumResult controlResult, ChecksumResult testResult);
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public FloatingPointColumnValidator(VerifierConfig config)
@Override
public List<SingleColumn> generateChecksumColumns(Column column)
{
Expression doubleColumn = column.getType().equals(DOUBLE) ? column.getIdentifier() : new Cast(column.getIdentifier(), DOUBLE.getDisplayName());
Expression doubleColumn = column.getType().equals(DOUBLE) ? column.getExpression() : new Cast(column.getExpression(), DOUBLE.getDisplayName());
Expression positiveInfinity = new FunctionCall(QualifiedName.of("infinity"), ImmutableList.of());
Expression negativeInfinity = new ArithmeticUnaryExpression(MINUS, positiveInfinity);

Expand All @@ -66,7 +66,7 @@ public List<SingleColumn> generateChecksumColumns(Column column)
new FunctionCall(
QualifiedName.of("sum"),
Optional.empty(),
Optional.of(new FunctionCall(QualifiedName.of("is_finite"), ImmutableList.of(column.getIdentifier()))),
Optional.of(new FunctionCall(QualifiedName.of("is_finite"), ImmutableList.of(column.getExpression()))),
Optional.empty(),
false,
ImmutableList.of(doubleColumn)),
Expand All @@ -75,33 +75,33 @@ public List<SingleColumn> generateChecksumColumns(Column column)
new FunctionCall(
QualifiedName.of("count"),
Optional.empty(),
Optional.of(new FunctionCall(QualifiedName.of("is_nan"), ImmutableList.of(column.getIdentifier()))),
Optional.of(new FunctionCall(QualifiedName.of("is_nan"), ImmutableList.of(column.getExpression()))),
Optional.empty(),
false,
ImmutableList.of(column.getIdentifier())),
ImmutableList.of(column.getExpression())),
Optional.of(delimitedIdentifier(getNanCountColumnAlias(column)))),
new SingleColumn(
new FunctionCall(
QualifiedName.of("count"),
Optional.empty(),
Optional.of(new ComparisonExpression(EQUAL, column.getIdentifier(), positiveInfinity)),
Optional.of(new ComparisonExpression(EQUAL, column.getExpression(), positiveInfinity)),
Optional.empty(),
false,
ImmutableList.of(column.getIdentifier())),
ImmutableList.of(column.getExpression())),
Optional.of(delimitedIdentifier(getPositiveInfinityCountColumnAlias(column)))),
new SingleColumn(
new FunctionCall(
QualifiedName.of("count"),
Optional.empty(),
Optional.of(new ComparisonExpression(EQUAL, column.getIdentifier(), negativeInfinity)),
Optional.of(new ComparisonExpression(EQUAL, column.getExpression(), negativeInfinity)),
Optional.empty(),
false,
ImmutableList.of(column.getIdentifier())),
ImmutableList.of(column.getExpression())),
Optional.of(delimitedIdentifier(getNegativeInfinityCountColumnAlias(column)))));
}

@Override
public ColumnMatchResult validate(Column column, ChecksumResult controlResult, ChecksumResult testResult)
public List<ColumnMatchResult> validate(Column column, ChecksumResult controlResult, ChecksumResult testResult)
{
checkArgument(
controlResult.getRowCount() == testResult.getRowCount(),
Expand All @@ -123,24 +123,26 @@ public ColumnMatchResult validate(Column column, ChecksumResult controlResult, C
if (!Objects.equals(controlNanCount, testNanCount) ||
!Objects.equals(controlPositiveInfinityCount, testPositiveInfinityCount) ||
!Objects.equals(controlNegativeInfinityCount, testNegativeInfinityCount)) {
return new ColumnMatchResult(
return ImmutableList.of(new ColumnMatchResult(
false,
column,
format(
"control(NaN: %s, +infinity: %s, -infinity: %s) test(NaN: %s, +infinity: %s, -infinity: %s)",
controlNanCount,
controlPositiveInfinityCount,
controlNegativeInfinityCount,
testNanCount,
testPositiveInfinityCount,
testNegativeInfinityCount));
testNegativeInfinityCount)));
}

Object controlSumObject = controlResult.getChecksum(sumColumnAlias);
Object testSumObject = testResult.getChecksum(sumColumnAlias);
if (controlSumObject == null || testSumObject == null) {
return new ColumnMatchResult(
return ImmutableList.of(new ColumnMatchResult(
controlSumObject == null && testSumObject == null,
format("control(sum: %s) test(sum: %s)", controlSumObject, testSumObject));
column,
format("control(sum: %s) test(sum: %s)", controlSumObject, testSumObject)));
}

// Implementation according to http://floating-point-gui.de/errors/comparison/
Expand All @@ -149,27 +151,30 @@ public ColumnMatchResult validate(Column column, ChecksumResult controlResult, C

// Fail if either sum is NaN or Infinity
if (isNaN(controlSum) || isNaN(testSum) || isInfinite(controlSum) || isInfinite(testSum)) {
return new ColumnMatchResult(
return ImmutableList.of(new ColumnMatchResult(
false,
format("control(sum: %s) test(sum: %s)", controlSum, testSum));
column,
format("control(sum: %s) test(sum: %s)", controlSum, testSum)));
}

// Use absolute error margin if either control sum or test sum is 0
if (controlSum == 0 || testSum == 0) {
double controlMean = controlSum / controlResult.getRowCount();
double testMean = testSum / controlResult.getRowCount();
double difference = abs(controlMean - testMean);
return new ColumnMatchResult(
return ImmutableList.of(new ColumnMatchResult(
difference < absoluteErrorMargin,
format("control(mean: %s) test(mean: %s) difference: %s", controlMean, testMean, difference));
column,
format("control(mean: %s) test(mean: %s) difference: %s", controlMean, testMean, difference)));
}

// Use relative error margin for the common cases
double difference = abs(controlSum - testSum);
double relativeError = difference / min((abs(controlSum) + abs(testSum)) / 2, Double.MAX_VALUE);
return new ColumnMatchResult(
return ImmutableList.of(new ColumnMatchResult(
relativeError < relativeErrorMargin,
format("control(sum: %s) test(sum: %s) relative error: %s", controlSum, testSum, relativeError));
column,
format("control(sum: %s) test(sum: %s) relative error: %s", controlSum, testSum, relativeError)));
}

private static String getSumColumnAlias(Column column)
Expand Down
Loading