Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions java/core/src/java/org/apache/orc/OrcUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ private static void appendOrcTypes(List<OrcProto.Type> result, TypeDescription t
case TIMESTAMP:
type.setKind(OrcProto.Type.Kind.TIMESTAMP);
break;
case TIMESTAMP_INSTANT:
type.setKind(OrcProto.Type.Kind.TIMESTAMP_INSTANT);
break;
case DATE:
type.setKind(OrcProto.Type.Kind.DATE);
break;
Expand Down Expand Up @@ -305,6 +308,9 @@ TypeDescription convertTypeFromProtobuf(List<OrcProto.Type> types,
case TIMESTAMP:
result = TypeDescription.createTimestamp();
break;
case TIMESTAMP_INSTANT:
result = TypeDescription.createTimestampInstant();
break;
case DATE:
result = TypeDescription.createDate();
break;
Expand Down
32 changes: 26 additions & 6 deletions java/core/src/java/org/apache/orc/TypeDescription.java
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ public enum Category {
LIST("array", false),
MAP("map", false),
STRUCT("struct", false),
UNION("uniontype", false);
UNION("uniontype", false),
TIMESTAMP_INSTANT("timestamp with local time zone", false);

Category(String name, boolean isPrimitive) {
this.name = name;
Expand Down Expand Up @@ -179,6 +180,10 @@ public static TypeDescription createTimestamp() {
return new TypeDescription(Category.TIMESTAMP);
}

/**
 * Factory for a type description in the {@code TIMESTAMP_INSTANT} category
 * (named "timestamp with local time zone" in the {@code Category} enum).
 *
 * @return a newly constructed {@code TypeDescription} for that category
 */
public static TypeDescription createTimestampInstant() {
  final TypeDescription instantType =
      new TypeDescription(Category.TIMESTAMP_INSTANT);
  return instantType;
}

/**
 * Factory for a type description in the {@code BINARY} category.
 *
 * @return a newly constructed {@code TypeDescription} for that category
 */
public static TypeDescription createBinary() {
  final TypeDescription binaryType = new TypeDescription(Category.BINARY);
  return binaryType;
}
Expand Down Expand Up @@ -211,18 +216,31 @@ public String toString() {
}

static Category parseCategory(StringPosition source) {
int start = source.position;
StringBuilder word = new StringBuilder();
boolean hadSpace = true;
while (source.position < source.length) {
char ch = source.value.charAt(source.position);
if (!Character.isLetter(ch)) {
if (Character.isLetter(ch)) {
word.append(Character.toLowerCase(ch));
hadSpace = false;
} else if (ch == ' ') {
if (!hadSpace) {
hadSpace = true;
word.append(ch);
}
} else {
break;
}
source.position += 1;
}
if (source.position != start) {
String word = source.value.substring(start, source.position).toLowerCase();
String catString = word.toString();
// if there were trailing spaces, remove them.
if (hadSpace) {
catString = catString.trim();
}
if (!catString.isEmpty()) {
for (Category cat : Category.values()) {
if (cat.getName().equals(word)) {
if (cat.getName().equals(catString)) {
return cat;
}
}
Expand Down Expand Up @@ -349,6 +367,7 @@ static TypeDescription parseType(StringPosition source) {
case SHORT:
case STRING:
case TIMESTAMP:
case TIMESTAMP_INSTANT:
break;
case CHAR:
case VARCHAR:
Expand Down Expand Up @@ -650,6 +669,7 @@ private ColumnVector createColumn(RowBatchVersion version, int maxSize) {
case DATE:
return new LongColumnVector(maxSize);
case TIMESTAMP:
case TIMESTAMP_INSTANT:
return new TimestampColumnVector(maxSize);
case FLOAT:
case DOUBLE:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ static void printType(PrintStream output,
case SHORT:
case STRING:
case TIMESTAMP:
case TIMESTAMP_INSTANT:
break;

case DECIMAL:
Expand Down
39 changes: 34 additions & 5 deletions java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -1427,12 +1427,12 @@ public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (getNumberOfValues() != 0) {
buf.append(" min: ");
buf.append(minimum);
buf.append(getMinimum());
buf.append(" max: ");
buf.append(maximum);
buf.append(getMaximum());
if (hasSum) {
buf.append(" sum: ");
buf.append(sum);
buf.append(getSum());
}
}
return buf.toString();
Expand Down Expand Up @@ -1632,7 +1632,7 @@ public int hashCode() {
}
}

private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
private static class TimestampStatisticsImpl extends ColumnStatisticsImpl
implements TimestampColumnStatistics {
private Long minimum = null;
private Long maximum = null;
Expand Down Expand Up @@ -1791,6 +1791,30 @@ public int hashCode() {
}
}

private static final class TimestampInstantStatisticsImpl extends TimestampStatisticsImpl {
/** Creates an instance with no explicit initialization of its own. */
TimestampInstantStatisticsImpl() {
}

/**
 * Creates an instance from serialized column statistics.
 *
 * @param stats the protobuf statistics, passed straight to the superclass
 *              constructor
 */
TimestampInstantStatisticsImpl(OrcProto.ColumnStatistics stats) {
super(stats);
}

@Override
public void updateTimestamp(Timestamp value) {
updateTimestamp(value.getTime());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we always return the timestamp stats in the local time zone? If so, PPD (predicate pushdown) on timestamps will still be broken, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is correct. We will always compare the timestamp instant for the new type.

}

/**
 * Reports the minimum by delegating to {@code getMinimumUTC()}.
 *
 * @return whatever {@code getMinimumUTC()} yields, passed through unchanged
 */
@Override
public Timestamp getMinimum() {
  final Timestamp utcMinimum = getMinimumUTC();
  return utcMinimum;
}

/**
 * Reports the maximum by delegating to {@code getMaximumUTC()}.
 *
 * @return whatever {@code getMaximumUTC()} yields, passed through unchanged
 */
@Override
public Timestamp getMaximum() {
  final Timestamp utcMaximum = getMaximumUTC();
  return utcMaximum;
}
}

protected long count = 0;
private boolean hasNull = false;
private long bytesOnDisk = 0;
Expand Down Expand Up @@ -1973,6 +1997,8 @@ public static ColumnStatisticsImpl create(TypeDescription schema) {
return new DateStatisticsImpl();
case TIMESTAMP:
return new TimestampStatisticsImpl();
case TIMESTAMP_INSTANT:
return new TimestampInstantStatisticsImpl();
case BINARY:
return new BinaryStatisticsImpl();
default:
Expand Down Expand Up @@ -2002,7 +2028,10 @@ public static ColumnStatisticsImpl deserialize(TypeDescription schema,
} else if (stats.hasDateStatistics()) {
return new DateStatisticsImpl(stats);
} else if (stats.hasTimestampStatistics()) {
return new TimestampStatisticsImpl(stats);
return schema == null ||
schema.getCategory() == TypeDescription.Category.TIMESTAMP ?
new TimestampStatisticsImpl(stats) :
new TimestampInstantStatisticsImpl(stats);
} else if(stats.hasBinaryStatistics()) {
return new BinaryStatisticsImpl(stats);
} else {
Expand Down
Loading