Closed
Changes from 7 commits
55 changes: 55 additions & 0 deletions src/main/java/org/apache/parquet/format/LogicalTypes.java
@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.parquet.format;

/**
* Convenience instances of logical type classes.
*/
public class LogicalTypes {
public static class TimeUnits {
public static final TimeUnit MILLIS = TimeUnit.MILLIS(new MilliSeconds());
public static final TimeUnit MICROS = TimeUnit.MICROS(new MicroSeconds());
}

public static LogicalType DECIMAL(int scale, int precision) {
return LogicalType.DECIMAL(new DecimalType(scale, precision));
}

public static final LogicalType UTF8 = LogicalType.STRING(new StringType());
public static final LogicalType MAP = LogicalType.MAP(new MapType());
public static final LogicalType LIST = LogicalType.LIST(new ListType());
public static final LogicalType ENUM = LogicalType.ENUM(new EnumType());
public static final LogicalType DATE = LogicalType.DATE(new DateType());
public static final LogicalType TIME_MILLIS = LogicalType.TIME(new TimeType(true, TimeUnits.MILLIS));
public static final LogicalType TIME_MICROS = LogicalType.TIME(new TimeType(true, TimeUnits.MICROS));
public static final LogicalType TIMESTAMP_MILLIS = LogicalType.TIMESTAMP(new TimestampType(true, TimeUnits.MILLIS));
public static final LogicalType TIMESTAMP_MICROS = LogicalType.TIMESTAMP(new TimestampType(true, TimeUnits.MICROS));
public static final LogicalType INT_8 = LogicalType.INTEGER(new IntType((byte) 8, true));
public static final LogicalType INT_16 = LogicalType.INTEGER(new IntType((byte) 16, true));
public static final LogicalType INT_32 = LogicalType.INTEGER(new IntType((byte) 32, true));
public static final LogicalType INT_64 = LogicalType.INTEGER(new IntType((byte) 64, true));
public static final LogicalType UINT_8 = LogicalType.INTEGER(new IntType((byte) 8, false));
public static final LogicalType UINT_16 = LogicalType.INTEGER(new IntType((byte) 16, false));
public static final LogicalType UINT_32 = LogicalType.INTEGER(new IntType((byte) 32, false));
public static final LogicalType UINT_64 = LogicalType.INTEGER(new IntType((byte) 64, false));
public static final LogicalType UNKNOWN = LogicalType.UNKNOWN(new NullType());
public static final LogicalType JSON = LogicalType.JSON(new JsonType());
public static final LogicalType BSON = LogicalType.BSON(new BsonType());
}
121 changes: 114 additions & 7 deletions src/main/thrift/parquet.thrift
@@ -174,13 +174,6 @@ enum ConvertedType {
* particular timezone or date.
*/
INTERVAL = 21;

/**
* Annotates a column that is always null
* Sometimes when discovering the schema of existing data
* values are always null
*/
Member: If you don't want to duplicate the comment, maybe refer to NullType below.

Contributor Author: Oops, I intended to remove NULL entirely because it is an unreleased type that can be replaced with the LogicalType version.

NULL = 25;
}

/**
@@ -231,6 +224,113 @@ struct Statistics {
6: optional binary min_value;
}

/** Empty structs to use as logical type annotations */
Contributor: I think it would help the reader if the comments for each logical type mentioned the physical type(s) they can annotate. Currently one needs to look them up in the table below, and then look in LogicalTypes.md or ConvertedType to find what physical types can be annotated.

Member: +1

Contributor Author: Added.
struct StringType {} // allowed for BINARY, must be encoded with UTF-8
struct MapType {} // see LogicalTypes.md
struct ListType {} // see LogicalTypes.md
struct EnumType {} // allowed for BINARY, must be encoded with UTF-8
struct DateType {} // allowed for INT32

/**
* Logical type to annotate a column that is always null.
*
 * Sometimes, when discovering the schema of existing data, values are always
Contributor: I found this sentence a bit tricky to parse. Can you add a comma to make it more clear?

Contributor Author: I added some clarification here. Thanks for the suggestion.
* null and the physical type is assumed.
*/
struct NullType {} // allowed for any physical type, only null values stored

/**
* Decimal logical type annotation
*
* To maintain forward-compatibility in v1, implementations using this logical
* type must also set scale and precision on the annotated SchemaElement.
*
* Allowed for physical types: INT32, INT64, FIXED, and BINARY
*/
struct DecimalType {
1: required i32 scale
2: required i32 precision
}
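To make the DecimalType annotation concrete, here is a hedged sketch (the class and method names are hypothetical, not part of this PR) of how a reader would reconstruct a decimal from a stored unscaled integer: value = unscaled × 10^(−scale).

```java
import java.math.BigDecimal;
import java.math.BigInteger;

// Hypothetical sketch of DECIMAL interpretation: the stored integer is the
// unscaled value, and the DecimalType's scale shifts the decimal point.
public class DecimalDecode {
    public static BigDecimal decode(long unscaled, int scale) {
        // BigDecimal(unscaled, scale) computes unscaled * 10^(-scale)
        return new BigDecimal(BigInteger.valueOf(unscaled), scale);
    }

    public static void main(String[] args) {
        // An INT32 column annotated DECIMAL(scale=2, precision=9):
        // stored 12345 represents 123.45
        System.out.println(decode(12345, 2)); // prints 123.45
    }
}
```

The precision field does not affect decoding; it bounds how many digits the unscaled value may have.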

/** Time units for logical types */
struct MilliSeconds {}
struct MicroSeconds {}
union TimeUnit {
1: MilliSeconds MILLIS
2: MicroSeconds MICROS
}

/**
* Timestamp logical type annotation
*
* Allowed for physical types: INT64
*/
struct TimestampType {
1: required bool isAdjustedToUTC
Member: How about "withoutTimeZone", meaning the opposite of "isAdjustedToUTC"?

Contributor Author: isAdjustedToUTC is more clear. With or without time zone is language from the SQL spec, which is very difficult to understand and apply. Rather than relying on it, this captures a very specific piece of information: whether the timestamp was adjusted to UTC from its original offset (or would have been if the original offset is UTC).

Member: If isAdjustedToUTC is true, we need to record the original TZ, so we should add an optional string timezone field. (And then we don't need isAdjustedToUTC, since it is implied by timezone != null.) Something similar to Arrow:
https://github.com/apache/arrow/blob/a4f29f3a3ff1c64a6f547bfb0d5e4500142ea5ec/format/Schema.fbs#L117

Commenter: The model we discussed for Parquet is different from the Arrow spec. In Arrow, both Timestamp and TimestampTz are stored in UTC; the writer timezone string is additionally stored to re-compute the Timestamp values. For Parquet, the idea is to store Timestamp values as-is from epoch without conversion to UTC; the conversion to UTC happens only for TimestampTz values. The Parquet approach is slightly more efficient since we don't have to re-compute the Timestamp values, thereby enabling bulk load of the column. The presence of a writer timezone in the file metadata would also prohibit concatenation of files from different timezones.

Member: "In Arrow, both Timestamp and TimestampTz are stored in UTC" — this isn't accurate, see https://github.com/apache/arrow/blob/master/format/Schema.fbs#L120. In Arrow, if the timestamp metadata does not have a time zone, it is time zone naive, not UTC. From naive timestamp values, we can choose later to localize to UTC (which is a no-op) or localize to some other time zone (which will adjust the values to the UTC values internally based on the tz database).

Member: I opened https://issues.apache.org/jira/browse/ARROW-1020 to clarify the comments in Schema.fbs. When timezone is set, the physical values are UTC timestamps regardless of the time zone of the writer or the client, so changing the timezone does not change the underlying integers.

Contributor Author: I don't think we need to store the original zone if isAdjustedToUTC is true. This isn't done by other systems, and there is no guarantee that there is a single zone that the timestamps were converted from. This just indicates that whatever the source zone, the values have been normalized to the same one, UTC.

Member: I linked the Parquet and Arrow JIRAs about timestamp types. There's a discussion there about timestamp types and timezones:
https://issues.apache.org/jira/browse/PARQUET-905
https://issues.apache.org/jira/browse/ARROW-637

Member (@wesm, Jun 2, 2017): We spoke about this on the Arrow sync yesterday. In Arrow, we have 3 cases:

  • No time zone in the metadata: the data is time zone naive. One can later localize to a time zone (which may alter the values, because they will be internally normalized to UTC).

  • Time zone in the metadata: whether the time zone is 'UTC' or 'America/Los_Angeles', the physical values themselves are the same; changing the time zone only changes the metadata, not the values of the int64 timestamps.

What is proposed here simplifies this to either isAdjustedToUTC=false (what we currently call "no time zone" or "time zone naive" in Arrow) or isAdjustedToUTC=true (which covers BOTH the case that the time zone is set as UTC and the case that it is some other time zone).

The problem I see here is when a data processing system runs the query:

  select hour(timestamp_field), count(*)
  from my_parquet_data
  group by 1

For timestamps with time zone, if the time zone is known then the hour function can be implemented to compute the hour values based on the time zone (e.g. America/Los_Angeles). But what's proposed here precludes that; you would need to do something like

  hour_localized(timestamp_field, 'America/Los_Angeles')

Or maybe the analytics system has some means to cast to a timestamp with the additional metadata. Either way there's some awkwardness.

"This isn't done by other systems, and there is no guarantee that there is a single zone that the timestamps were converted from." — I see this concern, but if there is no consistency about the origin of the data, why not store "UTC" as the storage time zone? The fact that others may preserve a local time zone need not complicate this use case.

We can use the key/value metadata to preserve the time zone in the Parquet file footer to at least maintain compatibility with Arrow, but it's not ideal.

Contributor Author: We've resolved this discussion in the Parquet sync-ups. Parquet timestamps are always stored with respect to UTC and won't have a time zone.

2: required TimeUnit unit
}
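The thread above settles what isAdjustedToUTC means. A hedged sketch of the two interpretations (assumed semantics, using java.time rather than anything in this PR; the class and method names are hypothetical): the same INT64 value is read either as an instant on the UTC timeline or as a zone-naive local date-time.

```java
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;

// Hypothetical sketch: reading an INT64 TIMESTAMP(MICROS) column under the
// two isAdjustedToUTC settings. The integer bits are identical; only the
// interpretation differs.
public class TimestampSemantics {
    // isAdjustedToUTC = true: the value is microseconds since the Unix
    // epoch on the UTC timeline.
    public static Instant utcMicros(long micros) {
        return Instant.EPOCH.plusNanos(micros * 1_000L);
    }

    // isAdjustedToUTC = false: the same value is a zone-naive ("local")
    // date-time with no associated offset.
    public static LocalDateTime naiveMicros(long micros) {
        return LocalDateTime.ofEpochSecond(micros / 1_000_000L,
                (int) (micros % 1_000_000L) * 1_000, ZoneOffset.UTC);
    }

    public static void main(String[] args) {
        long micros = 1_500_000_000_000_000L;
        System.out.println(utcMicros(micros));   // 2017-07-14T02:40:00Z
        System.out.println(naiveMicros(micros)); // 2017-07-14T02:40
    }
}
```

Note the sketch assumes non-negative values; pre-epoch timestamps would need floor division when splitting seconds and microseconds.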

/**
* Time logical type annotation
*
* Allowed for physical types: INT32 (millis), INT64 (micros)
*/
struct TimeType {
1: required bool isAdjustedToUTC
Member: Does this actually apply to Time?

Contributor Author: Yes, this is required by the SQL spec.
2: required TimeUnit unit
}
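A TIME value is a time of day: with MILLIS it is milliseconds after midnight in an INT32, with MICROS microseconds after midnight in an INT64. A hedged decoding sketch (hypothetical helper, not part of this PR):

```java
import java.time.LocalTime;

// Hypothetical sketch: decode a TIME(MILLIS) INT32 value, which counts
// milliseconds after midnight.
public class TimeDecode {
    public static LocalTime fromMillis(int millisAfterMidnight) {
        return LocalTime.ofNanoOfDay(millisAfterMidnight * 1_000_000L);
    }

    public static void main(String[] args) {
        // 45,296,789 ms after midnight is 12:34:56.789
        System.out.println(fromMillis(45_296_789)); // prints 12:34:56.789
    }
}
```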

/**
* Integer logical type annotation
Contributor: Is this needed in addition to the physical types? Could it just be UnsignedIntType and leave out the bitWidth?

Member: Unless we want to support other widths in the future?

Contributor Author: We should talk about this in the sync-up today.

Contributor: I think I didn't make it to that sync. Was there some conclusion?

Contributor Author: Bit width is needed to signal that all of the values will fit in a particular width. Since the spec currently has 8 and 16 bit widths, we have to capture those possible values.
*
* bitWidth must be 8, 16, 32, or 64.
*
* Allowed for physical types: INT32, INT64
*/
struct IntType {
1: required byte bitWidth
2: required bool isSigned
Member: Just signed?

Contributor Author: I'm fine either way. isSigned is the Java convention.
}
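As discussed above, bitWidth signals that all values fit in a narrower range than the physical INT32/INT64. A hedged sketch of the implied range check (hypothetical helper, not part of this PR):

```java
// Hypothetical sketch of the value range implied by IntType(bitWidth,
// isSigned): a reader or validator can check that stored values fit
// the annotation.
public class IntWidth {
    public static boolean fits(long value, int bitWidth, boolean isSigned) {
        if (bitWidth == 64) {
            // Signed 64-bit covers all of long; unsigned 64-bit values
            // reuse the same bit pattern, so any long is representable.
            return true;
        }
        if (isSigned) {
            long min = -(1L << (bitWidth - 1));       // e.g. -128 for INT_8
            long max = (1L << (bitWidth - 1)) - 1;    // e.g.  127 for INT_8
            return value >= min && value <= max;
        }
        return value >= 0 && value < (1L << bitWidth); // e.g. 0..255 for UINT_8
    }

    public static void main(String[] args) {
        System.out.println(fits(255, 8, false)); // true  (UINT_8 max)
        System.out.println(fits(255, 8, true));  // false (INT_8 max is 127)
    }
}
```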

/**
* Embedded JSON logical type annotation
*
* Allowed for physical types: BINARY
*/
struct JsonType {
}

/**
* Embedded BSON logical type annotation
*
* Allowed for physical types: BINARY
*/
struct BsonType {
}

/**
* LogicalType annotations to replace ConvertedType.
*
* To maintain compatibility, implementations using LogicalType for a
* SchemaElement must also set the corresponding ConvertedType from the
* following table.
*/
union LogicalType {
1: StringType STRING // use ConvertedType UTF8 if encoding is UTF-8
2: MapType MAP // use ConvertedType MAP
3: ListType LIST // use ConvertedType LIST
4: EnumType ENUM // use ConvertedType ENUM
5: DecimalType DECIMAL // use ConvertedType DECIMAL
6: DateType DATE // use ConvertedType DATE
7: TimeType TIME // use ConvertedType TIME_MICROS or TIME_MILLIS
8: TimestampType TIMESTAMP // use ConvertedType TIMESTAMP_MICROS or TIMESTAMP_MILLIS
// 9: reserved for INTERVAL
Contributor: How about creating an empty struct IntervalType and leaving a TODO with that struct instead?

Member (@julienledem, May 12, 2017): But then we can not add required fields?

Contributor Author: I agree with Julien. Then we can't add required fields to interval, and there isn't much value to adding it now.
10: IntType INTEGER // use ConvertedType INT_* or UINT_*
11: NullType UNKNOWN // no compatible ConvertedType
12: JsonType JSON // use ConvertedType JSON
13: BsonType BSON // use ConvertedType BSON
}
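The forward-compatibility rule in the union's comments can be sketched as a names-only mapping (no Thrift dependency; the class and method are hypothetical). INTEGER is omitted because its ConvertedType (INT_* or UINT_*) also depends on bitWidth and isSigned:

```java
// Hypothetical names-only sketch of the forward-compatibility table above:
// which ConvertedType a writer sets alongside each LogicalType. TIME and
// TIMESTAMP pick the ConvertedType from their TimeUnit; INTEGER is omitted
// (it additionally depends on bitWidth/isSigned); UNKNOWN has no equivalent.
public class CompatMapping {
    public static String convertedTypeFor(String logicalType, String timeUnit) {
        switch (logicalType) {
            case "STRING":    return "UTF8";
            case "TIME":      return "MICROS".equals(timeUnit)
                                      ? "TIME_MICROS" : "TIME_MILLIS";
            case "TIMESTAMP": return "MICROS".equals(timeUnit)
                                      ? "TIMESTAMP_MICROS" : "TIMESTAMP_MILLIS";
            case "UNKNOWN":   return null; // no compatible ConvertedType
            // MAP, LIST, ENUM, DECIMAL, DATE, JSON, BSON map one-to-one.
            default:          return logicalType;
        }
    }

    public static void main(String[] args) {
        System.out.println(convertedTypeFor("TIME", "MICROS")); // TIME_MICROS
        System.out.println(convertedTypeFor("STRING", null));   // UTF8
    }
}
```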

/**
 * Represents an element inside a schema definition.
* - if it is a group (inner node) then type is undefined and num_children is defined
Expand Down Expand Up @@ -278,6 +378,13 @@ struct SchemaElement {
*/
9: optional i32 field_id;

/**
* The logical type of this SchemaElement; only valid for primitives.
*
* LogicalType replaces ConvertedType, but ConvertedType is still required
* for some logical types to ensure forward-compatibility in format v1.
*/
10: optional LogicalType logicalType
Contributor: I just noticed that this commit uses camelCase field names while the existing ones are underscore_separated. Was this an intentional naming convention change? In any case, it's too late to do anything about it, as this has already been released as 2.4.0.
}

/**