From 21b4035e855c74191bf18abd277f704335263b66 Mon Sep 17 00:00:00 2001 From: David Phillips Date: Sun, 27 Jan 2019 00:14:01 -0800 Subject: [PATCH] Update to Hive 3.0.0 --- pom.xml | 207 ++++++++++-------- .../hive/common/util/TimestampParser.java | 191 ---------------- .../apache/hive/hcatalog/data/JsonSerDe.java | 53 +++-- 3 files changed, 148 insertions(+), 303 deletions(-) delete mode 100644 src/main/java/org/apache/hive/common/util/TimestampParser.java diff --git a/pom.xml b/pom.xml index 453c021..11b1a02 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ io.prestosql.hive hive-apache - 2.3.4-2-SNAPSHOT + 3.0.0-1-SNAPSHOT hive-apache Shaded version of Apache Hive for Presto @@ -43,14 +43,13 @@ io.prestosql.hive.\$internal - 2.3.4 + 3.0.0 1.7.7 - 1.8.1 3.5.2 + 1.9.0 2.5.0 1.7.25 - 0.2 @@ -76,32 +75,28 @@ org.apache.hive - hive-metastore + hive-standalone-metastore ${dep.hive.version} - org.apache.hbase - hbase-client - - - org.apache.hadoop - hadoop-archives + org.apache.hive + hive-metastore org.apache.hadoop - hadoop-common + hadoop-hdfs - org.apache.hadoop - hadoop-mapreduce-client-core + com.github.joshelser + dropwizard-metrics-hadoop-metrics2-reporter - org.apache.hadoop - hadoop-hdfs + org.apache.logging.log4j + log4j-1.2-api - org.apache.hadoop - hadoop-yarn-server-resourcemanager + org.apache.logging.log4j + log4j-slf4j-impl org.apache.httpcomponents @@ -111,30 +106,6 @@ org.apache.httpcomponents httpclient - - org.apache.logging.log4j - log4j-slf4j-impl - - - org.apache.curator - curator-framework - - - org.apache.zookeeper - zookeeper - - - co.cask.tephra - tephra-api - - - co.cask.tephra - tephra-core - - - co.cask.tephra - tephra-hbase-compat-1.0 - javolution javolution @@ -147,26 +118,14 @@ com.zaxxer HikariCP - - commons-cli - commons-cli - commons-dbcp commons-dbcp - - commons-pool - commons-pool - org.apache.derby derby - - javax.jdo - jdo-api - org.datanucleus javax.jdo @@ -187,6 +146,10 @@ org.antlr antlr-runtime + + sqlline + sqlline + @@ -199,10 +162,42 @@ org.apache.hive hive-service-rpc + + org.apache.hive + hive-upgrade-acid + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + + + org.apache.arrow + arrow-vector + org.apache.parquet parquet-hadoop-bundle + + com.carrotsearch + hppc + + + com.vlkan + flatbuffers + + + org.apache.curator + curator-framework + + + org.apache.zookeeper + zookeeper + + + org.apache.logging.log4j + log4j-slf4j-impl + com.google.code.findbugs jsr305 @@ -224,10 +219,18 @@ org.apache.hive hive-llap-tez + + org.apache.hive + hive-upgrade-acid + org.apache.hive hive-vector-code-gen + + org.apache.hadoop + hadoop-yarn-registry + org.apache.logging.log4j log4j-1.2-api @@ -316,6 +319,14 @@ org.apache.hive hive-service-rpc + + org.apache.hive + hive-upgrade-acid + + + org.apache.hadoop + hadoop-hdfs + com.github.joshelser dropwizard-metrics-hadoop-metrics2-reporter @@ -325,12 +336,36 @@ jline - org.eclipse.jetty.aggregate - jetty-all + javax.servlet + javax.servlet-api + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-rewrite + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet - org.eclipse.jetty.orbit - javax.servlet + org.eclipse.jetty + jetty-webapp + + + javolution + javolution + + + net.sf.jpam + jpam org.apache.ant @@ -372,6 +407,14 @@ org.apache.hive hive-exec + + org.apache.hive + hive-metastore + + + org.apache.hive + hive-upgrade-acid + org.apache.hadoop hadoop-annotations @@ -418,8 +461,8 @@ snappy-java - org.codehaus.jackson - jackson-core-asl + commons-pool + commons-pool @@ -441,10 +484,6 @@ org.apache.commons commons-compress - - org.codehaus.jackson - jackson-core-asl - @@ -457,10 +496,6 @@ org.apache.avro avro-ipc - - org.codehaus.jackson - jackson-core-asl - @@ -470,12 +505,6 @@ ${dep.protobuf.version} - - org.iq80.snappy - snappy - ${dep.snappy.version} - - org.jodd jodd-core @@ -568,6 +597,7 @@ joda-time:joda-time org.apache.thrift:libthrift + io.airlift:aircompressor @@ -587,6 +617,10 @@ com.google.common ${shadeBase}.com.google.common + + com.google.thirdparty + ${shadeBase}.com.google.thirdparty + org.objectweb.asm ${shadeBase}.org.objectweb.asm @@ -603,10 +637,6 @@ com.fasterxml.jackson ${shadeBase}.com.fasterxml.jackson - - org.iq80.snappy - ${shadeBase}.org.iq80.snappy - org.json ${shadeBase}.org.json @@ -615,10 +645,6 @@ au.com.bytecode.opencsv ${shadeBase}.au.com.bytecode.opencsv - - io.airlift.compress - ${shadeBase}.io.airlift.compress - jodd ${shadeBase}.jodd @@ -627,21 +653,13 @@ com.codahale.metrics ${shadeBase}.com.codahale.metrics - - parquet.org.apache.thrift - ${shadeBase}.parquet.org.apache.thrift - - - parquet.org.slf4j - ${shadeBase}.parquet.org.slf4j - org.apache.parquet.it.unimi.dsi.fastutil ${shadeBase}.parquet.it.unimi.dsi.fastutil - shaded.parquet.org.codehaus.jackson - ${shadeBase}.parquet.org.codehaus.jackson + shaded.parquet + ${shadeBase}.parquet org.slf4j @@ -666,7 +684,6 @@ hive-log4j2.properties parquet-logging.properties - org/apache/hive/common/util/TimestampParser*.class @@ -676,11 +693,11 @@ hive-exec-log4j2.properties tez-container-log4j2.properties org/apache/hadoop/hive/ql/io/CodecPool*.class - org/apache/tez/dag/api/TaskSpecBuilder*.* + org/apache/tez/** - org.apache.hive:hive-metastore + org.apache.hive:hive-standalone-metastore package.jdo diff --git a/src/main/java/org/apache/hive/common/util/TimestampParser.java b/src/main/java/org/apache/hive/common/util/TimestampParser.java deleted file mode 100644 index 291372e..0000000 --- a/src/main/java/org/apache/hive/common/util/TimestampParser.java +++ /dev/null @@ -1,191 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.common.util; - -import java.math.BigDecimal; -import java.sql.Timestamp; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Optional; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.joda.time.DateTime; -import org.joda.time.IllegalInstantException; -import org.joda.time.MutableDateTime; -import org.joda.time.DateTimeFieldType; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; -import org.joda.time.format.DateTimeFormatterBuilder; -import org.joda.time.format.DateTimeParser; -import org.joda.time.format.DateTimeParserBucket; - -import static com.google.common.base.Preconditions.checkState; - -/** - * Timestamp parser using Joda DateTimeFormatter. Parser accepts 0 or more date time format - * patterns. If no format patterns are provided it will default to the normal Timestamp parsing. - * Datetime formats are compatible with Java SimpleDateFormat. Also added special case pattern - * "millis" to parse the string as milliseconds since Unix epoch. - * Since this uses Joda DateTimeFormatter, this parser should be thread safe. - */ -// TODO remove this class and relevant exclusion rules in pom.xml when upstream has updated -// definition of `startingDateValue` -public class TimestampParser { - - protected final static String[] stringArray = new String[] {}; - protected final static String millisFormatString = "millis"; - // @Nullable - private final static DateTime startingDateValue = makeStartingDateValue(); - - // @Nullable - private static DateTime makeStartingDateValue() { - try { - return new DateTime(1970, 1, 1, 0, 0, 0, 0); - } catch (IllegalInstantException e) { - // 1970-01-01 00:00:00 did not exist in some zones. In these zones, we need to take different, - // less optimal parsing route. - return null; - } - } - - protected String[] formatStrings = null; - protected DateTimeFormatter fmt = null; - - public TimestampParser() { - } - - public TimestampParser(TimestampParser tsParser) { - this(tsParser.formatStrings == null ? - null : Arrays.copyOf(tsParser.formatStrings, tsParser.formatStrings.length)); - } - - public TimestampParser(List formatStrings) { - this(formatStrings == null ? null : formatStrings.toArray(stringArray)); - } - - public TimestampParser(String[] formatStrings) { - this.formatStrings = formatStrings; - - // create formatter that includes all of the input patterns - if (formatStrings != null && formatStrings.length > 0) { - DateTimeParser[] parsers = new DateTimeParser[formatStrings.length]; - for (int idx = 0; idx < formatStrings.length; ++idx) { - String formatString = formatStrings[idx]; - if (formatString.equalsIgnoreCase(millisFormatString)) { - // Use milliseconds parser if pattern matches our special-case millis pattern string - parsers[idx] = new MillisDateFormatParser(); - } else { - parsers[idx] = DateTimeFormat.forPattern(formatString).getParser(); - } - } - fmt = new DateTimeFormatterBuilder() - .append(null, parsers) - .toFormatter() - .withDefaultYear(1970); - } - } - - /** - * Parse the input string and return a timestamp value - * @param strValue - * @return - * @throws IllegalArgumentException if input string cannot be parsed into timestamp - */ - public Timestamp parseTimestamp(String strValue) throws IllegalArgumentException { - if (fmt != null) { - Optional parsed = tryParseWithFormat(strValue); - if (parsed.isPresent()) { - return parsed.get(); - } - } - - // Otherwise try default timestamp parsing - return Timestamp.valueOf(strValue); - } - - private Optional tryParseWithFormat(String strValue) { - checkState(fmt != null); - - if (startingDateValue != null) { - // reset value in case any date fields are missing from the date pattern - MutableDateTime mdt = new MutableDateTime(startingDateValue); - - // Using parseInto() avoids throwing exception when parsing, - // allowing fallback to default timestamp parsing if custom patterns fail. - int ret = fmt.parseInto(mdt, strValue, 0); - // Only accept parse results if we parsed the entire string - if (ret == strValue.length()) { - return Optional.of(new Timestamp(mdt.getMillis())); - } - return Optional.empty(); - } - - try { - DateTime dt = fmt.parseDateTime(strValue); - return Optional.of(new Timestamp(dt.getMillis())); - } catch (IllegalArgumentException e) { - return Optional.empty(); - } - } - - /** - * DateTimeParser to parse the date string as the millis since Unix epoch - */ - public static class MillisDateFormatParser implements DateTimeParser { - private static final ThreadLocal numericMatcher = new ThreadLocal() { - @Override - protected Matcher initialValue() { - return Pattern.compile("(-?\\d+)(\\.\\d+)?$").matcher(""); - } - }; - - private final static DateTimeFieldType[] dateTimeFields = { - DateTimeFieldType.year(), - DateTimeFieldType.monthOfYear(), - DateTimeFieldType.dayOfMonth(), - DateTimeFieldType.hourOfDay(), - DateTimeFieldType.minuteOfHour(), - DateTimeFieldType.secondOfMinute(), - DateTimeFieldType.millisOfSecond() - }; - - public int estimateParsedLength() { - return 13; // Shouldn't hit 14 digits until year 2286 - } - - public int parseInto(DateTimeParserBucket bucket, String text, int position) { - String substr = text.substring(position); - Matcher matcher = numericMatcher.get(); - matcher.reset(substr); - if (!matcher.matches()) { - return -1; - } - - // Joda DateTime only has precision to millis, cut off any fractional portion - long millis = Long.parseLong(matcher.group(1)); - DateTime dt = new DateTime(millis); - for (DateTimeFieldType field : dateTimeFields) { - bucket.saveField(field, dt.get(field)); - } - return substr.length(); - } - } -} diff --git a/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java b/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java index 409583d..d0b5a35 100644 --- a/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java +++ b/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; +import java.nio.charset.CharacterCodingException; import java.sql.Date; import java.sql.Timestamp; import java.util.ArrayList; @@ -50,6 +51,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; @@ -69,6 +71,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.HiveStringUtils; @@ -106,10 +109,7 @@ public void initialize(Configuration conf, Properties tbl) List columnTypes; StructTypeInfo rowTypeInfo; - - LOG.debug("Initializing JsonSerDe"); - LOG.debug("props to serde: {}", tbl.entrySet()); - + LOG.debug("Initializing JsonSerDe: {}", tbl.entrySet()); // Get column names and types String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); @@ -117,15 +117,15 @@ public void initialize(Configuration conf, Properties tbl) final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl .getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA); // all table column names - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList(); + if (columnNameProperty.isEmpty()) { + columnNames = Collections.emptyList(); } else { columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); } // all column types - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList(); + if (columnTypeProperty.isEmpty()) { + columnTypes = Collections.emptyList(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } @@ -179,11 +179,9 @@ public Object deserialize(Writable blob) throws SerDeException { } } catch (JsonParseException e) { LOG.warn("Error [{}] parsing json text [{}].", e, t); - LOG.debug(null, e); throw new SerDeException(e); } catch (IOException e) { LOG.warn("Error [{}] parsing json text [{}].", e, t); - LOG.debug(null, e); throw new SerDeException(e); } @@ -309,8 +307,20 @@ private Object extractCurrentField(JsonParser p, HCatFieldSchema hcatFieldSchema val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText(); break; case BINARY: - throw new IOException("JsonSerDe does not support BINARY type"); - case DATE: + String b = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText(); + if (b != null) { + try { + String t = Text.decode(b.getBytes(), 0, b.getBytes().length); + return t.getBytes(); + } catch (CharacterCodingException e) { + LOG.warn("Error generating json binary type from object.", e); + return null; + } + } else { + val = null; + } + break; + case DATE: val = (valueToken == JsonToken.VALUE_NULL) ? null : Date.valueOf(p.getText()); break; case TIMESTAMP: @@ -402,7 +412,13 @@ private Object getObjectOfCorrespondingPrimitiveType(String s, PrimitiveTypeInfo case STRING: return s; case BINARY: - throw new IOException("JsonSerDe does not support BINARY type"); + try { + String t = Text.decode(s.getBytes(), 0, s.getBytes().length); + return t.getBytes(); + } catch (CharacterCodingException e) { + LOG.warn("Error generating json binary type from object.", e); + return null; + } case DATE: return Date.valueOf(s); case TIMESTAMP: @@ -505,9 +521,12 @@ private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector appendWithQuotes(sb, s); break; } - case BINARY: { - throw new IOException("JsonSerDe does not support BINARY type"); - } + case BINARY: + byte[] b = ((BinaryObjectInspector) oi).getPrimitiveJavaObject(o); + Text txt = new Text(); + txt.set(b, 0, b.length); + appendWithQuotes(sb, SerDeUtils.escapeString(txt.toString())); + break; case DATE: Date d = ((DateObjectInspector)poi).getPrimitiveJavaObject(o); appendWithQuotes(sb, d.toString());