diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index e184655cfa7f..e9cda2fcd802 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -20,8 +20,6 @@ package org.apache.hadoop.hive.llap.io.api.impl;
 
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.s3a.S3AFileSystem;
-import org.apache.hadoop.fs.s3a.S3AInputPolicy;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport;
 import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -65,9 +63,6 @@ import org.apache.hadoop.mapred.Reporter;
 
 import org.apache.hive.common.util.HiveStringUtils;
 
-import static org.apache.hadoop.hive.common.FileUtils.isS3a;
-import static org.apache.hadoop.hive.ql.io.HiveInputFormat.isRandomAccessInputFormat;
-
 public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowBatch>,
     VectorizedInputFormatInterface, SelfDescribingInputFormatInterface, AvoidSplitCombination {
@@ -107,10 +102,6 @@ public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
     FileSplit fileSplit = (FileSplit) split;
     reporter.setStatus(fileSplit.toString());
     FileSystem splitFileSystem = fileSplit.getPath().getFileSystem(job);
-    if (isS3a(splitFileSystem) && isRandomAccessInputFormat(sourceInputFormat)) {
-      LlapIoImpl.LOG.debug("Changing S3A input policy to RANDOM");
-      ((S3AFileSystem) splitFileSystem).setInputPolicy(S3AInputPolicy.Random);
-    }
     try {
       // At this entry point, we are going to assume that these are logical table columns.
       // Perhaps we should go thru the code and clean this up to be more explicit; for now, we
diff --git a/ql/pom.xml b/ql/pom.xml
index b51def5dc431..44b6f1107a7a 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -321,11 +321,6 @@
       <artifactId>hadoop-yarn-client</artifactId>
       <optional>true</optional>
     </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-aws</artifactId>
-      <version>${hadoop.version}</version>
-    </dependency>
     <dependency>
       <groupId>org.apache.orc</groupId>
       <artifactId>orc-tools</artifactId>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index f564ed75f203..d8bb45f3909f 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -24,8 +24,6 @@
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.s3a.S3AFileSystem;
-import org.apache.hadoop.fs.s3a.S3AInputPolicy;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.StringInternUtils;
 import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
@@ -40,8 +38,6 @@
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
-import org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
@@ -93,7 +89,6 @@
 import java.util.concurrent.Future;
 
 import static java.lang.Integer.min;
-import static org.apache.hadoop.hive.common.FileUtils.isS3a;
 
 /**
  * HiveInputFormat is a parameterized InputFormat which looks at the path name
@@ -384,19 +379,6 @@ public static InputFormat<WritableComparable, Writable> getInputFormatFromCache(
     return instance;
   }
 
-  /**
-   * Returns true if the inputFormat performs random seek+read
-   * @param inputFormat
-   * @return
-   */
-  public static boolean isRandomAccessInputFormat(InputFormat inputFormat) {
-    if (inputFormat instanceof OrcInputFormat ||
-        inputFormat instanceof VectorizedParquetInputFormat) {
-      return true;
-    }
-    return false;
-  }
-
   @Override
   public RecordReader getRecordReader(InputSplit split, JobConf job,
       Reporter reporter) throws IOException {
@@ -449,13 +431,6 @@ public RecordReader getRecordReader(InputSplit split, JobConf job,
       innerReader = HiveIOExceptionHandlerUtil
           .handleRecordReaderCreationException(e, job);
     }
-
-    FileSystem splitFileSystem = splitPath.getFileSystem(job);
-    if (isS3a(splitFileSystem) && isRandomAccessInputFormat(inputFormat)) {
-      LOG.debug("Changing S3A input policy to RANDOM");
-      ((S3AFileSystem) splitFileSystem).setInputPolicy(S3AInputPolicy.Random);
-    }
-
     HiveRecordReader rr = new HiveRecordReader(innerReader, job);
     rr.initIOContext(hsplit, job, inputFormatClass, innerReader);
     return rr;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRandomAccessHiveInputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRandomAccessHiveInputFormat.java
deleted file mode 100644
index d13cd43d69a9..000000000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRandomAccessHiveInputFormat.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.HiveInputFormat;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Reporter;
-import org.junit.Test;
-
-import static org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.createMockExecutionEnvironment;
-import static org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.setBlocks;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-public class TestRandomAccessHiveInputFormat {
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir","target/tmp"));
-
-  /**
-   * MockFileSystem that pretends to be an S3A system
-   */
-  public static class MockS3aFileSystem
-      extends TestInputOutputFormat.MockFileSystem {
-
-    @Override
-    public String getScheme() {
-      return "s3a";
-    }
-  }
-
-  @Test
-  // Make sure that the FS InputPolicy is changed to Random for ORC on S3A
-  public void testOrcSplitOnS3A() throws Exception {
-    // get the object inspector for MyRow
-    StructObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = (StructObjectInspector)
-          ObjectInspectorFactory.getReflectionObjectInspector(TestInputOutputFormat.MyRow.class,
-              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-
-    // Use ORC files stored on S3A
-    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
-        "randomAccessVectorized", inspector, true, 1, MockS3aFileSystem.class.getName());
-
-    // write the orc file to the mock file system
-    Path path = new Path(conf.get("mapred.input.dir") + "/0_0");
-    Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf).blockPadding(false)
-        .bufferSize(1024).inspector(inspector));
-    writer.addRow(new TestInputOutputFormat.MyRow(0, 0));
-    writer.addRow(new TestInputOutputFormat.MyRow(1, 2));
-    writer.close();
-
-    setBlocks(path, conf, new TestInputOutputFormat.MockBlock("host0"));
-
-    HiveInputFormat<WritableComparable, Writable> inputFormat = new HiveInputFormat<>();
-
-    InputSplit[] splits = inputFormat.getSplits(conf, 2);
-    assertEquals(1, splits.length);
-
-    Throwable thrown = null;
-    try {
-      inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
-    } catch (Exception e) {
-      thrown = e;
-    }
-
-    // As we are mocking a simple FS we just expect an cast exception to occur
-    assertEquals(thrown.getClass(), ClassCastException.class);
-    assertTrue(thrown.getMessage().contains("S3AFileSystem"));
-  }
-}
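
Note: the removed hook forced S3AInputPolicy.Random at runtime by casting the split's filesystem to S3AFileSystem, which is what the hadoop-aws compile-time dependency in ql/pom.xml existed for. The same seek-friendly behavior can still be requested purely through S3A configuration, with no cast and no direct dependency. Below is a minimal sketch, not part of this patch: the bucket and file path are hypothetical, hadoop-aws is assumed on the runtime classpath, and "fs.s3a.experimental.input.fadvise" is the S3A option that selects the read policy.

// Sketch only: s3a://my-bucket and the object key are made-up examples.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3ARandomReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Request the random (seek-optimized) S3A input policy declaratively,
    // instead of casting to S3AFileSystem and calling setInputPolicy().
    conf.set("fs.s3a.experimental.input.fadvise", "random");

    Path path = new Path("s3a://my-bucket/warehouse/tbl/000000_0");
    FileSystem fs = path.getFileSystem(conf);
    try (FSDataInputStream in = fs.open(path)) {
      in.seek(1024);            // the seek+read pattern ORC/Parquet readers use
      byte[] buf = new byte[16];
      in.readFully(buf);
    }
  }
}

Because the policy is picked up when the FileSystem instance is created, it applies uniformly to every stream the job opens, rather than mutating a cached FileSystem object that other threads may be sharing.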