From d75a8bd67dd4d28495f1bc2bdd1f9dc5bdfe376f Mon Sep 17 00:00:00 2001 From: chutium Date: Thu, 31 Jul 2014 16:54:36 +0200 Subject: [PATCH 1/2] [SPARK-2700] [SQL] Hidden files (such as .impala_insert_staging) should be filtered out by sqlContext.parquetFile --- .../scala/org/apache/spark/sql/parquet/ParquetTypes.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala index aaef1a1d474f..8c0c895cb46e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala @@ -373,9 +373,10 @@ private[parquet] object ParquetTypesConverter extends Logging { } ParquetRelation.enableLogForwarding() - val children = fs.listStatus(path).filterNot { - _.getPath.getName == FileOutputCommitter.SUCCEEDED_FILE_NAME - } + val children = fs.listStatus(path).filterNot( + status => (status.getPath.getName.charAt(0) == '.' || + status.getPath.getName == FileOutputCommitter.SUCCEEDED_FILE_NAME) + ) // NOTE (lian): Parquet "_metadata" file can be very slow if the file consists of lots of row // groups. Since Parquet schema is replicated among all row groups, we only need to touch a From b76ae8cbea6bc695af7741657a0a7af31a5ad29d Mon Sep 17 00:00:00 2001 From: chutium Date: Fri, 1 Aug 2014 10:06:06 +0200 Subject: [PATCH 2/2] [SPARK-2700] [SQL] fixed styling issue --- .../scala/org/apache/spark/sql/parquet/ParquetTypes.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala index 8c0c895cb46e..2867dc0a8b1f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala @@ -373,10 +373,10 @@ private[parquet] object ParquetTypesConverter extends Logging { } ParquetRelation.enableLogForwarding() - val children = fs.listStatus(path).filterNot( - status => (status.getPath.getName.charAt(0) == '.' || - status.getPath.getName == FileOutputCommitter.SUCCEEDED_FILE_NAME) - ) + val children = fs.listStatus(path).filterNot { status => + val name = status.getPath.getName + name(0) == '.' || name == FileOutputCommitter.SUCCEEDED_FILE_NAME + } // NOTE (lian): Parquet "_metadata" file can be very slow if the file consists of lots of row // groups. Since Parquet schema is replicated among all row groups, we only need to touch a