Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ public HdfsParquetDataSource(Path path, long size, FSDataInputStream inputStream
this.stats = stats;
}

/**
 * Returns the value held in this data source's {@code name} field.
 *
 * NOTE(review): presumably {@code name} is derived from the {@code path}
 * constructor argument; the assignment is not visible here, so confirm.
 *
 * @return the {@code name} of this data source
 */
@Override
public String getPath()
{
    return this.name;
}

@Override
public final long getReadBytes()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
public interface ParquetDataSource
extends Closeable
{
String getPath();

long getReadBytes();

long getSize();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.facebook.presto.spi.type.MapType;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spi.type.TypeSignatureParameter;
import com.google.common.base.Joiner;
import it.unimi.dsi.fastutil.booleans.BooleanArrayList;
import it.unimi.dsi.fastutil.booleans.BooleanList;
import it.unimi.dsi.fastutil.ints.IntArrayList;
Expand All @@ -53,6 +54,8 @@
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Math.min;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;

public class ParquetReader
Expand Down Expand Up @@ -206,7 +209,18 @@ private ColumnChunk readPrimitive(PrimitiveField field)
ParquetColumnChunk columnChunk = new ParquetColumnChunk(descriptor, buffer, 0);
columnReader.setPageReader(columnChunk.readAllPages());
}
return columnReader.readPrimitive(field);
try {
return columnReader.readPrimitive(field);
}
catch (UnsupportedOperationException e) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of relying on the UnsupportedOperationException thrown by the Type classes, it may be cleaner to throw a specific exception (e.g., ParquetSchemaMismatchException or something like that) in PrimitiveColumnReader::readValue(). What do you think?

throw new ParquetCorruptionException(format(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not really a corruption issue. Sometimes users just alter their tables and hit this issue as well.

"There is a mismatch between parquet file schema and partition schema. " +
"The column %s in file %s is declared as type %s but parquet file declared column type as %s.",
Joiner.on(".").join(columnDescriptor.getPath()).toLowerCase(ENGLISH),
dataSource.getPath(),
field.getType(),
columnDescriptor.getType()));
}
}

private byte[] allocateBlock(int length)
Expand Down