Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.connector.ConnectorMetadata;
import com.facebook.presto.spi.connector.ConnectorOutputMetadata;
import com.facebook.presto.spi.statistics.ComputedStatistics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
Expand Down Expand Up @@ -102,7 +103,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con
}

@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments)
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
clearRollback();
return Optional.empty();
Expand Down Expand Up @@ -212,7 +213,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto
}

@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments)
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
clearRollback();
return Optional.empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.connector.ConnectorMetadata;
import com.facebook.presto.spi.connector.ConnectorOutputMetadata;
import com.facebook.presto.spi.statistics.ComputedStatistics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slice;
Expand Down Expand Up @@ -173,7 +174,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con
}

@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments)
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
JdbcOutputTableHandle handle = (JdbcOutputTableHandle) tableHandle;
jdbcClient.commitCreateTable(handle);
Expand Down Expand Up @@ -203,7 +204,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto
}

@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle tableHandle, Collection<Slice> fragments)
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
JdbcOutputTableHandle jdbcInsertHandle = (JdbcOutputTableHandle) tableHandle;
jdbcClient.finishInsertTable(jdbcInsertHandle);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.facebook.presto.spi.SchemaTablePrefix;
import com.facebook.presto.spi.connector.ConnectorMetadata;
import com.facebook.presto.spi.connector.ConnectorOutputMetadata;
import com.facebook.presto.spi.statistics.ComputedStatistics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
Expand Down Expand Up @@ -159,7 +160,7 @@ public void renameTable(ConnectorSession session, ConnectorTableHandle tableHand
public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting)
{
// Implements plain CREATE TABLE as a degenerate CTAS: begin the write with no
// layout, then finish immediately with no fragments and no computed statistics
// (the extra empty list matches the new finishCreateTable signature).
ConnectorOutputTableHandle outputTableHandle = beginCreateTable(session, tableMetadata, Optional.empty());
finishCreateTable(session, outputTableHandle, ImmutableList.of(), ImmutableList.of());
}

@Override
Expand Down Expand Up @@ -220,7 +221,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con
}

@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments)
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
BlackHoleOutputTableHandle blackHoleOutputTableHandle = (BlackHoleOutputTableHandle) tableHandle;
BlackHoleTableHandle table = blackHoleOutputTableHandle.getTable();
Expand All @@ -236,7 +237,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto
}

@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
// No-op: this connector reports no output metadata for an insert; all
// arguments, including the newly added computedStatistics, are ignored.
return Optional.empty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public void tableIsCreatedAfterCommits()

assertThatNoTableIsCreated();

metadata.finishCreateTable(SESSION, table, ImmutableList.of());
metadata.finishCreateTable(SESSION, table, ImmutableList.of(), ImmutableList.of());

List<SchemaTableName> tables = metadata.listTables(SESSION, Optional.empty());
assertTrue(tables.size() == 1, "Expected only one table.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import com.facebook.presto.spi.connector.ConnectorMetadata;
import com.facebook.presto.spi.connector.ConnectorOutputMetadata;
import com.facebook.presto.spi.predicate.TupleDomain;
import com.facebook.presto.spi.statistics.ComputedStatistics;
import com.facebook.presto.spi.type.Type;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -312,7 +313,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con
}

@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
// No-op: nothing to commit and no output metadata to report; the new
// computedStatistics parameter is accepted for SPI compatibility and ignored.
return Optional.empty();
}
Expand All @@ -339,7 +340,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto
}

@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
// No-op: nothing to commit and no output metadata to report; the new
// computedStatistics parameter is accepted for SPI compatibility and ignored.
return Optional.empty();
}
Expand Down
40 changes: 37 additions & 3 deletions presto-docs/src/main/sphinx/connector/hive.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ security options in the Hive connector.
Hive Configuration Properties
-----------------------------

================================================== ============================================================ ==========
================================================== ============================================================ ============
Property Name Description Default
================================================== ============================================================ ==========
================================================== ============================================================ ============
``hive.metastore.uri`` The URI(s) of the Hive metastore to connect to using the
Thrift protocol. If multiple URIs are provided, the first
URI is used by default and the rest of the URIs are
Expand Down Expand Up @@ -175,7 +175,11 @@ Property Name Description
``hive.non-managed-table-writes-enabled`` Enable writes to non-managed (external) Hive tables. ``false``

``hive.non-managed-table-creates-enabled`` Enable creating non-managed (external) Hive tables. ``true``
================================================== ============================================================ ==========

``hive.collect-column-statistics-on-write`` Enables automatic column level statistics collection ``false``
on write. See `Table Statistics <#table-statistics>`__ for
details.
================================================== ============================================================ ============

Amazon S3 Configuration
-----------------------
Expand Down Expand Up @@ -334,6 +338,36 @@ the ``org.apache.hadoop.conf.Configurable`` interface from the Hadoop Java API,
will be passed in after the object instance is created and before it is asked to provision or retrieve any
encryption keys.

Table Statistics
----------------

The Hive connector automatically collects basic statistics
(``numFiles``, ``numRows``, ``rawDataSize``, ``totalSize``)
on ``INSERT`` and ``CREATE TABLE AS`` operations.

The Hive connector can also collect column level statistics:

============= ====================================================================
Column Type Collectible Statistics
============= ====================================================================
``TINYINT`` number of nulls, number of distinct values, min/max values
``SMALLINT`` number of nulls, number of distinct values, min/max values
``INTEGER`` number of nulls, number of distinct values, min/max values
``BIGINT`` number of nulls, number of distinct values, min/max values
``DOUBLE`` number of nulls, number of distinct values, min/max values
``REAL`` number of nulls, number of distinct values, min/max values
``DECIMAL`` number of nulls, number of distinct values, min/max values
``DATE`` number of nulls, number of distinct values, min/max values
``TIMESTAMP`` number of nulls, number of distinct values, min/max values
``VARCHAR`` number of nulls, number of distinct values
``CHAR`` number of nulls, number of distinct values
``VARBINARY`` number of nulls
``BOOLEAN`` number of nulls, number of true/false values
============= ====================================================================

Automatic column level statistics collection on write is controlled by
the ``collect_column_statistics_on_write`` catalog session property.

Schema Evolution
----------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ public class HiveClientConfig

private boolean tableStatisticsEnabled = true;
private int partitionStatisticsSampleSize = 100;
private boolean collectColumnStatisticsOnWrite;

public int getMaxInitialSplits()
{
Expand Down Expand Up @@ -1073,4 +1074,17 @@ public HiveClientConfig setPartitionStatisticsSampleSize(int partitionStatistics
this.partitionStatisticsSampleSize = partitionStatisticsSampleSize;
return this;
}

// Whether automatic column-level statistics collection on write is enabled.
// Defaults to false (the backing field has no initializer).
public boolean isCollectColumnStatisticsOnWrite()
{
return collectColumnStatisticsOnWrite;
}

@Config("hive.collect-column-statistics-on-write")
@ConfigDescription("Enables automatic column level statistics collection on write")
// Config binding for hive.collect-column-statistics-on-write; returns this
// to support the fluent setter chaining used throughout HiveClientConfig.
public HiveClientConfig setCollectColumnStatisticsOnWrite(boolean collectColumnStatisticsOnWrite)
{
this.collectColumnStatisticsOnWrite = collectColumnStatisticsOnWrite;
return this;
}
}
Loading