Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/src/main/sphinx/release/release-339.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

## Hive connector

* Add support for writing Bloom filters in ORC files. ({issue}`3939`)
* Make `location` parameter optional for the `system.register_partition` procedure. ({issue}`4443`)
* Avoid creating tiny splits at the end of block boundaries. ({issue}`4485`)
* Remove requirement to configure `metastore.storage.schema.reader.impl` in Hive 3.x metastore
Expand Down
1 change: 0 additions & 1 deletion docs/src/main/sphinx/release/release-365.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@
* Fix reporting of number of read bytes for tables using `ORC` file format. ({issue}`10048`)
* Account for memory used for deleted row information when reading from ACID tables. ({issue}`9914`, {issue}`10070`)
* Fix `REVOKE GRANT OPTION` to revoke only the grant option instead of revoking the entire privilege. ({issue}`10094`)
* Fix issue that prevented writing bloom filters for ORC files. ({issue}`9792`)
* Fix bug where incorrect rows were deleted when deleting from a transactional table that has original files (before the first major compaction). ({issue}`10095`)
* Fix delete and update failure when changing a table after a major compaction. ({issue}`10120`)
* Fix incorrect results when decoding decimal values in Parquet reader. ({issue}`9971`)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ public class HiveMetadata
public static final String PRESTO_VIEW_COMMENT = "Presto View";
public static final String PRESTO_VIEW_EXPANDED_TEXT_MARKER = "/* Presto View */";

private static final String ORC_BLOOM_FILTER_COLUMNS_KEY = "orc.bloom.filter.columns";
private static final String ORC_BLOOM_FILTER_FPP_KEY = "orc.bloom.filter.fpp";
public static final String ORC_BLOOM_FILTER_COLUMNS_KEY = "orc.bloom.filter.columns";
public static final String ORC_BLOOM_FILTER_FPP_KEY = "orc.bloom.filter.fpp";

public static final String SKIP_HEADER_COUNT_KEY = serdeConstants.HEADER_COUNT;
public static final String SKIP_FOOTER_COUNT_KEY = serdeConstants.FOOTER_COUNT;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,11 @@
import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_SERDE_NOT_FOUND;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT;
import static io.trino.plugin.hive.HiveMetadata.ORC_BLOOM_FILTER_COLUMNS_KEY;
import static io.trino.plugin.hive.HiveMetadata.ORC_BLOOM_FILTER_FPP_KEY;
import static io.trino.plugin.hive.HiveMetadata.SKIP_FOOTER_COUNT_KEY;
import static io.trino.plugin.hive.HiveMetadata.SKIP_HEADER_COUNT_KEY;
import static io.trino.plugin.hive.HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION;
import static io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS;
import static io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP;
import static io.trino.plugin.hive.HiveType.toHiveTypes;
import static io.trino.plugin.hive.metastore.SortingColumn.Order.ASCENDING;
Expand Down Expand Up @@ -1083,18 +1084,18 @@ public static List<HiveType> getColumnTypes(Properties schema)

public static OrcWriterOptions getOrcWriterOptions(Properties schema, OrcWriterOptions orcWriterOptions)
{
if (schema.containsKey(ORC_BLOOM_FILTER_COLUMNS)) {
if (!schema.containsKey(ORC_BLOOM_FILTER_FPP)) {
throw new TrinoException(HIVE_INVALID_METADATA, format("FPP for bloom filter is missing"));
if (schema.containsKey(ORC_BLOOM_FILTER_COLUMNS_KEY)) {
if (!schema.containsKey(ORC_BLOOM_FILTER_FPP_KEY)) {
throw new TrinoException(HIVE_INVALID_METADATA, "FPP for bloom filter is missing");
}
try {
double fpp = parseDouble(schema.getProperty(ORC_BLOOM_FILTER_FPP));
double fpp = parseDouble(schema.getProperty(ORC_BLOOM_FILTER_FPP_KEY));
return orcWriterOptions
.withBloomFilterColumns(ImmutableSet.copyOf(COLUMN_NAMES_SPLITTER.splitToList(schema.getProperty(ORC_BLOOM_FILTER_COLUMNS))))
.withBloomFilterColumns(ImmutableSet.copyOf(COLUMN_NAMES_SPLITTER.splitToList(schema.getProperty(ORC_BLOOM_FILTER_COLUMNS_KEY))))
.withBloomFilterFpp(fpp);
}
catch (NumberFormatException e) {
throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Invalid value for %s property: %s", ORC_BLOOM_FILTER_FPP, schema.getProperty(ORC_BLOOM_FILTER_FPP)));
throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Invalid value for %s property: %s", ORC_BLOOM_FILTER_FPP, schema.getProperty(ORC_BLOOM_FILTER_FPP_KEY)));
}
}
return orcWriterOptions;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.hive.orc;

import io.trino.Session;
import io.trino.plugin.hive.HiveQueryRunner;
import io.trino.testing.AbstractTestQueryFramework;
import io.trino.testing.QueryRunner;
import org.intellij.lang.annotations.Language;
import org.testng.annotations.Test;

import static io.trino.testing.sql.TestTable.randomTableSuffix;
import static java.lang.String.format;
import static org.assertj.core.api.Assertions.assertThat;

public class TestOrcWithBloomFilters
        extends AbstractTestQueryFramework
{
    @Override
    protected QueryRunner createQueryRunner()
            throws Exception
    {
        // Bloom filter writing is enabled globally so the CTAS/INSERT below emit filter streams
        return HiveQueryRunner.builder()
                .addHiveProperty("hive.orc.bloom-filters.enabled", "true")
                .addHiveProperty("hive.orc.default-bloom-filter-fpp", "0.001")
                .build();
    }

    @Test
    public void testOrcBloomFilterIsWrittenDuringCreate()
    {
        String table = "create_orc_with_bloom_filters_" + randomTableSuffix();
        assertUpdate(
                format(
                        "CREATE TABLE %s WITH (%s) AS SELECT orderstatus, totalprice FROM tpch.tiny.orders",
                        table,
                        tableProperties("totalprice", "orderstatus", "totalprice")),
                15000);

        // totalprice = 51890 falls strictly between the row group's min and max, so
        // only a bloom filter (not min/max stats) can prove the row group empty
        assertRowGroupPruningByBloomFilter("SELECT * FROM " + table + " WHERE totalprice = 51890");
        assertUpdate("DROP TABLE " + table);
    }

    @Test
    public void testOrcBloomFilterIsWrittenDuringInsert()
    {
        String table = "insert_orc_with_bloom_filters_" + randomTableSuffix();
        assertUpdate(
                format(
                        "CREATE TABLE %s (totalprice DOUBLE, orderstatus VARCHAR) WITH (%s)",
                        table,
                        tableProperties("totalprice", "orderstatus", "totalprice")));
        assertUpdate(format("INSERT INTO %s SELECT totalprice, orderstatus FROM tpch.tiny.orders", table), 15000);

        // totalprice = 51890 falls strictly between the row group's min and max, so
        // only a bloom filter (not min/max stats) can prove the row group empty
        assertRowGroupPruningByBloomFilter("SELECT * FROM " + table + " WHERE totalprice = 51890");
        assertUpdate("DROP TABLE " + table);
    }

    // Table properties used by both tests: bloom filter on one column, single bucket,
    // sorted layout (sorting keeps the probe value between row-group min/max)
    private String tableProperties(String bloomFilterColumn, String bucketColumn, String sortColumn)
    {
        return "orc_bloom_filter_columns = ARRAY['" + bloomFilterColumn + "'], "
                + "bucketed_by = array['" + bucketColumn + "'], "
                + "bucket_count = 1, "
                + "sorted_by = ARRAY['" + sortColumn + "']";
    }

    // Runs the query twice: with bloom filters disabled everything must be read and
    // processed; with them enabled nothing should be read at all. Both return zero rows.
    private void assertRowGroupPruningByBloomFilter(@Language("SQL") String sql)
    {
        Session bloomFiltersDisabled = Session.builder(getSession())
                .setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "orc_bloom_filters_enabled", "false")
                .build();

        assertQueryStats(
                bloomFiltersDisabled,
                sql,
                stats -> {
                    assertThat(stats.getPhysicalInputPositions()).isGreaterThan(0);
                    assertThat(stats.getProcessedInputPositions()).isEqualTo(stats.getPhysicalInputPositions());
                },
                result -> assertThat(result.getRowCount()).isEqualTo(0));

        assertQueryStats(
                getSession(),
                sql,
                stats -> {
                    assertThat(stats.getPhysicalInputPositions()).isEqualTo(0);
                    assertThat(stats.getProcessedInputPositions()).isEqualTo(0);
                },
                result -> assertThat(result.getRowCount()).isEqualTo(0));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static io.trino.orc.OrcWriterOptions.WriterIdentification.LEGACY_HIVE_COMPATIBLE;
import static io.trino.orc.OrcWriterOptions.WriterIdentification.TRINO;
import static io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS;
import static io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP;
import static io.trino.plugin.hive.HiveMetadata.ORC_BLOOM_FILTER_COLUMNS_KEY;
import static io.trino.plugin.hive.HiveMetadata.ORC_BLOOM_FILTER_FPP_KEY;
import static io.trino.plugin.hive.util.HiveUtil.getOrcWriterOptions;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy;
Expand Down Expand Up @@ -78,8 +78,8 @@ public void testOrcWriterOptionsFromOrcWriterConfig()
public void testOrcWriterOptionsFromTableProperties()
{
Properties tableProperties = new Properties();
tableProperties.setProperty(ORC_BLOOM_FILTER_COLUMNS, "column_a, column_b");
tableProperties.setProperty(ORC_BLOOM_FILTER_FPP, "0.5");
tableProperties.setProperty(ORC_BLOOM_FILTER_COLUMNS_KEY, "column_a, column_b");
tableProperties.setProperty(ORC_BLOOM_FILTER_FPP_KEY, "0.5");
OrcWriterOptions orcWriterOptions = getOrcWriterOptions(tableProperties, new OrcWriterOptions());
assertThat(orcWriterOptions.getBloomFilterFpp()).isEqualTo(0.5);
assertThat(orcWriterOptions.isBloomFilterColumn("column_a")).isTrue();
Expand All @@ -91,8 +91,8 @@ public void testOrcWriterOptionsFromTableProperties()
public void testOrcWriterOptionsWithInvalidFPPValue()
{
    // A non-numeric orc.bloom.filter.fpp value must be rejected; the error message
    // names the user-facing table property (orc_bloom_filter_fpp), not the serde key
    Properties tableProperties = new Properties();
    tableProperties.setProperty(ORC_BLOOM_FILTER_COLUMNS_KEY, "column_with_bloom_filter");
    tableProperties.setProperty(ORC_BLOOM_FILTER_FPP_KEY, "abc");
    assertThatThrownBy(() -> getOrcWriterOptions(tableProperties, new OrcWriterOptions()))
            .hasMessage("Invalid value for orc_bloom_filter_fpp property: abc");
}
Expand All @@ -101,8 +101,8 @@ public void testOrcWriterOptionsWithInvalidFPPValue()
public void testOrcBloomFilterWithInvalidRange(String fpp)
{
    // FPP values outside the open interval (0.0, 1.0) — supplied by a data provider —
    // must be rejected by the ORC writer options validation
    Properties tableProperties = new Properties();
    tableProperties.setProperty(ORC_BLOOM_FILTER_COLUMNS_KEY, "column_with_bloom_filter");
    tableProperties.setProperty(ORC_BLOOM_FILTER_FPP_KEY, fpp);
    assertThatThrownBy(() -> getOrcWriterOptions(tableProperties, new OrcWriterOptions()))
            .hasMessage("bloomFilterFpp should be > 0.0 & < 1.0");
}
Expand Down