Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
<dep.slf4j.version>2.0.16</dep.slf4j.version>
<dep.kafka.version>3.9.1</dep.kafka.version>
<dep.pinot.version>1.3.0</dep.pinot.version>
<dep.druid.version>30.0.1</dep.druid.version>
<dep.druid.version>35.0.1</dep.druid.version>
<dep.jaxb.version>2.3.1</dep.jaxb.version>
<dep.jaxb.runtime.version>4.0.6</dep.jaxb.runtime.version>
<dep.hudi.version>0.14.0</dep.hudi.version>
Expand Down
12 changes: 11 additions & 1 deletion presto-druid/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.hibernate</groupId>
<groupId>org.hibernate.validator</groupId>
<artifactId>hibernate-validator</artifactId>
<version>8.0.3.Final</version>
</dependency>
Expand All @@ -41,6 +41,16 @@
<artifactId>jakarta.el</artifactId>
<version>4.0.1</version>
</dependency>
<dependency>
<groupId>at.yawk.lz4</groupId>
<artifactId>lz4-java</artifactId>
<version>1.10.2</version>
</dependency>
<dependency>
<groupId>org.mozilla</groupId>
<artifactId>rhino</artifactId>
<version>1.8.1</version>
</dependency>
</dependencies>
</dependencyManagement>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.druid.segment;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import org.apache.druid.query.OrderBy;
import org.apache.druid.segment.DimensionHandler;
import org.apache.druid.segment.Metadata;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.BaseColumnHolder;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.data.Indexed;
import org.joda.time.Interval;

import javax.annotation.Nullable;

import java.util.Collections;
import java.util.List;
import java.util.Map;

public class PrestoQueryableIndex
implements QueryableIndex
{
private final Interval dataInterval;
private final List<String> columnNames;
private final Indexed<String> availableDimensions;
private final BitmapFactory bitmapFactory;
private final Map<String, Supplier<ColumnHolder>> columns;
private final SmooshedFileMapper fileMapper;
@Nullable
private final Metadata metadata;
private final Supplier<Map<String, DimensionHandler>> dimensionHandlers;

public PrestoQueryableIndex(
Interval dataInterval,
Indexed<String> dimNames,
BitmapFactory bitmapFactory,
Map<String, Supplier<ColumnHolder>> columns,
SmooshedFileMapper fileMapper,
@Nullable Metadata metadata,
boolean lazy)
{
Preconditions.checkNotNull(columns.get(ColumnHolder.TIME_COLUMN_NAME));
this.dataInterval = Preconditions.checkNotNull(dataInterval, "dataInterval");
ImmutableList.Builder<String> columnNamesBuilder = ImmutableList.builder();
for (String column : columns.keySet()) {
if (!ColumnHolder.TIME_COLUMN_NAME.equals(column)) {
columnNamesBuilder.add(column);
}
}
this.columnNames = columnNamesBuilder.build();
this.availableDimensions = dimNames;
this.bitmapFactory = bitmapFactory;
this.columns = columns;
this.fileMapper = fileMapper;
this.metadata = metadata;

if (lazy) {
this.dimensionHandlers = Suppliers.memoize(() -> initDimensionHandlers(availableDimensions));
}
else {
this.dimensionHandlers = () -> initDimensionHandlers(availableDimensions);
}
}

@Override
public Interval getDataInterval()
{
return dataInterval;
}

@Override
public int getNumRows()
Comment thread
sourcery-ai[bot] marked this conversation as resolved.
{
return columns.get(ColumnHolder.TIME_COLUMN_NAME).get().getLength();
}

@Override
public List<String> getColumnNames()
{
return columnNames;
}

@Override
public Indexed<String> getAvailableDimensions()
{
return availableDimensions;
}

@Override
public BitmapFactory getBitmapFactoryForDimensions()
{
return bitmapFactory;
}

@Nullable
@Override
public BaseColumnHolder getColumnHolder(String columnName)
{
Supplier<BaseColumnHolder> columnHolderSupplier = (Supplier) this.columns.get(columnName);
return columnHolderSupplier == null ? null : (BaseColumnHolder) columnHolderSupplier.get();
}

@VisibleForTesting
public Map<String, Supplier<ColumnHolder>> getColumns()
{
return columns;
}

@VisibleForTesting
public SmooshedFileMapper getFileMapper()
{
return fileMapper;
}

@Override
public void close()
{
if (fileMapper != null) {
fileMapper.close();
}
}

@Override
public Metadata getMetadata()
{
return metadata;
}

@Override
public List<OrderBy> getOrdering()
{
return Collections.emptyList();
}

@Override
public Map<String, DimensionHandler> getDimensionHandlers()
{
return dimensionHandlers.get();
}

private Map<String, DimensionHandler> initDimensionHandlers(Indexed<String> availableDimensions)
{
Map<String, DimensionHandler> dimensionHandlerMap = Maps.newLinkedHashMap();
for (String dim : availableDimensions) {
final ColumnHolder columnHolder = getColumnHolder(dim);
final DimensionHandler handler = columnHolder.getColumnFormat().getColumnHandler(dim);
dimensionHandlerMap.put(dim, handler);
}
return dimensionHandlerMap;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,11 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Supplier;
import com.google.common.collect.Streams;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.common.utils.SerializerUtils;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.segment.Metadata;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.SimpleQueryableIndex;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnDescriptor;
import org.apache.druid.segment.column.ColumnHolder;
Expand Down Expand Up @@ -66,18 +64,18 @@ public class V9SegmentIndexSource
public V9SegmentIndexSource(SegmentColumnSource segmentColumnSource)
{
this.segmentColumnSource = requireNonNull(segmentColumnSource, "segmentColumnSource is null");
NullHandling.initializeForTests();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ShahimSharafudeen Could you please clarify why this line was removed? It previously prevented the IllegalStateException thrown when the null-handling state is checked before initialization.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Apache Druid 35.0.1, the legacy null-handling behavior and its associated configs/constants (NullHandling, druid.generic.useDefaultValueForNull, etc.) were removed because the project has fully moved to ANSI SQL–compliant null semantics; the legacy behavior is no longer supported, so this initialization call no longer exists.

}

@Override
public QueryableIndex loadIndex(List<ColumnHandle> columnHandles)
throws IOException
{
ByteBuffer indexBuffer = ByteBuffer.wrap(segmentColumnSource.getColumnData(INDEX_METADATA_FILE_NAME));
GenericIndexed.read(indexBuffer, STRING_STRATEGY);
GenericIndexed.read(indexBuffer, STRING_STRATEGY, null);
GenericIndexed<String> allDimensions = GenericIndexed.read(
indexBuffer,
STRING_STRATEGY);
STRING_STRATEGY,
null);

Interval dataInterval = Intervals.utc(indexBuffer.getLong(), indexBuffer.getLong());

Expand Down Expand Up @@ -114,8 +112,13 @@ public QueryableIndex loadIndex(List<ColumnHandle> columnHandles)
columns.put(TIME_COLUMN_NAME, () -> createColumnHolder(TIME_COLUMN_NAME));

Indexed<String> indexed = new ListIndexed<>(availableDimensions);
// TODO: get rid of the time column by creating Presto's SimpleQueryableIndex impl
return new SimpleQueryableIndex(
/*
 * Druid 35.0.1 made SimpleQueryableIndex abstract, so PrestoQueryableIndex was
 * created as a concrete replacement based on the original implementation.
 * TODO: Refactor PrestoQueryableIndex to remove the dependency on the __time column
 * and implement a fully Presto-specific QueryableIndex.
 */
return new PrestoQueryableIndex(
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Druid 35.0.1, the SimpleQueryableIndex class was changed from a concrete class to an abstract class. This introduced a breaking API change that prevents direct instantiation using the new keyword. Therefore, I created PrestoQueryableIndex, a custom concrete implementation of the QueryableIndex interface.

dataInterval,
indexed,
segmentBitmapSerdeFactory.getBitmapFactory(),
Expand All @@ -136,7 +139,7 @@ private ColumnHolder createColumnHolder(String columnName)
try {
ByteBuffer columnData = ByteBuffer.wrap(segmentColumnSource.getColumnData(columnName));
ColumnDescriptor columnDescriptor = readColumnDescriptor(columnData);
return columnDescriptor.read(columnData, ColumnConfig.DEFAULT, null);
return columnDescriptor.read(columnData, ColumnConfig.DEFAULT, null, null);
}
catch (IOException e) {
throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, e);
Expand Down
Loading