Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/Comparators.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@

import java.nio.ByteBuffer;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.function.IntFunction;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.util.UnicodeUtil;

Expand All @@ -44,6 +48,14 @@ private Comparators() {}
.put(Types.BinaryType.get(), Comparators.unsignedBytes())
.build();

public static Comparator<StructLike> forType(Types.StructType struct) {
return new StructLikeComparator(struct);
}

public static <T> Comparator<List<T>> forType(Types.ListType list) {
return new ListComparator<>(list);
}

@SuppressWarnings("unchecked")
public static <T> Comparator<T> forType(Type.PrimitiveType type) {
Comparator<?> cmp = COMPARATORS.get(type);
Expand All @@ -58,6 +70,82 @@ public static <T> Comparator<T> forType(Type.PrimitiveType type) {
throw new UnsupportedOperationException("Cannot determine comparator for type: " + type);
}

@SuppressWarnings("unchecked")
private static <T> Comparator<T> internal(Type type) {
if (type.isPrimitiveType()) {
return forType(type.asPrimitiveType());
} else if (type.isStructType()) {
return (Comparator<T>) forType(type.asStructType());
} else if (type.isListType()) {
return (Comparator<T>) forType(type.asListType());
}

throw new UnsupportedOperationException("Cannot determine comparator for type: " + type);
}

@SuppressWarnings("unchecked")
private static <T> Class<T> internalClass(Type type) {
if (type.isPrimitiveType()) {
return (Class<T>) type.typeId().javaClass();
} else if (type.isStructType()) {
return (Class<T>) StructLike.class;
} else if (type.isListType()) {
return (Class<T>) List.class;
} else if (type.isMapType()) {
return (Class<T>) Map.class;
}

throw new UnsupportedOperationException("Cannot determine expected class for type: " + type);
}

private static class StructLikeComparator implements Comparator<StructLike> {
private final Comparator<Object>[] comparators;
private final Class<?>[] classes;

private StructLikeComparator(Types.StructType struct) {
this.comparators = struct.fields().stream()
.map(field -> internal(field.type()))
.toArray((IntFunction<Comparator<Object>[]>) Comparator[]::new);
this.classes = struct.fields().stream()
.map(field -> internalClass(field.type()))
.toArray(Class<?>[]::new);
}

@Override
public int compare(StructLike o1, StructLike o2) {
for (int i = 0; i < comparators.length; i += 1) {
Class<?> valueClass = classes[i];
int cmp = comparators[i].compare(o1.get(i, valueClass), o2.get(i, valueClass));
if (cmp != 0) {
return cmp;
}
}

return 0;
}
}

private static class ListComparator<T> implements Comparator<List<T>> {
private final Comparator<T> elementComparator;

private ListComparator(Types.ListType list) {
this.elementComparator = internal(list.elementType());
}

@Override
public int compare(List<T> o1, List<T> o2) {
int length = Math.min(o1.size(), o2.size());
for (int i = 0; i < length; i += 1) {
int cmp = elementComparator.compare(o1.get(i), o2.get(i));
if (cmp != 0) {
return cmp;
}
}

return Integer.compare(o1.size(), o2.size());
}
}

public static Comparator<ByteBuffer> unsignedBytes() {
return UnsignedByteBufComparator.INSTANCE;
}
Expand Down
41 changes: 41 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/JavaHash.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.types;

import java.util.Objects;

@FunctionalInterface
public interface JavaHash<T> {
int hash(T value);

@SuppressWarnings("unchecked")
static <T> JavaHash<T> forType(Type type) {
switch (type.typeId()) {
case STRING:
return (JavaHash<T>) JavaHashes.strings();
case STRUCT:
return (JavaHash<T>) JavaHashes.struct(type.asStructType());
case LIST:
return (JavaHash<T>) JavaHashes.list(type.asListType());
default:
return Objects::hashCode;
}
}
}
112 changes: 112 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/JavaHashes.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.types;

import java.util.List;
import java.util.function.IntFunction;
import org.apache.iceberg.StructLike;

public class JavaHashes {
private JavaHashes() {
}

public static int hashCode(CharSequence str) {
int result = 177;
for (int i = 0; i < str.length(); i += 1) {
char ch = str.charAt(i);
result = 31 * result + (int) ch;
}
return result;
}

static JavaHash<Object> strings() {
return CharSequenceHash.INSTANCE;
}

static JavaHash<StructLike> struct(Types.StructType struct) {
return new StructLikeHash(struct);
}

static JavaHash<List<?>> list(Types.ListType list) {
return new ListHash(list);
}

private static class CharSequenceHash implements JavaHash<Object> {
private static final CharSequenceHash INSTANCE = new CharSequenceHash();

private CharSequenceHash() {
}

@Override
public int hash(Object str) {
if (str instanceof CharSequence) {
return JavaHashes.hashCode((CharSequence) str);
} else if (str != null) {
// UnknownTransform results are assumed to be string, the most generic type. But there is no guarantee that the
// values actually are strings so this can receive non-string values to hash. To get a consistent hash code for
// those values, convert to string an hash the string.
return JavaHashes.hashCode(str.toString());
}

return 0;
}
}

private static class StructLikeHash implements JavaHash<StructLike> {
private final JavaHash<Object>[] hashes;

private StructLikeHash(Types.StructType struct) {
this.hashes = struct.fields().stream()
.map(Types.NestedField::type)
.map(JavaHash::forType)
.toArray((IntFunction<JavaHash<Object>[]>) JavaHash[]::new);
}

@Override
public int hash(StructLike struct) {
int result = 97;
int len = hashes.length;
result = 41 * result + len;
for (int i = 0; i < len; i += 1) {
result = 41 * result + hashes[i].hash(struct.get(i, Object.class));
}
return result;
}
}

private static class ListHash implements JavaHash<List<?>> {
private final JavaHash<Object> elementHash;

private ListHash(Types.ListType list) {
this.elementHash = JavaHash.forType(list.elementType());
}

@Override
public int hash(List<?> list) {
int result = 17;
int len = list.size();
result = 37 * result + len;
for (int i = 0; i < len; i += 1) {
result = 37 * result + elementHash.hash(list.get(i));
}
return result;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.io.Serializable;
import org.apache.iceberg.types.Comparators;
import org.apache.iceberg.types.JavaHashes;

/**
* Wrapper class to adapt CharSequence for use in maps and sets.
Expand Down Expand Up @@ -59,12 +60,7 @@ public boolean equals(Object other) {

@Override
public int hashCode() {
int result = 177;
for (int i = 0; i < wrapped.length(); i += 1) {
char ch = wrapped.charAt(i);
result = 31 * result + (int) ch;
}
return result;
return JavaHashes.hashCode(wrapped);
}

@Override
Expand Down
14 changes: 10 additions & 4 deletions core/src/main/java/org/apache/iceberg/PartitionsTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ private static StaticDataTask.Row convertPartition(Partition partition) {
}

private static Iterable<Partition> partitions(Table table, Long snapshotId) {
PartitionSet partitions = new PartitionSet();
PartitionMap partitions = new PartitionMap(table.spec().partitionType());
TableScan scan = table.newScan();

if (snapshotId != null) {
Expand All @@ -95,15 +95,21 @@ private class PartitionsScan extends StaticTableScan {
}
}

static class PartitionSet {
static class PartitionMap {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was renamed to avoid a conflict with the new PartitionSet class, and updated to use StructLikeWrapper with correct equals/hashCode implementations.

private final Map<StructLikeWrapper, Partition> partitions = Maps.newHashMap();
private final StructLikeWrapper reused = StructLikeWrapper.wrap(null);
private final Types.StructType type;
private final StructLikeWrapper reused;

PartitionMap(Types.StructType type) {
this.type = type;
this.reused = StructLikeWrapper.forType(type);
}

Partition get(StructLike key) {
Partition partition = partitions.get(reused.set(key));
if (partition == null) {
partition = new Partition(key);
partitions.put(StructLikeWrapper.wrap(key), partition);
partitions.put(StructLikeWrapper.forType(type).set(key), partition);
}
return partition;
}
Expand Down
Loading