From b77d672f417350d1fd8b651dbf5112d1432470e9 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 7 Aug 2020 13:24:58 -0700 Subject: [PATCH 1/3] Fix struct comparison and add struct and partition set implementations. --- .../org/apache/iceberg/types/Comparators.java | 88 ++++++++ .../org/apache/iceberg/types/JavaHash.java | 41 ++++ .../org/apache/iceberg/types/JavaHashes.java | 107 ++++++++++ .../iceberg/util/CharSequenceWrapper.java | 8 +- .../org/apache/iceberg/PartitionsTable.java | 14 +- .../org/apache/iceberg/util/PartitionSet.java | 190 ++++++++++++++++++ .../apache/iceberg/util/StructLikeSet.java | 152 ++++++++++++++ .../iceberg/util/StructLikeWrapper.java | 50 ++++- .../actions/RewriteDataFilesAction.java | 2 +- 9 files changed, 634 insertions(+), 18 deletions(-) create mode 100644 api/src/main/java/org/apache/iceberg/types/JavaHash.java create mode 100644 api/src/main/java/org/apache/iceberg/types/JavaHashes.java create mode 100644 core/src/main/java/org/apache/iceberg/util/PartitionSet.java create mode 100644 core/src/main/java/org/apache/iceberg/util/StructLikeSet.java diff --git a/api/src/main/java/org/apache/iceberg/types/Comparators.java b/api/src/main/java/org/apache/iceberg/types/Comparators.java index 57ad1d476126..4e4e2769a60b 100644 --- a/api/src/main/java/org/apache/iceberg/types/Comparators.java +++ b/api/src/main/java/org/apache/iceberg/types/Comparators.java @@ -21,6 +21,10 @@ import java.nio.ByteBuffer; import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.function.IntFunction; +import org.apache.iceberg.StructLike; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.util.UnicodeUtil; @@ -44,6 +48,14 @@ private Comparators() {} .put(Types.BinaryType.get(), Comparators.unsignedBytes()) .build(); + public static Comparator forType(Types.StructType struct) { + return new StructLikeComparator(struct); + } + + public static Comparator> forType(Types.ListType list) { + return new ListComparator<>(list); + } + @SuppressWarnings("unchecked") public static Comparator forType(Type.PrimitiveType type) { Comparator cmp = COMPARATORS.get(type); @@ -58,6 +70,82 @@ public static Comparator forType(Type.PrimitiveType type) { throw new UnsupportedOperationException("Cannot determine comparator for type: " + type); } + @SuppressWarnings("unchecked") + private static Comparator internal(Type type) { + if (type.isPrimitiveType()) { + return forType(type.asPrimitiveType()); + } else if (type.isStructType()) { + return (Comparator) forType(type.asStructType()); + } else if (type.isListType()) { + return (Comparator) forType(type.asListType()); + } + + throw new UnsupportedOperationException("Cannot determine comparator for type: " + type); + } + + @SuppressWarnings("unchecked") + private static Class internalClass(Type type) { + if (type.isPrimitiveType()) { + return (Class) type.typeId().javaClass(); + } else if (type.isStructType()) { + return (Class) StructLike.class; + } else if (type.isListType()) { + return (Class) List.class; + } else if (type.isMapType()) { + return (Class) Map.class; + } + + throw new UnsupportedOperationException("Cannot determine expected class for type: " + type); + } + + private static class StructLikeComparator implements Comparator { + private final Comparator[] comparators; + private final Class[] classes; + + private StructLikeComparator(Types.StructType struct) { + this.comparators = struct.fields().stream() + .map(field -> internal(field.type())) + .toArray((IntFunction[]>) Comparator[]::new); + this.classes = struct.fields().stream() + .map(field -> internalClass(field.type())) + .toArray(Class[]::new); + } + + @Override + public int compare(StructLike o1, StructLike o2) { + for (int i = 0; i < comparators.length; i += 1) { + Class valueClass = classes[i]; + int cmp = comparators[i].compare(o1.get(i, valueClass), o2.get(i, valueClass)); + if (cmp != 0) { + return cmp; + } + } + + return 0; + } + } + + private static class ListComparator implements Comparator> { + private final Comparator elementComparator; + + private ListComparator(Types.ListType list) { + this.elementComparator = internal(list.elementType()); + } + + @Override + public int compare(List o1, List o2) { + int length = Math.min(o1.size(), o2.size()); + for (int i = 0; i < length; i += 1) { + int cmp = elementComparator.compare(o1.get(i), o2.get(i)); + if (cmp != 0) { + return cmp; + } + } + + return Integer.compare(o1.size(), o2.size()); + } + } + public static Comparator unsignedBytes() { return UnsignedByteBufComparator.INSTANCE; } diff --git a/api/src/main/java/org/apache/iceberg/types/JavaHash.java b/api/src/main/java/org/apache/iceberg/types/JavaHash.java new file mode 100644 index 000000000000..d64fc1c1e67c --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/types/JavaHash.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.types; + +import java.util.Objects; + +@FunctionalInterface +public interface JavaHash { + int hash(T value); + + @SuppressWarnings("unchecked") + static JavaHash forType(Type type) { + switch (type.typeId()) { + case STRING: + return (JavaHash) JavaHashes.strings(); + case STRUCT: + return (JavaHash) JavaHashes.struct(type.asStructType()); + case LIST: + return (JavaHash) JavaHashes.list(type.asListType()); + default: + return Objects::hashCode; + } + } +} diff --git a/api/src/main/java/org/apache/iceberg/types/JavaHashes.java b/api/src/main/java/org/apache/iceberg/types/JavaHashes.java new file mode 100644 index 000000000000..dcee336ae16a --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/types/JavaHashes.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.types; + +import java.util.List; +import java.util.Objects; +import java.util.function.IntFunction; +import org.apache.iceberg.StructLike; + +public class JavaHashes { + public static int hashCode(CharSequence str) { + int result = 177; + for (int i = 0; i < str.length(); i += 1) { + char ch = str.charAt(i); + result = 31 * result + (int) ch; + } + return result; + } + + static JavaHash strings() { + return CharSequenceHash.INSTANCE; + } + + static JavaHash struct(Types.StructType struct) { + return new StructLikeHash(struct); + } + + static JavaHash> list(Types.ListType list) { + return new ListHash(list); + } + + private static class CharSequenceHash implements JavaHash { + private static final CharSequenceHash INSTANCE = new CharSequenceHash(); + + private CharSequenceHash() { + } + + @Override + public int hash(CharSequence str) { + if (str == null) { + return 0; + } + + return JavaHashes.hashCode(str); + } + } + + private static class StructLikeHash implements JavaHash { + private final JavaHash[] hashes; + + private StructLikeHash(Types.StructType struct) { + this.hashes = struct.fields().stream() + .map(field -> + "unknown-partition".equals(field.doc()) ? + (JavaHash) Objects::hashCode : + JavaHash.forType(field.type())) + .toArray((IntFunction[]>) JavaHash[]::new); + } + + @Override + public int hash(StructLike struct) { + int result = 97; + int len = hashes.length; + result = 41 * result + len; + for (int i = 0; i < len; i += 1) { + result = 41 * result + hashes[i].hash(struct.get(i, Object.class)); + } + return result; + } + } + + private static class ListHash implements JavaHash> { + private final JavaHash elementHash; + + private ListHash(Types.ListType list) { + this.elementHash = JavaHash.forType(list.elementType()); + } + + @Override + public int hash(List list) { + int result = 17; + int len = list.size(); + result = 37 * result + len; + for (int i = 0; i < len; i += 1) { + result = 37 * result + elementHash.hash(list.get(i)); + } + return result; + } + } +} diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java index 0ec95b812374..9405bbba5bef 100644 --- a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java +++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java @@ -21,6 +21,7 @@ import java.io.Serializable; import org.apache.iceberg.types.Comparators; +import org.apache.iceberg.types.JavaHashes; /** * Wrapper class to adapt CharSequence for use in maps and sets. @@ -59,12 +60,7 @@ public boolean equals(Object other) { @Override public int hashCode() { - int result = 177; - for (int i = 0; i < wrapped.length(); i += 1) { - char ch = wrapped.charAt(i); - result = 31 * result + (int) ch; - } - return result; + return JavaHashes.hashCode(wrapped); } @Override diff --git a/core/src/main/java/org/apache/iceberg/PartitionsTable.java b/core/src/main/java/org/apache/iceberg/PartitionsTable.java index 6cb1409675af..94b4e023bd16 100644 --- a/core/src/main/java/org/apache/iceberg/PartitionsTable.java +++ b/core/src/main/java/org/apache/iceberg/PartitionsTable.java @@ -75,7 +75,7 @@ private static StaticDataTask.Row convertPartition(Partition partition) { } private static Iterable partitions(Table table, Long snapshotId) { - PartitionSet partitions = new PartitionSet(); + PartitionMap partitions = new PartitionMap(table.spec().partitionType()); TableScan scan = table.newScan(); if (snapshotId != null) { @@ -95,15 +95,21 @@ private class PartitionsScan extends StaticTableScan { } } - static class PartitionSet { + static class PartitionMap { private final Map partitions = Maps.newHashMap(); - private final StructLikeWrapper reused = StructLikeWrapper.wrap(null); + private final Types.StructType type; + private final StructLikeWrapper reused; + + PartitionMap(Types.StructType type) { + this.type = type; + this.reused = StructLikeWrapper.forType(type); + } Partition get(StructLike key) { Partition partition = partitions.get(reused.set(key)); if (partition == null) { partition = new Partition(key); - partitions.put(StructLikeWrapper.wrap(key), partition); + partitions.put(StructLikeWrapper.forType(type).set(key), partition); } return partition; } diff --git a/core/src/main/java/org/apache/iceberg/util/PartitionSet.java b/core/src/main/java/org/apache/iceberg/util/PartitionSet.java new file mode 100644 index 000000000000..22b2ba011ac2 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/util/PartitionSet.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.util; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.relocated.com.google.common.collect.Iterators; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Types; + +public class PartitionSet implements Set> { + public static PartitionSet create(Map specsById) { + return new PartitionSet(specsById); + } + + private final Map partitionTypeById; + private final Map> partitionSetById; + + private PartitionSet(Map specsById) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + specsById.forEach((specId, spec) -> builder.put(specId, spec.partitionType())); + this.partitionTypeById = builder.build(); + this.partitionSetById = Maps.newHashMap(); + } + + @Override + public int size() { + return partitionSetById.values().stream().mapToInt(Set::size).sum(); + } + + @Override + public boolean isEmpty() { + return partitionSetById.values().stream().allMatch(Set::isEmpty); + } + + @Override + public boolean contains(Object o) { + if (o instanceof Pair) { + Object first = ((Pair) o).first(); + Object second = ((Pair) o).second(); + if (first instanceof Integer && second instanceof StructLike) { + return contains((Integer) first, (StructLike) second); + } + } + + return false; + } + + public boolean contains(int specId, StructLike struct) { + Set partitionSet = partitionSetById.get(specId); + if (partitionSet != null) { + return partitionSet.contains(struct); + } + + return false; + } + + @Override + public boolean add(Pair pair) { + Preconditions.checkArgument(pair.first() != null, "Cannot track partition with null spec id"); + return add(pair.first(), pair.second()); + } + + public boolean add(int specId, StructLike struct) { + Set partitionSet = partitionSetById.computeIfAbsent(specId, + id -> StructLikeSet.create(partitionTypeById.get(id))); + return partitionSet.add(struct); + } + + @Override + public boolean remove(Object o) { + if (o instanceof Pair) { + Object first = ((Pair) o).first(); + Object second = ((Pair) o).second(); + if (first instanceof Integer && second instanceof StructLike) { + return remove((Integer) first, (StructLike) second); + } + } + + return false; + } + + public boolean remove(int specId, StructLike struct) { + Set partitionSet = partitionSetById.get(specId); + if (partitionSet != null) { + return partitionSet.remove(struct); + } + + return false; + } + + @Override + public Iterator> iterator() { + Iterable>> setsAsPairs = Iterables.transform(partitionSetById.entrySet(), + idAndSet -> Iterables.transform(idAndSet.getValue(), struct -> Pair.of(idAndSet.getKey(), struct))); + + return Iterables.concat(setsAsPairs).iterator(); + } + + @Override + public Object[] toArray() { + return Iterators.toArray(iterator(), Pair.class); + } + + @Override + @SuppressWarnings("unchecked") + public T[] toArray(T[] destArray) { + int size = size(); + if (destArray.length < size) { + return (T[]) toArray(); + } + + Iterator> iter = iterator(); + int ind = 0; + while (iter.hasNext()) { + destArray[ind] = (T) iter.next(); + ind += 1; + } + + if (destArray.length > size) { + destArray[size] = null; + } + + return destArray; + } + + @Override + public boolean containsAll(Collection objects) { + if (objects != null) { + return Iterables.all(objects, this::contains); + } + return false; + } + + @Override + public boolean addAll(Collection> pairs) { + boolean changed = false; + if (pairs != null) { + for (Pair pair : pairs) { + changed |= add(pair); + } + } + return changed; + } + + @Override + public boolean retainAll(Collection c) { + throw new UnsupportedOperationException("retainAll is not supported"); + } + + @Override + public boolean removeAll(Collection objects) { + boolean changed = false; + if (objects != null) { + for (Object object : objects) { + changed |= remove(object); + } + } + return changed; + } + + @Override + public void clear() { + partitionSetById.clear(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/util/StructLikeSet.java b/core/src/main/java/org/apache/iceberg/util/StructLikeSet.java new file mode 100644 index 000000000000..f4e92399e4c9 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/util/StructLikeSet.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.util; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.relocated.com.google.common.collect.Iterators; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.types.Types; + +public class StructLikeSet implements Set { + public static StructLikeSet create(Types.StructType type) { + return new StructLikeSet(type); + } + + private final Types.StructType type; + private final Set wrapperSet; + private final ThreadLocal wrappers; + + private StructLikeSet(Types.StructType type) { + this.type = type; + this.wrapperSet = Sets.newHashSet(); + this.wrappers = ThreadLocal.withInitial(() -> StructLikeWrapper.forType(type)); + } + + @Override + public int size() { + return wrapperSet.size(); + } + + @Override + public boolean isEmpty() { + return wrapperSet.isEmpty(); + } + + @Override + public boolean contains(Object obj) { + if (obj instanceof StructLike) { + StructLikeWrapper wrapper = wrappers.get(); + boolean result = wrapperSet.contains(wrapper.set((StructLike) obj)); + wrapper.set(null); // don't hold a reference to the value + return result; + } + return false; + } + + @Override + public Iterator iterator() { + return Iterators.transform(wrapperSet.iterator(), StructLikeWrapper::get); + } + + @Override + public Object[] toArray() { + return Iterators.toArray(iterator(), StructLike.class); + } + + @Override + @SuppressWarnings("unchecked") + public T[] toArray(T[] destArray) { + int size = wrapperSet.size(); + if (destArray.length < size) { + return (T[]) toArray(); + } + + Iterator iter = iterator(); + int ind = 0; + while (iter.hasNext()) { + destArray[ind] = (T) iter.next(); + ind += 1; + } + + if (destArray.length > size) { + destArray[size] = null; + } + + return destArray; + } + + @Override + public boolean add(StructLike struct) { + return wrapperSet.add(StructLikeWrapper.forType(type).set(struct)); + } + + @Override + public boolean remove(Object obj) { + if (obj instanceof CharSequence) { + StructLikeWrapper wrapper = wrappers.get(); + boolean result = wrapperSet.remove(wrapper.set((StructLike) obj)); + wrapper.set(null); // don't hold a reference to the value + return result; + } + return false; + } + + @Override + public boolean containsAll(Collection objects) { + if (objects != null) { + return Iterables.all(objects, this::contains); + } + return false; + } + + @Override + public boolean addAll(Collection structs) { + if (structs != null) { + return Iterables.addAll(wrapperSet, + Iterables.transform(structs, struct -> StructLikeWrapper.forType(type).set(struct))); + } + return false; + } + + @Override + public boolean retainAll(Collection objects) { + throw new UnsupportedOperationException("retailAll is not supported"); + } + + @Override + public boolean removeAll(Collection objects) { + boolean changed = false; + if (objects != null) { + for (Object object : objects) { + changed |= remove(object); + } + } + return changed; + } + + @Override + public void clear() { + wrapperSet.clear(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/util/StructLikeWrapper.java b/core/src/main/java/org/apache/iceberg/util/StructLikeWrapper.java index 95015bc3f22c..809fc7bb0d69 100644 --- a/core/src/main/java/org/apache/iceberg/util/StructLikeWrapper.java +++ b/core/src/main/java/org/apache/iceberg/util/StructLikeWrapper.java @@ -19,8 +19,12 @@ package org.apache.iceberg.util; +import java.util.Comparator; import java.util.Objects; import org.apache.iceberg.StructLike; +import org.apache.iceberg.types.Comparators; +import org.apache.iceberg.types.JavaHash; +import org.apache.iceberg.types.Types; /** * Wrapper to adapt StructLike for use in maps and sets by implementing equals and hashCode. @@ -31,14 +35,34 @@ public static StructLikeWrapper wrap(StructLike struct) { return new StructLikeWrapper(struct); } + public static StructLikeWrapper forType(Types.StructType struct) { + return new StructLikeWrapper(struct); + } + + private final Comparator comparator; + private final JavaHash structHash; + private Integer hashCode; private StructLike struct; private StructLikeWrapper(StructLike struct) { - this.struct = struct; + this((Types.StructType) null); + set(struct); + } + + private StructLikeWrapper(Types.StructType type) { + if (type != null) { + this.comparator = Comparators.forType(type); + this.structHash = JavaHash.forType(type); + } else { + this.comparator = null; + this.structHash = null; + } + this.hashCode = null; } public StructLikeWrapper set(StructLike newStruct) { this.struct = newStruct; + this.hashCode = null; return this; } @@ -69,6 +93,10 @@ public boolean equals(Object other) { return false; } + if (comparator != null) { + return comparator.compare(this.struct, that.struct) == 0; + } + for (int i = 0; i < len; i += 1) { if (!Objects.equals(struct.get(i, Object.class), that.struct.get(i, Object.class))) { return false; @@ -80,12 +108,20 @@ public boolean equals(Object other) { @Override public int hashCode() { - int result = 97; - int len = struct.size(); - result = 41 * result + len; - for (int i = 0; i < len; i += 1) { - result = 41 * result + Objects.hashCode(struct.get(i, Object.class)); + if (hashCode == null) { + if (structHash != null) { + this.hashCode = structHash.hash(struct); + } else { + int result = 97; + int len = struct.size(); + result = 41 * result + len; + for (int i = 0; i < len; i += 1) { + result = 41 * result + Objects.hashCode(struct.get(i, Object.class)); + } + this.hashCode = result; + } } - return result; + + return hashCode; } } diff --git a/spark/src/main/java/org/apache/iceberg/actions/RewriteDataFilesAction.java b/spark/src/main/java/org/apache/iceberg/actions/RewriteDataFilesAction.java index 645dd0e67b24..e1d179a8ea8f 100644 --- a/spark/src/main/java/org/apache/iceberg/actions/RewriteDataFilesAction.java +++ b/spark/src/main/java/org/apache/iceberg/actions/RewriteDataFilesAction.java @@ -247,7 +247,7 @@ private Map> groupTasksByPartition( try { tasksIter.forEachRemaining(task -> { - StructLikeWrapper structLike = StructLikeWrapper.wrap(task.file().partition()); + StructLikeWrapper structLike = StructLikeWrapper.forType(spec.partitionType()).set(task.file().partition()); tasksGroupedByPartition.put(structLike, task); }); From 5b9f0479d2891bdee5ba9535c49a0ef1fdb017a3 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 7 Aug 2020 13:59:11 -0700 Subject: [PATCH 2/3] Fix checkstyle. --- api/src/main/java/org/apache/iceberg/types/JavaHashes.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/src/main/java/org/apache/iceberg/types/JavaHashes.java b/api/src/main/java/org/apache/iceberg/types/JavaHashes.java index dcee336ae16a..21abfae34d4a 100644 --- a/api/src/main/java/org/apache/iceberg/types/JavaHashes.java +++ b/api/src/main/java/org/apache/iceberg/types/JavaHashes.java @@ -25,6 +25,9 @@ import org.apache.iceberg.StructLike; public class JavaHashes { + private JavaHashes() { + } + public static int hashCode(CharSequence str) { int result = 177; for (int i = 0; i < str.length(); i += 1) { From 5307628bad69a2b8642e666f77fb5e8245d99369 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Mon, 10 Aug 2020 15:04:56 -0700 Subject: [PATCH 3/3] Remove work-around for unknown transforms. --- .../org/apache/iceberg/types/JavaHashes.java | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/types/JavaHashes.java b/api/src/main/java/org/apache/iceberg/types/JavaHashes.java index 21abfae34d4a..31e62dd93320 100644 --- a/api/src/main/java/org/apache/iceberg/types/JavaHashes.java +++ b/api/src/main/java/org/apache/iceberg/types/JavaHashes.java @@ -20,7 +20,6 @@ package org.apache.iceberg.types; import java.util.List; -import java.util.Objects; import java.util.function.IntFunction; import org.apache.iceberg.StructLike; @@ -37,7 +36,7 @@ public static int hashCode(CharSequence str) { return result; } - static JavaHash strings() { + static JavaHash strings() { return CharSequenceHash.INSTANCE; } @@ -49,19 +48,24 @@ static JavaHash> list(Types.ListType list) { return new ListHash(list); } - private static class CharSequenceHash implements JavaHash { + private static class CharSequenceHash implements JavaHash { private static final CharSequenceHash INSTANCE = new CharSequenceHash(); private CharSequenceHash() { } @Override - public int hash(CharSequence str) { - if (str == null) { - return 0; + public int hash(Object str) { + if (str instanceof CharSequence) { + return JavaHashes.hashCode((CharSequence) str); + } else if (str != null) { + // UnknownTransform results are assumed to be string, the most generic type. But there is no guarantee that the + // values actually are strings so this can receive non-string values to hash. To get a consistent hash code for + // those values, convert to string an hash the string. + return JavaHashes.hashCode(str.toString()); } - return JavaHashes.hashCode(str); + return 0; } } @@ -70,10 +74,8 @@ private static class StructLikeHash implements JavaHash { private StructLikeHash(Types.StructType struct) { this.hashes = struct.fields().stream() - .map(field -> - "unknown-partition".equals(field.doc()) ? - (JavaHash) Objects::hashCode : - JavaHash.forType(field.type())) + .map(Types.NestedField::type) + .map(JavaHash::forType) .toArray((IntFunction[]>) JavaHash[]::new); }