From 904fe658b2c6707ecea1844b9830e9949ca455a1 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 18 Sep 2025 15:37:07 +0800 Subject: [PATCH 01/15] Add geospatial bounding box class and implements intersects function --- .../iceberg/geospatial/BoundingBox.java | 155 +++++++ .../iceberg/geospatial/GeospatialBound.java | 328 +++++++++++++ .../GeospatialPredicateEvaluators.java | 128 ++++++ .../iceberg/geospatial/TestBoundingBox.java | 140 ++++++ .../geospatial/TestGeospatialBound.java | 228 ++++++++++ .../TestGeospatialPredicateEvaluators.java | 430 ++++++++++++++++++ 6 files changed, 1409 insertions(+) create mode 100644 api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java create mode 100644 api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java create mode 100644 api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java create mode 100644 api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java create mode 100644 api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java create mode 100644 api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java diff --git a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java new file mode 100644 index 000000000000..0bdb36e4f465 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Objects; + +/** + * Represents a geospatial bounding box composed of minimum and maximum bounds. + * + *

A bounding box (also called a Minimum Bounding Rectangle or MBR) is defined by two points: the + * minimum and maximum coordinates that define the box's corners. This provides a simple + * approximation of a more complex geometry for efficient filtering and data skipping. + */ +public class BoundingBox implements Serializable { + /** + * Create a {@link BoundingBox} object from buffers containing min and max bounds + * + * @param min the serialized minimum bound + * @param max the serialized maximum bound + * @return a BoundingBox instance + */ + public static BoundingBox fromByteBuffers(ByteBuffer min, ByteBuffer max) { + return new BoundingBox( + GeospatialBound.fromByteBuffer(min), GeospatialBound.fromByteBuffer(max)); + } + + /** + * Deserialize a byte buffer as a {@link BoundingBox} object + * + * @param buffer the serialized bounding box + * @return a BoundingBox instance + */ + public static BoundingBox fromByteBuffer(ByteBuffer buffer) { + int originalPosition = buffer.position(); + ByteOrder originalOrder = buffer.order(); + + try { + buffer.order(ByteOrder.LITTLE_ENDIAN); + + int minLen = buffer.getInt(); + ByteBuffer min = buffer.slice(); + min.limit(minLen); + buffer.position(buffer.position() + minLen); + + int maxLen = buffer.getInt(); + ByteBuffer max = buffer.slice(); + max.limit(maxLen); + + return fromByteBuffers(min, max); + } finally { + // Restore original position and byte order + buffer.position(originalPosition); + buffer.order(originalOrder); + } + } + + /** + * Create an empty bounding box + * + * @return an empty bounding box + */ + public static BoundingBox empty() { + return new BoundingBox( + GeospatialBound.createXY(Double.NaN, Double.NaN), + GeospatialBound.createXY(Double.NaN, Double.NaN)); + } + + public BoundingBox(GeospatialBound min, GeospatialBound max) { + this.min = min; + this.max = max; + } + + private final GeospatialBound min; + private final GeospatialBound max; + + /** + * Get the minimum corner of the bounding box. + * + * @return the minimum bound + */ + public GeospatialBound min() { + return min; + } + + /** + * Get the maximum corner of the bounding box. + * + * @return the maximum bound + */ + public GeospatialBound max() { + return max; + } + + /** + * Serializes this bounding box to a byte buffer. The serialized byte buffer could be deserialized + * using {@link #fromByteBuffer(ByteBuffer)}. + * + * @return a byte buffer containing the serialized bounding box + */ + public ByteBuffer toByteBuffer() { + ByteBuffer minBuffer = min.toByteBuffer(); + ByteBuffer maxBuffer = max.toByteBuffer(); + + int totalSize = Integer.BYTES + minBuffer.remaining() + Integer.BYTES + maxBuffer.remaining(); + ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); + + buffer.putInt(minBuffer.remaining()); + buffer.put(minBuffer); + buffer.putInt(maxBuffer.remaining()); + buffer.put(maxBuffer); + buffer.flip(); + return buffer; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } else if (!(other instanceof BoundingBox)) { + return false; + } + + BoundingBox that = (BoundingBox) other; + return Objects.equals(min, that.min) && Objects.equals(max, that.max); + } + + @Override + public int hashCode() { + return Objects.hash(min, max); + } + + @Override + public String toString() { + return "BoundingBox{min={" + min.simpleString() + "}, max={" + max.simpleString() + "}}"; + } +} diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java new file mode 100644 index 000000000000..41d8e7290726 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Objects; + +/** + * Represents a geospatial bound (minimum or maximum) for Iceberg tables. + * + *

According to the Bound + * serialization section of Iceberg Table spec, geospatial bounds are serialized differently + * from the regular WKB representation. Geometry and geography bounds are single point encoded as a + * concatenation of 8-byte little-endian IEEE 754 coordinate values in the order X, Y, Z (optional), + * M (optional). + * + *

The encoding varies based on which coordinates are present: + * + *

+ * + *

This class represents a lower or upper geospatial bound and handles serialization and + * deserialization of these bounds to/from byte arrays, conforming to the Iceberg specification. + */ +public class GeospatialBound implements Serializable { + /** + * Parses a geospatial bound from a byte buffer according to Iceberg spec. + * + *

Based on the buffer size, this method determines which coordinates are present: - 16 bytes + * (2 doubles): x and y only - 24 bytes (3 doubles): x, y, and z - 32 bytes (4 doubles): x, y, z + * (might be NaN), and m + * + *

The ordinates are encoded as 8-byte little-endian IEEE 754 values. + * + * @param buffer the ByteBuffer containing the serialized geospatial bound + * @return a GeospatialBound object representing the parsed bound + * @throws IllegalArgumentException if the buffer has an invalid size + */ + public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { + // Save original position and byte order to restore them later + int originalPosition = buffer.position(); + ByteOrder originalOrder = buffer.order(); + + try { + buffer.order(ByteOrder.LITTLE_ENDIAN); + int size = buffer.remaining(); + + if (size == 2 * Double.BYTES) { + // x:y format (2 doubles) + double coordX = buffer.getDouble(); + double coordY = buffer.getDouble(); + return createXY(coordX, coordY); + } else if (size == 3 * Double.BYTES) { + // x:y:z format (3 doubles) + double coordX = buffer.getDouble(); + double coordY = buffer.getDouble(); + double coordZ = buffer.getDouble(); + return createXYZ(coordX, coordY, coordZ); + } else if (size == 4 * Double.BYTES) { + // x:y:z:m format (4 doubles) - z might be NaN + double coordX = buffer.getDouble(); + double coordY = buffer.getDouble(); + double coordZ = buffer.getDouble(); + double coordM = buffer.getDouble(); + return new GeospatialBound(coordX, coordY, coordZ, coordM); + } else { + throw new IllegalArgumentException( + "Invalid buffer size for GeospatialBound: expected 16, 24, or 32 bytes, got " + size); + } + } finally { + // Restore original position and byte order + buffer.position(originalPosition); + buffer.order(originalOrder); + } + } + + /** + * Parses a geospatial bound from a byte array according to Iceberg spec. + * + * @param bytes the byte array containing the serialized geospatial bound + * @return a GeospatialBound object representing the parsed bound + * @throws IllegalArgumentException if the byte array has an invalid length + */ + public static GeospatialBound fromByteArray(byte[] bytes) { + int length = bytes.length; + if (length != 2 * Double.BYTES && length != 3 * Double.BYTES && length != 4 * Double.BYTES) { + throw new IllegalArgumentException( + "Invalid byte array length for GeospatialBound: expected 16, 24, or 32 bytes, got " + + length); + } + + return fromByteBuffer(ByteBuffer.wrap(bytes)); + } + + /** + * Creates a GeospatialBound with X and Y coordinates only. + * + * @param x the X coordinate (longitude/easting) + * @param y the Y coordinate (latitude/northing) + * @return a GeospatialBound with XY coordinates + */ + @SuppressWarnings("ParameterName") + public static GeospatialBound createXY(double x, double y) { + return new GeospatialBound(x, y, Double.NaN, Double.NaN); + } + + /** + * Creates a GeospatialBound with X, Y, and Z coordinates, with no M value. + * + * @param x the X coordinate (longitude/easting) + * @param y the Y coordinate (latitude/northing) + * @param z the Z coordinate (elevation) + * @return a GeospatialBound with XYZ coordinates + */ + @SuppressWarnings("ParameterName") + public static GeospatialBound createXYZ(double x, double y, double z) { + return new GeospatialBound(x, y, z, Double.NaN); + } + + /** + * Creates a GeospatialBound with X, Y, Z, and M coordinates. + * + * @param x the X coordinate (longitude/easting) + * @param y the Y coordinate (latitude/northing) + * @param z the Z coordinate (elevation) + * @param m the M value (measure) + * @return a GeospatialBound with XYZM coordinates + */ + @SuppressWarnings("ParameterName") + public static GeospatialBound createXYZM(double x, double y, double z, double m) { + return new GeospatialBound(x, y, z, m); + } + + /** + * Creates a GeospatialBound with X, Y, and M values, with no Z coordinate. + * + * @param x the X coordinate (longitude/easting) + * @param y the Y coordinate (latitude/northing) + * @param m the M value (measure) + * @return a GeospatialBound with XYM coordinates + */ + @SuppressWarnings("ParameterName") + public static GeospatialBound createXYM(double x, double y, double m) { + return new GeospatialBound(x, y, Double.NaN, m); + } + + @SuppressWarnings("MemberName") + private final double x; + + @SuppressWarnings("MemberName") + private final double y; + + @SuppressWarnings("MemberName") + private final double z; + + @SuppressWarnings("MemberName") + private final double m; + + /** Private constructor - use factory methods instead. */ + @SuppressWarnings("ParameterName") + private GeospatialBound(double x, double y, double z, double m) { + this.x = x; + this.y = y; + this.z = z; + this.m = m; + } + + /** + * Get the X coordinate (longitude/easting). + * + * @return X coordinate value + */ + @SuppressWarnings("MethodName") + public double x() { + return x; + } + + /** + * Get the Y coordinate (latitude/northing). + * + * @return Y coordinate value + */ + @SuppressWarnings("MethodName") + public double y() { + return y; + } + + /** + * Get the Z coordinate (typically elevation). + * + * @return Z coordinate value or NaN if not set + */ + @SuppressWarnings("MethodName") + public double z() { + return z; + } + + /** + * Get the M value (measure). + * + * @return M value or NaN if not set + */ + @SuppressWarnings("MethodName") + public double m() { + return m; + } + + /** + * Check if this bound has a defined Z coordinate. + * + * @return true if Z is not NaN + */ + public boolean hasZ() { + return !Double.isNaN(z); + } + + /** + * Check if this bound has a defined M value. + * + * @return true if M is not NaN + */ + public boolean hasM() { + return !Double.isNaN(m); + } + + /** + * Serializes this geospatial bound to a byte buffer according to Iceberg spec. + * + *

Following the Iceberg spec, the bound is serialized based on which coordinates are set: - + * x:y (2 doubles) when both z and m are unset - x:y:z (3 doubles) when only m is unset - + * x:y:NaN:m (4 doubles) when only z is unset - x:y:z:m (4 doubles) when all coordinates are set + * + * @return A ByteBuffer containing the serialized geospatial bound + */ + public ByteBuffer toByteBuffer() { + // Calculate size based on which coordinates are present + int size; + if (!hasZ() && !hasM()) { + // Just x and y + size = 2 * Double.BYTES; + } else if (hasZ() && !hasM()) { + // x, y, and z (no m) + size = 3 * Double.BYTES; + } else { + // x, y, z (or NaN), and m + size = 4 * Double.BYTES; + } + + ByteBuffer buffer = ByteBuffer.allocate(size).order(ByteOrder.LITTLE_ENDIAN); + buffer.putDouble(x); + buffer.putDouble(y); + + if (hasZ() || hasM()) { + // If we have z or m or both, we need to include z (could be NaN) + buffer.putDouble(z); + } + + if (hasM()) { + // If we have m, include it + buffer.putDouble(m); + } + + buffer.flip(); + return buffer; + } + + @Override + public String toString() { + return "GeospatialBound(" + simpleString() + ")"; + } + + public String simpleString() { + StringBuilder sb = new StringBuilder(); + sb.append("x=").append(x).append(", y=").append(y); + + if (hasZ()) { + sb.append(", z=").append(z); + } + + if (hasM()) { + sb.append(", m=").append(m); + } + + return sb.toString(); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } else if (!(other instanceof GeospatialBound)) { + return false; + } + + GeospatialBound that = (GeospatialBound) other; + return Double.compare(that.x, x) == 0 + && Double.compare(that.y, y) == 0 + && Double.compare(that.z, z) == 0 + && Double.compare(that.m, m) == 0; + } + + @Override + public int hashCode() { + return Objects.hash(GeospatialBound.class, x, y, z, m); + } +} diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java new file mode 100644 index 000000000000..64943902445c --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Type; + +public class GeospatialPredicateEvaluators { + private GeospatialPredicateEvaluators() {} + + public interface GeospatialPredicateEvaluator { + /** + * Test whether this bounding box intersects with another. + * + * @param bbox1 the first bounding box + * @param bbox2 the second bounding box + * @return true if this box intersects the other box + */ + boolean intersects(BoundingBox bbox1, BoundingBox bbox2); + } + + public static GeospatialPredicateEvaluator create(Type type) { + switch (type.typeId()) { + case GEOMETRY: + return new GeometryEvaluator(); + case GEOGRAPHY: + return new GeographyEvaluator(); + default: + throw new UnsupportedOperationException("Unsupported type for BoundingBox: " + type); + } + } + + static class GeometryEvaluator implements GeospatialPredicateEvaluator { + @Override + public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { + return intersectsWithWrapAround(bbox1, bbox2); + } + + /** + * Check if two bounding boxes intersect, taking wrap-around into account. + * + *

Wraparound (or antimeridian crossing) occurs when a geography crosses the 180°/-180° + * longitude line on a map. In these cases, the minimum X value is greater than the maximum X + * value (xmin > xmax). This represents a bounding box that wraps around the globe. + * + *

For example, a bounding box with xmin=170° and xmax=-170° represents an area that spans + * from 170° east to 190° east (or equivalently, -170° west). This is important for geometries + * that cross the antimeridian, like a path from Japan to Alaska. + * + *

When xmin > xmax, a point matches if its X coordinate is either X ≥ xmin OR X ≤ xmax, + * rather than the usual X ≥ xmin AND X ≤ xmax. In geographic terms, if the westernmost + * longitude is greater than the easternmost longitude, this indicates an antimeridian crossing. + * + *

The Iceberg specification does not explicitly rule out the use of wrap-around in bounding + * boxes for geometry types, so we handle wrap-around for both geography and geometry bounding + * boxes. + * + * @param bbox1 the first bounding box + * @param bbox2 the second bounding box + * @return true if the bounding boxes intersect + */ + static boolean intersectsWithWrapAround(BoundingBox bbox1, BoundingBox bbox2) { + // Let's check y first, and if y does not intersect, we can return false + if (bbox1.min().y() > bbox2.max().y() || bbox1.max().y() < bbox2.min().y()) { + return false; + } + + // Now check x, need to take wrap-around into account + if (bbox1.min().x() <= bbox1.max().x() && bbox2.min().x() <= bbox2.max().x()) { + // No wrap-around + return bbox1.min().x() <= bbox2.max().x() && bbox1.max().x() >= bbox2.min().x(); + } else if (bbox1.min().x() > bbox1.max().x() && bbox2.min().x() <= bbox2.max().x()) { + // bbox1 wraps around the antimeridian, bbox2 does not + return bbox1.min().x() <= bbox2.max().x() || bbox1.max().x() >= bbox2.min().x(); + } else if (bbox1.min().x() <= bbox1.max().x() && bbox2.min().x() > bbox2.max().x()) { + // bbox2 wraps around the antimeridian, bbox1 does not + return intersectsWithWrapAround(bbox2, bbox1); + } else { + // Both wrap around the antimeridian, they must intersect + return true; + } + } + } + + static class GeographyEvaluator implements GeospatialPredicateEvaluator { + @Override + public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { + validateBoundingBox(bbox1); + validateBoundingBox(bbox2); + return GeometryEvaluator.intersectsWithWrapAround(bbox1, bbox2); + } + + /** + * For geography types, coordinates are restricted to the canonical ranges of [-180°, 180°] for + * longitude (X) and [-90°, 90°] for latitude (Y). + * + * @param bbox the bounding box to validate + * @throws IllegalArgumentException if the bounding box is invalid + */ + private void validateBoundingBox(BoundingBox bbox) { + Preconditions.checkArgument( + bbox.min().y() >= -90 && bbox.max().y() <= 90, "Latitude out of range: %s", bbox); + Preconditions.checkArgument( + bbox.min().x() >= -180 + && bbox.min().x() <= 180 + && bbox.max().x() >= -180 + && bbox.max().x() <= 180, + "Longitude out of range: %s", + bbox); + } + } +} diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java new file mode 100644 index 000000000000..e54849844d45 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.junit.jupiter.api.Test; + +public class TestBoundingBox { + + @Test + public void testConstructorAndAccessors() { + GeospatialBound min = GeospatialBound.createXY(1.0, 2.0); + GeospatialBound max = GeospatialBound.createXY(3.0, 4.0); + + BoundingBox box = new BoundingBox(min, max); + + assertThat(box.min()).isEqualTo(min); + assertThat(box.max()).isEqualTo(max); + assertThat(box.min().x()).isEqualTo(1.0); + assertThat(box.min().y()).isEqualTo(2.0); + assertThat(box.max().x()).isEqualTo(3.0); + assertThat(box.max().y()).isEqualTo(4.0); + } + + @Test + public void testCreateFromByteBuffers() { + // Create byte buffers for XY bounds + ByteBuffer minBuffer = ByteBuffer.allocate(16); + minBuffer.order(ByteOrder.LITTLE_ENDIAN); + minBuffer.putDouble(0, 1.0); // x + minBuffer.putDouble(8, 2.0); // y + + ByteBuffer maxBuffer = ByteBuffer.allocate(16); + maxBuffer.order(ByteOrder.LITTLE_ENDIAN); + maxBuffer.putDouble(0, 3.0); // x + maxBuffer.putDouble(8, 4.0); // y + + BoundingBox box = BoundingBox.fromByteBuffers(minBuffer, maxBuffer); + + assertThat(box.min().x()).isEqualTo(1.0); + assertThat(box.min().y()).isEqualTo(2.0); + assertThat(box.max().x()).isEqualTo(3.0); + assertThat(box.max().y()).isEqualTo(4.0); + assertThat(minBuffer.order()).isEqualTo(ByteOrder.LITTLE_ENDIAN); + assertThat(maxBuffer.order()).isEqualTo(ByteOrder.LITTLE_ENDIAN); + } + + @Test + public void testCreateFromBigEndianByteBuffers() { + // Create byte buffers for XY bounds + ByteBuffer minBuffer = ByteBuffer.allocate(16); + minBuffer.order(ByteOrder.LITTLE_ENDIAN); + minBuffer.putDouble(0, 10.0); // x + minBuffer.putDouble(8, 20.0); // y + minBuffer.order(ByteOrder.BIG_ENDIAN); + + ByteBuffer maxBuffer = ByteBuffer.allocate(16); + maxBuffer.order(ByteOrder.LITTLE_ENDIAN); + maxBuffer.putDouble(0, 30.0); // x + maxBuffer.putDouble(8, 40.0); // y + maxBuffer.order(ByteOrder.BIG_ENDIAN); + + BoundingBox box = BoundingBox.fromByteBuffers(minBuffer, maxBuffer); + + assertThat(box.min().x()).isEqualTo(10.0); + assertThat(box.min().y()).isEqualTo(20.0); + assertThat(box.max().x()).isEqualTo(30.0); + assertThat(box.max().y()).isEqualTo(40.0); + assertThat(minBuffer.order()).isEqualTo(ByteOrder.BIG_ENDIAN); + assertThat(maxBuffer.order()).isEqualTo(ByteOrder.BIG_ENDIAN); + } + + @Test + public void testEqualsAndHashCode() { + GeospatialBound min1 = GeospatialBound.createXY(1.0, 2.0); + GeospatialBound max1 = GeospatialBound.createXY(3.0, 4.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + // Same values + GeospatialBound min2 = GeospatialBound.createXY(1.0, 2.0); + GeospatialBound max2 = GeospatialBound.createXY(3.0, 4.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // Different values + GeospatialBound min3 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max3 = GeospatialBound.createXY(10.0, 10.0); + BoundingBox box3 = new BoundingBox(min3, max3); + + // Test equals + assertThat(box1).isEqualTo(box2); + assertThat(box1).isNotEqualTo(box3); + assertThat(box1).isNotEqualTo(null); + assertThat(box1).isNotEqualTo("not a box"); + + // Test hashCode + assertThat(box1.hashCode()).isEqualTo(box2.hashCode()); + assertThat(box1.hashCode()).isNotEqualTo(box3.hashCode()); + } + + @Test + public void testToString() { + GeospatialBound min = GeospatialBound.createXY(1.0, 2.0); + GeospatialBound max = GeospatialBound.createXY(3.0, 4.0); + BoundingBox box = new BoundingBox(min, max); + assertThat(box.toString()).isEqualTo("BoundingBox{min={x=1.0, y=2.0}, max={x=3.0, y=4.0}}"); + } + + @Test + public void testRoundTripSerDe() { + GeospatialBound min = GeospatialBound.createXY(1.0, 2.0); + GeospatialBound max = GeospatialBound.createXY(3.0, 4.0); + BoundingBox original = new BoundingBox(min, max); + BoundingBox deserialized = roundTripSerDe(original); + assertThat(deserialized).isEqualTo(original); + } + + private BoundingBox roundTripSerDe(BoundingBox original) { + ByteBuffer buffer = original.toByteBuffer(); + return BoundingBox.fromByteBuffer(buffer); + } +} diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java new file mode 100644 index 000000000000..f4b3a7deebfa --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.ByteBuffer; +import org.apache.iceberg.util.ByteBuffers; +import org.junit.jupiter.api.Test; + +public class TestGeospatialBound { + + @Test + public void testCreateXY() { + GeospatialBound bound = GeospatialBound.createXY(1.0, 2.0); + assertThat(bound.x()).isEqualTo(1.0); + assertThat(bound.y()).isEqualTo(2.0); + assertThat(bound.hasZ()).isFalse(); + assertThat(bound.hasM()).isFalse(); + assertThat(Double.isNaN(bound.z())).isTrue(); + assertThat(Double.isNaN(bound.m())).isTrue(); + } + + @Test + public void testCreateXYZ() { + GeospatialBound bound = GeospatialBound.createXYZ(1.0, 2.0, 3.0); + assertThat(bound.x()).isEqualTo(1.0); + assertThat(bound.y()).isEqualTo(2.0); + assertThat(bound.z()).isEqualTo(3.0); + assertThat(bound.hasZ()).isTrue(); + assertThat(bound.hasM()).isFalse(); + assertThat(Double.isNaN(bound.m())).isTrue(); + } + + @Test + public void testCreateXYM() { + GeospatialBound bound = GeospatialBound.createXYM(1.0, 2.0, 4.0); + assertThat(bound.x()).isEqualTo(1.0); + assertThat(bound.y()).isEqualTo(2.0); + assertThat(bound.m()).isEqualTo(4.0); + assertThat(bound.hasZ()).isFalse(); + assertThat(bound.hasM()).isTrue(); + assertThat(Double.isNaN(bound.z())).isTrue(); + } + + @Test + public void testCreateXYZM() { + GeospatialBound bound = GeospatialBound.createXYZM(1.0, 2.0, 3.0, 4.0); + assertThat(bound.x()).isEqualTo(1.0); + assertThat(bound.y()).isEqualTo(2.0); + assertThat(bound.z()).isEqualTo(3.0); + assertThat(bound.m()).isEqualTo(4.0); + assertThat(bound.hasZ()).isTrue(); + assertThat(bound.hasM()).isTrue(); + } + + @Test + public void testEqualsAndHashCode() { + GeospatialBound xy1 = GeospatialBound.createXY(1.0, 2.0); + GeospatialBound xy2 = GeospatialBound.createXY(1.0, 2.0); + GeospatialBound xy3 = GeospatialBound.createXY(2.0, 1.0); + assertThat(xy1).isEqualTo(xy2); + assertThat(xy1.hashCode()).isEqualTo(xy2.hashCode()); + assertThat(xy1).isNotEqualTo(xy3); + + GeospatialBound xyz1 = GeospatialBound.createXYZ(1.0, 2.0, 3.0); + GeospatialBound xyz2 = GeospatialBound.createXYZ(1.0, 2.0, 3.0); + GeospatialBound xyz3 = GeospatialBound.createXYZ(1.0, 2.0, 4.0); + assertThat(xyz1).isEqualTo(xyz2); + assertThat(xyz1.hashCode()).isEqualTo(xyz2.hashCode()); + assertThat(xyz1).isNotEqualTo(xyz3); + assertThat(xyz1).isNotEqualTo(xy1); + + GeospatialBound xym1 = GeospatialBound.createXYM(1.0, 2.0, 4.0); + GeospatialBound xym2 = GeospatialBound.createXYM(1.0, 2.0, 4.0); + GeospatialBound xym3 = GeospatialBound.createXYM(1.0, 2.0, 5.0); + assertThat(xym1).isEqualTo(xym2); + assertThat(xym1.hashCode()).isEqualTo(xym2.hashCode()); + assertThat(xym1).isNotEqualTo(xym3); + assertThat(xym1).isNotEqualTo(xy1); + + GeospatialBound xyzm1 = GeospatialBound.createXYZM(1.0, 2.0, 3.0, 4.0); + GeospatialBound xyzm2 = GeospatialBound.createXYZM(1.0, 2.0, 3.0, 4.0); + GeospatialBound xyzm3 = GeospatialBound.createXYZM(1.0, 2.0, 3.0, 5.0); + assertThat(xyzm1).isEqualTo(xyzm2); + assertThat(xyzm1.hashCode()).isEqualTo(xyzm2.hashCode()); + assertThat(xyzm1).isNotEqualTo(xyzm3); + assertThat(xyzm1).isNotEqualTo(xyz1); + } + + @Test + public void testToString() { + GeospatialBound xy = GeospatialBound.createXY(1.0, 2.0); + assertThat(xy.toString()).isEqualTo("GeospatialBound(x=1.0, y=2.0)"); + + GeospatialBound xyz = GeospatialBound.createXYZ(1.0, 2.0, 3.0); + assertThat(xyz.toString()).isEqualTo("GeospatialBound(x=1.0, y=2.0, z=3.0)"); + + GeospatialBound xym = GeospatialBound.createXYM(1.0, 2.0, 4.0); + assertThat(xym.toString()).isEqualTo("GeospatialBound(x=1.0, y=2.0, m=4.0)"); + + GeospatialBound xyzm = GeospatialBound.createXYZM(1.0, 2.0, 3.0, 4.0); + assertThat(xyzm.toString()).isEqualTo("GeospatialBound(x=1.0, y=2.0, z=3.0, m=4.0)"); + } + + @Test + public void testSimpleString() { + GeospatialBound xy = GeospatialBound.createXY(1.0, 2.0); + assertThat(xy.simpleString()).isEqualTo("x=1.0, y=2.0"); + + GeospatialBound xyz = GeospatialBound.createXYZ(1.0, 2.0, 3.0); + assertThat(xyz.simpleString()).isEqualTo("x=1.0, y=2.0, z=3.0"); + + GeospatialBound xym = GeospatialBound.createXYM(1.0, 2.0, 4.0); + assertThat(xym.simpleString()).isEqualTo("x=1.0, y=2.0, m=4.0"); + + GeospatialBound xyzm = GeospatialBound.createXYZM(1.0, 2.0, 3.0, 4.0); + assertThat(xyzm.simpleString()).isEqualTo("x=1.0, y=2.0, z=3.0, m=4.0"); + } + + @Test + public void testSerde() { + // Test XY format (16 bytes: x:y) + // These bytes represent x=10.0, y=13.0 + byte[] xyBytes = + new byte[] { + 0, 0, 0, 0, 0, 0, 36, 64, // 10.0 in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, 42, 64 // 13.0 in little-endian IEEE 754 + }; + GeospatialBound xy = GeospatialBound.fromByteArray(xyBytes); + assertThat(xy.x()).isEqualTo(10.0); + assertThat(xy.y()).isEqualTo(13.0); + assertThat(xy.hasZ()).isFalse(); + assertThat(xy.hasM()).isFalse(); + assertThat(ByteBuffers.toByteArray(xy.toByteBuffer())).isEqualTo(xyBytes); + + // Test XYZ format (24 bytes: x:y:z) + // These bytes represent x=10.0, y=13.0, z=15.0 + byte[] xyzBytes = + new byte[] { + 0, 0, 0, 0, 0, 0, 36, 64, // 10.0 in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, 42, 64, // 13.0 in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, 46, 64 // 15.0 in little-endian IEEE 754 + }; + GeospatialBound xyz = GeospatialBound.fromByteArray(xyzBytes); + assertThat(xyz.x()).isEqualTo(10.0); + assertThat(xyz.y()).isEqualTo(13.0); + assertThat(xyz.z()).isEqualTo(15.0); + assertThat(xyz.hasZ()).isTrue(); + assertThat(xyz.hasM()).isFalse(); + assertThat(ByteBuffers.toByteArray(xyz.toByteBuffer())).isEqualTo(xyzBytes); + // Test XYM format (32 bytes: x:y:NaN:m) + // These bytes represent x=10.0, y=13.0, z=NaN, m=20.0 + byte[] xymBytes = + new byte[] { + 0, 0, 0, 0, 0, 0, 36, 64, // 10.0 in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, 42, 64, // 13.0 in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, (byte) 248, 127, // NaN in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, 52, 64 // 20.0 in little-endian IEEE 754 + }; + GeospatialBound xym = GeospatialBound.fromByteArray(xymBytes); + assertThat(xym.x()).isEqualTo(10.0); + assertThat(xym.y()).isEqualTo(13.0); + assertThat(Double.isNaN(xym.z())).isTrue(); + assertThat(xym.m()).isEqualTo(20.0); + assertThat(xym.hasZ()).isFalse(); + assertThat(xym.hasM()).isTrue(); + assertThat(ByteBuffers.toByteArray(xym.toByteBuffer())).isEqualTo(xymBytes); + + // Test XYZM format (32 bytes: x:y:z:m) + // These bytes represent x=10.0, y=13.0, z=15.0, m=20.0 + byte[] xyzmBytes = + new byte[] { + 0, 0, 0, 0, 0, 0, 36, 64, // 10.0 in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, 42, 64, // 13.0 in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, 46, 64, // 15.0 in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, 52, 64 // 20.0 in little-endian IEEE 754 + }; + GeospatialBound xyzm = GeospatialBound.fromByteArray(xyzmBytes); + assertThat(xyzm.x()).isEqualTo(10.0); + assertThat(xyzm.y()).isEqualTo(13.0); + assertThat(xyzm.z()).isEqualTo(15.0); + assertThat(xyzm.m()).isEqualTo(20.0); + assertThat(xyzm.hasZ()).isTrue(); + assertThat(xyzm.hasM()).isTrue(); + assertThat(ByteBuffers.toByteArray(xyzm.toByteBuffer())).isEqualTo(xyzmBytes); + } + + private GeospatialBound roundTripSerDe(GeospatialBound original) { + ByteBuffer buffer = original.toByteBuffer(); + return GeospatialBound.fromByteBuffer(buffer); + } + + @Test + public void testRoundTripSerDe() { + // Test XY serialization + GeospatialBound xy = GeospatialBound.createXY(1.1, 2.2); + assertThat(roundTripSerDe(xy)).isEqualTo(xy); + + // Test XYZ serialization + GeospatialBound xyz = GeospatialBound.createXYZ(1.1, 2.2, 3.3); + assertThat(roundTripSerDe(xyz)).isEqualTo(xyz); + + // Test XYM serialization + GeospatialBound xym = GeospatialBound.createXYM(1.1, 2.2, 4.4); + assertThat(roundTripSerDe(xym)).isEqualTo(xym); + + // Test XYZM serialization + GeospatialBound xyzm = GeospatialBound.createXYZM(1.1, 2.2, 3.3, 4.4); + assertThat(roundTripSerDe(xyzm)).isEqualTo(xyzm); + } +} diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java new file mode 100644 index 000000000000..e3c2abbd1f86 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java @@ -0,0 +1,430 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.apache.iceberg.types.EdgeAlgorithm; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestGeospatialPredicateEvaluators { + + @Test + public void testGeometryType() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + assertThat(evaluator).isInstanceOf(GeospatialPredicateEvaluators.GeometryEvaluator.class); + } + + @Test + public void testOverlappingBoxesIntersect() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + GeospatialBound min1 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(5.0, 5.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXY(3.0, 3.0); + GeospatialBound max2 = GeospatialBound.createXY(8.0, 8.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testNonOverlappingBoxesDontIntersect() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + GeospatialBound min1 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(2.0, 2.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXY(3.0, 3.0); + GeospatialBound max2 = GeospatialBound.createXY(5.0, 5.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + assertThat(evaluator.intersects(box1, box2)).isFalse(); + assertThat(evaluator.intersects(box2, box1)).isFalse(); + } + + @Test + public void testBoxesTouchingAtCornerIntersect() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + GeospatialBound min1 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(2.0, 2.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXY(2.0, 2.0); + GeospatialBound max2 = GeospatialBound.createXY(4.0, 4.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testBoxesTouchingAtEdgeIntersect() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + GeospatialBound min1 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(2.0, 2.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXY(2.0, 0.0); + GeospatialBound max2 = GeospatialBound.createXY(4.0, 2.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testBoxContainedWithinAnotherIntersects() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + GeospatialBound min1 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(10.0, 10.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXY(2.0, 2.0); + GeospatialBound max2 = GeospatialBound.createXY(5.0, 5.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testBoxesWithZCoordinate() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // Two boxes with Z coordinates that overlap in X and Y but not in Z + // Note: The current implementation only checks X and Y coordinates + GeospatialBound min1 = GeospatialBound.createXYZ(0.0, 0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXYZ(2.0, 2.0, 1.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXYZ(1.0, 1.0, 2.0); + GeospatialBound max2 = GeospatialBound.createXYZ(3.0, 3.0, 3.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // They should intersect because the current implementation only checks X and Y + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testBoxesWithMCoordinate() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // Two boxes with M coordinates that overlap in X and Y but not in M + // Note: The current implementation only checks X and Y coordinates + GeospatialBound min1 = GeospatialBound.createXYM(0.0, 0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXYM(2.0, 2.0, 1.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXYM(1.0, 1.0, 2.0); + GeospatialBound max2 = GeospatialBound.createXYM(3.0, 3.0, 3.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // They should intersect because the current implementation only checks X and Y + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testGeometryWrapAroundOnA() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // First box wraps around antimeridian (min.x > max.x), second doesn't + GeospatialBound min1 = GeospatialBound.createXY(170.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(-170.0, 10.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + // Box that overlaps with the part after the wrap around + GeospatialBound min2 = GeospatialBound.createXY(-175.0, 5.0); + GeospatialBound max2 = GeospatialBound.createXY(-160.0, 15.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + + // Box that overlaps with the part before the wrap around + GeospatialBound min3 = GeospatialBound.createXY(160.0, 5.0); + GeospatialBound max3 = GeospatialBound.createXY(175.0, 15.0); + BoundingBox box3 = new BoundingBox(min3, max3); + + assertThat(evaluator.intersects(box1, box3)).isTrue(); + assertThat(evaluator.intersects(box3, box1)).isTrue(); + + // Box that doesn't overlap with either part + GeospatialBound min4 = GeospatialBound.createXY(-150.0, 20.0); + GeospatialBound max4 = GeospatialBound.createXY(-140.0, 30.0); + BoundingBox box4 = new BoundingBox(min4, max4); + + assertThat(evaluator.intersects(box1, box4)).isFalse(); + assertThat(evaluator.intersects(box4, box1)).isFalse(); + } + + @Test + public void testGeometryWrapAroundOnB() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // First box doesn't wrap around, second does (min.x > max.x) + GeospatialBound min1 = GeospatialBound.createXY(-175.0, 5.0); + GeospatialBound max1 = GeospatialBound.createXY(-160.0, 15.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXY(170.0, 0.0); + GeospatialBound max2 = GeospatialBound.createXY(-170.0, 10.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testBothGeometryWrappingAround() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // Both boxes wrap around (min.x > max.x) + GeospatialBound min1 = GeospatialBound.createXY(170.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(-170.0, 10.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXY(160.0, 5.0); + GeospatialBound max2 = GeospatialBound.createXY(-160.0, 15.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // When both wrap around, they must intersect + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testBasicGeographyCases() { + Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geographyType); + + // Two overlapping boxes + GeospatialBound min1 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(10.0, 10.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXY(5.0, 5.0); + GeospatialBound max2 = GeospatialBound.createXY(15.0, 15.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + + // Non-overlapping boxes + GeospatialBound min3 = GeospatialBound.createXY(20.0, 20.0); + GeospatialBound max3 = GeospatialBound.createXY(30.0, 30.0); + BoundingBox box3 = new BoundingBox(min3, max3); + + assertThat(evaluator.intersects(box1, box3)).isFalse(); + assertThat(evaluator.intersects(box3, box1)).isFalse(); + + // Boxes at extreme valid latitudes + GeospatialBound min4 = GeospatialBound.createXY(-10.0, -90.0); + GeospatialBound max4 = GeospatialBound.createXY(10.0, -80.0); + BoundingBox box4 = new BoundingBox(min4, max4); + + GeospatialBound min5 = GeospatialBound.createXY(-5.0, 80.0); + GeospatialBound max5 = GeospatialBound.createXY(15.0, 90.0); + BoundingBox box5 = new BoundingBox(min5, max5); + + assertThat(evaluator.intersects(box4, box5)).isFalse(); + assertThat(evaluator.intersects(box5, box4)).isFalse(); + } + + @Test + public void testGeographyWrapAround() { + Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geographyType); + + // Box that wraps around the antimeridian + GeospatialBound min1 = GeospatialBound.createXY(170.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(-170.0, 10.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + // Box that overlaps with the part after the wrap around + GeospatialBound min2 = GeospatialBound.createXY(-175.0, 5.0); + GeospatialBound max2 = GeospatialBound.createXY(-160.0, 15.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testInvalidGeographyLatitude() { + Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geographyType); + + // Box with latitude below -90 + GeospatialBound min1 = GeospatialBound.createXY(0.0, -91.0); + GeospatialBound max1 = GeospatialBound.createXY(10.0, 0.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + // Box with latitude above 90 + GeospatialBound min2 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max2 = GeospatialBound.createXY(10.0, 91.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + GeospatialBound validMin = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound validMax = GeospatialBound.createXY(10.0, 10.0); + BoundingBox validBox = new BoundingBox(validMin, validMax); + + assertThatThrownBy(() -> evaluator.intersects(box1, validBox)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Latitude out of range"); + + assertThatThrownBy(() -> evaluator.intersects(validBox, box1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Latitude out of range"); + + assertThatThrownBy(() -> evaluator.intersects(box2, validBox)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Latitude out of range"); + + assertThatThrownBy(() -> evaluator.intersects(validBox, box2)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Latitude out of range"); + } + + @Test + public void testInvalidGeographyLongitude() { + Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geographyType); + + // Box with longitude below -180 + GeospatialBound min1 = GeospatialBound.createXY(-181.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(0.0, 10.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + // Box with longitude above 180 + GeospatialBound min2 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max2 = GeospatialBound.createXY(181.0, 10.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + GeospatialBound validMin = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound validMax = GeospatialBound.createXY(10.0, 10.0); + BoundingBox validBox = new BoundingBox(validMin, validMax); + + assertThatThrownBy(() -> evaluator.intersects(box1, validBox)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Longitude out of range"); + + assertThatThrownBy(() -> evaluator.intersects(validBox, box1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Longitude out of range"); + + assertThatThrownBy(() -> evaluator.intersects(box2, validBox)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Longitude out of range"); + + assertThatThrownBy(() -> evaluator.intersects(validBox, box2)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Longitude out of range"); + } + + @Test + public void testExtremeLongitudeBoundaries() { + // Tests valid boxes at the extreme boundaries of longitude + Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geographyType); + + // Box at -180 longitude + GeospatialBound min1 = GeospatialBound.createXY(-180.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(-170.0, 10.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + // Box at 180 longitude + GeospatialBound min2 = GeospatialBound.createXY(170.0, 0.0); + GeospatialBound max2 = GeospatialBound.createXY(180.0, 10.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // These boxes should not intersect + assertThat(evaluator.intersects(box1, box2)).isFalse(); + assertThat(evaluator.intersects(box2, box1)).isFalse(); + + // Box that wraps around the antimeridian, touching -180 and 180 + GeospatialBound min3 = GeospatialBound.createXY(180.0, 0.0); + GeospatialBound max3 = GeospatialBound.createXY(-180.0, 10.0); + BoundingBox box3 = new BoundingBox(min3, max3); + + // This should intersect with both boxes at the extreme edges + assertThat(evaluator.intersects(box1, box3)).isTrue(); + assertThat(evaluator.intersects(box3, box1)).isTrue(); + assertThat(evaluator.intersects(box2, box3)).isTrue(); + assertThat(evaluator.intersects(box3, box2)).isTrue(); + } + + @Test + public void testSphericalGeographyType() { + Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geographyType); + + assertThat(evaluator).isInstanceOf(GeospatialPredicateEvaluators.GeographyEvaluator.class); + } + + @Test + public void testUnsupportedType() { + Type stringType = Types.StringType.get(); + + assertThatThrownBy(() -> GeospatialPredicateEvaluators.create(stringType)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining("Unsupported type for BoundingBox"); + } +} From 0f5ca1b5db93ffc1573e754c93ffc18b6b1024f3 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 18 Sep 2025 19:07:21 +0800 Subject: [PATCH 02/15] Fix remaining review comments in https://github.com/apache/iceberg/pull/12346 --- api/src/main/java/org/apache/iceberg/types/Types.java | 8 ++++---- .../test/java/org/apache/iceberg/types/TestTypes.java | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index 1c16c444d4e6..ec6076b04fa0 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -81,7 +81,6 @@ public static Type fromTypeName(String typeString) { Matcher geometry = GEOMETRY_PARAMETERS.matcher(typeString); if (geometry.matches()) { String crs = geometry.group(1); - Preconditions.checkArgument(!crs.contains(","), "Invalid CRS: %s", crs); return GeometryType.of(crs); } @@ -599,7 +598,7 @@ public TypeID typeId() { } public String crs() { - return crs; + return crs != null ? crs : DEFAULT_CRS; } @Override @@ -631,6 +630,7 @@ public String toString() { public static class GeographyType extends PrimitiveType { public static final String DEFAULT_CRS = "OGC:CRS84"; + public static final EdgeAlgorithm DEFAULT_ALGORITHM = EdgeAlgorithm.SPHERICAL; public static GeographyType crs84() { return new GeographyType(); @@ -664,11 +664,11 @@ public TypeID typeId() { } public String crs() { - return crs; + return crs != null ? crs : DEFAULT_CRS; } public EdgeAlgorithm algorithm() { - return algorithm; + return algorithm != null ? algorithm : DEFAULT_ALGORITHM; } @Override diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index cc8d3586b862..fa5ed4304d3c 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -98,6 +98,8 @@ public void fromPrimitiveString() { assertThat(Types.fromPrimitiveString("geometry")).isEqualTo(Types.GeometryType.crs84()); assertThat(Types.fromPrimitiveString("Geometry")).isEqualTo(Types.GeometryType.crs84()); + assertThat(((Types.GeometryType) Types.fromPrimitiveString("geometry")).crs()) + .isEqualTo(Types.GeometryType.DEFAULT_CRS); assertThat(Types.fromPrimitiveString("geometry(srid:3857)")) .isEqualTo(Types.GeometryType.of("srid:3857")); assertThat(Types.fromPrimitiveString("geometry( srid:3857 )")) @@ -113,12 +115,13 @@ public void fromPrimitiveString() { assertThatExceptionOfType(IllegalArgumentException.class) .isThrownBy(() -> Types.fromPrimitiveString("geometry( )")) .withMessageContaining("Invalid CRS: (empty string)"); - assertThatExceptionOfType(IllegalArgumentException.class) - .isThrownBy(() -> Types.fromPrimitiveString("geometry(srid:123,456)")) - .withMessageContaining("Invalid CRS: srid:123,456"); assertThat(Types.fromPrimitiveString("geography")).isEqualTo(Types.GeographyType.crs84()); assertThat(Types.fromPrimitiveString("Geography")).isEqualTo(Types.GeographyType.crs84()); + assertThat(((Types.GeographyType) Types.fromPrimitiveString("geography")).crs()) + .isEqualTo(Types.GeographyType.DEFAULT_CRS); + assertThat(((Types.GeographyType) Types.fromPrimitiveString("geography")).algorithm()) + .isEqualTo(Types.GeographyType.DEFAULT_ALGORITHM); assertThat(Types.fromPrimitiveString("geography(srid:4269)")) .isEqualTo(Types.GeographyType.of("srid:4269")); assertThat(Types.fromPrimitiveString("geography(srid: 4269)")) From 6bfe46c67a0cfe7621d5a655c3aa51fdcfa829a5 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 18 Sep 2025 20:31:29 +0800 Subject: [PATCH 03/15] BoundingBox does not need to be serializable --- .../main/java/org/apache/iceberg/geospatial/BoundingBox.java | 3 +-- .../java/org/apache/iceberg/geospatial/GeospatialBound.java | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java index 0bdb36e4f465..80e191524a8f 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java @@ -18,7 +18,6 @@ */ package org.apache.iceberg.geospatial; -import java.io.Serializable; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Objects; @@ -30,7 +29,7 @@ * minimum and maximum coordinates that define the box's corners. This provides a simple * approximation of a more complex geometry for efficient filtering and data skipping. */ -public class BoundingBox implements Serializable { +public class BoundingBox { /** * Create a {@link BoundingBox} object from buffers containing min and max bounds * diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java index 41d8e7290726..18ef9cbb4a31 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java @@ -18,7 +18,6 @@ */ package org.apache.iceberg.geospatial; -import java.io.Serializable; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Objects; @@ -44,7 +43,7 @@ *

This class represents a lower or upper geospatial bound and handles serialization and * deserialization of these bounds to/from byte arrays, conforming to the Iceberg specification. */ -public class GeospatialBound implements Serializable { +public class GeospatialBound { /** * Parses a geospatial bound from a byte buffer according to Iceberg spec. * From 33e610ed7d84dd7aff885bb07d31eb0d2611f380 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Mon, 22 Sep 2025 12:02:24 +0800 Subject: [PATCH 04/15] Fix review comments --- .../iceberg/geospatial/BoundingBox.java | 45 ++++-------- .../iceberg/geospatial/GeospatialBound.java | 68 ++++++++----------- .../GeospatialPredicateEvaluators.java | 47 ++++++++++--- .../iceberg/geospatial/TestBoundingBox.java | 8 +-- 4 files changed, 79 insertions(+), 89 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java index 80e191524a8f..b823ff1c8f1e 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java @@ -21,6 +21,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Objects; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; /** * Represents a geospatial bounding box composed of minimum and maximum bounds. @@ -49,38 +50,18 @@ public static BoundingBox fromByteBuffers(ByteBuffer min, ByteBuffer max) { * @return a BoundingBox instance */ public static BoundingBox fromByteBuffer(ByteBuffer buffer) { - int originalPosition = buffer.position(); - ByteOrder originalOrder = buffer.order(); - - try { - buffer.order(ByteOrder.LITTLE_ENDIAN); - - int minLen = buffer.getInt(); - ByteBuffer min = buffer.slice(); - min.limit(minLen); - buffer.position(buffer.position() + minLen); - - int maxLen = buffer.getInt(); - ByteBuffer max = buffer.slice(); - max.limit(maxLen); - - return fromByteBuffers(min, max); - } finally { - // Restore original position and byte order - buffer.position(originalPosition); - buffer.order(originalOrder); - } - } + buffer.order(ByteOrder.LITTLE_ENDIAN); - /** - * Create an empty bounding box - * - * @return an empty bounding box - */ - public static BoundingBox empty() { - return new BoundingBox( - GeospatialBound.createXY(Double.NaN, Double.NaN), - GeospatialBound.createXY(Double.NaN, Double.NaN)); + int minLen = buffer.getInt(); + ByteBuffer min = buffer.slice(); + min.limit(minLen); + buffer.position(buffer.position() + minLen); + + int maxLen = buffer.getInt(); + ByteBuffer max = buffer.slice(); + max.limit(maxLen); + + return fromByteBuffers(min, max); } public BoundingBox(GeospatialBound min, GeospatialBound max) { @@ -149,6 +130,6 @@ public int hashCode() { @Override public String toString() { - return "BoundingBox{min={" + min.simpleString() + "}, max={" + max.simpleString() + "}}"; + return MoreObjects.toStringHelper(BoundingBox.class).add("min", min).add("max", max).toString(); } } diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java index 18ef9cbb4a31..3772487476c0 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java @@ -21,6 +21,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Objects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; /** * Represents a geospatial bound (minimum or maximum) for Iceberg tables. @@ -59,39 +60,31 @@ public class GeospatialBound { */ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { // Save original position and byte order to restore them later - int originalPosition = buffer.position(); - ByteOrder originalOrder = buffer.order(); - - try { - buffer.order(ByteOrder.LITTLE_ENDIAN); - int size = buffer.remaining(); - - if (size == 2 * Double.BYTES) { - // x:y format (2 doubles) - double coordX = buffer.getDouble(); - double coordY = buffer.getDouble(); - return createXY(coordX, coordY); - } else if (size == 3 * Double.BYTES) { - // x:y:z format (3 doubles) - double coordX = buffer.getDouble(); - double coordY = buffer.getDouble(); - double coordZ = buffer.getDouble(); - return createXYZ(coordX, coordY, coordZ); - } else if (size == 4 * Double.BYTES) { - // x:y:z:m format (4 doubles) - z might be NaN - double coordX = buffer.getDouble(); - double coordY = buffer.getDouble(); - double coordZ = buffer.getDouble(); - double coordM = buffer.getDouble(); - return new GeospatialBound(coordX, coordY, coordZ, coordM); - } else { - throw new IllegalArgumentException( - "Invalid buffer size for GeospatialBound: expected 16, 24, or 32 bytes, got " + size); - } - } finally { - // Restore original position and byte order - buffer.position(originalPosition); - buffer.order(originalOrder); + buffer.order(ByteOrder.LITTLE_ENDIAN); + int size = buffer.remaining(); + Preconditions.checkArgument( + size == 2 * Double.BYTES || size == 3 * Double.BYTES || size == 4 * Double.BYTES, + "Invalid geo spatial bound buffer size: %s. Valid sizes are 16, 24, or 32 bytes.", + size); + + if (size == 2 * Double.BYTES) { + // x:y format (2 doubles) + double coordX = buffer.getDouble(); + double coordY = buffer.getDouble(); + return createXY(coordX, coordY); + } else if (size == 3 * Double.BYTES) { + // x:y:z format (3 doubles) + double coordX = buffer.getDouble(); + double coordY = buffer.getDouble(); + double coordZ = buffer.getDouble(); + return createXYZ(coordX, coordY, coordZ); + } else { + // x:y:z:m format (4 doubles) - z might be NaN + double coordX = buffer.getDouble(); + double coordY = buffer.getDouble(); + double coordZ = buffer.getDouble(); + double coordM = buffer.getDouble(); + return new GeospatialBound(coordX, coordY, coordZ, coordM); } } @@ -103,13 +96,6 @@ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { * @throws IllegalArgumentException if the byte array has an invalid length */ public static GeospatialBound fromByteArray(byte[] bytes) { - int length = bytes.length; - if (length != 2 * Double.BYTES && length != 3 * Double.BYTES && length != 4 * Double.BYTES) { - throw new IllegalArgumentException( - "Invalid byte array length for GeospatialBound: expected 16, 24, or 32 bytes, got " - + length); - } - return fromByteBuffer(ByteBuffer.wrap(bytes)); } @@ -322,6 +308,6 @@ public boolean equals(Object other) { @Override public int hashCode() { - return Objects.hash(GeospatialBound.class, x, y, z, m); + return Objects.hash(x, y, z, m); } } diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java index 64943902445c..e14712734e83 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java @@ -20,13 +20,14 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; public class GeospatialPredicateEvaluators { private GeospatialPredicateEvaluators() {} public interface GeospatialPredicateEvaluator { /** - * Test whether this bounding box intersects with another. + * Determines whether the two bounding boxes intersect. * * @param bbox1 the first bounding box * @param bbox2 the second bounding box @@ -35,18 +36,42 @@ public interface GeospatialPredicateEvaluator { boolean intersects(BoundingBox bbox1, BoundingBox bbox2); } + /** + * Create an evaluator for evaluating bounding box relationship for the given geospatial type. + * + * @param type the geospatial type, should be one of Type.TypeID.GEOMETRY or Type.TypeID.GEOGRAPHY + * @return the evaluator + */ public static GeospatialPredicateEvaluator create(Type type) { switch (type.typeId()) { case GEOMETRY: - return new GeometryEvaluator(); + return create((Types.GeometryType) type); case GEOGRAPHY: - return new GeographyEvaluator(); + return create((Types.GeographyType) type); default: throw new UnsupportedOperationException("Unsupported type for BoundingBox: " + type); } } - static class GeometryEvaluator implements GeospatialPredicateEvaluator { + /** + * Create an evaluator for evaluating bounding box relationship for planar geometries + * + * @return the evaluator + */ + public static GeometryEvaluator create(Types.GeometryType type) { + return new GeometryEvaluator(); + } + + /** + * Create an evaluator for evaluating bounding box relationship for geographies + * + * @return the evaluator + */ + public static GeographyEvaluator create(Types.GeographyType type) { + return new GeographyEvaluator(); + } + + public static class GeometryEvaluator implements GeospatialPredicateEvaluator { @Override public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { return intersectsWithWrapAround(bbox1, bbox2); @@ -90,7 +115,7 @@ static boolean intersectsWithWrapAround(BoundingBox bbox1, BoundingBox bbox2) { return bbox1.min().x() <= bbox2.max().x() || bbox1.max().x() >= bbox2.min().x(); } else if (bbox1.min().x() <= bbox1.max().x() && bbox2.min().x() > bbox2.max().x()) { // bbox2 wraps around the antimeridian, bbox1 does not - return intersectsWithWrapAround(bbox2, bbox1); + return bbox2.min().x() <= bbox1.max().x() || bbox2.max().x() >= bbox1.min().x(); } else { // Both wrap around the antimeridian, they must intersect return true; @@ -98,7 +123,7 @@ static boolean intersectsWithWrapAround(BoundingBox bbox1, BoundingBox bbox2) { } } - static class GeographyEvaluator implements GeospatialPredicateEvaluator { + public static class GeographyEvaluator implements GeospatialPredicateEvaluator { @Override public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { validateBoundingBox(bbox1); @@ -115,12 +140,12 @@ public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { */ private void validateBoundingBox(BoundingBox bbox) { Preconditions.checkArgument( - bbox.min().y() >= -90 && bbox.max().y() <= 90, "Latitude out of range: %s", bbox); + bbox.min().y() >= -90.0d && bbox.max().y() <= 90.0d, "Latitude out of range: %s", bbox); Preconditions.checkArgument( - bbox.min().x() >= -180 - && bbox.min().x() <= 180 - && bbox.max().x() >= -180 - && bbox.max().x() <= 180, + bbox.min().x() >= -180.0d + && bbox.min().x() <= 180.0d + && bbox.max().x() >= -180.0d + && bbox.max().x() <= 180.0d, "Longitude out of range: %s", bbox); } diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java index e54849844d45..23e9ee0968e7 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java @@ -60,8 +60,6 @@ public void testCreateFromByteBuffers() { assertThat(box.min().y()).isEqualTo(2.0); assertThat(box.max().x()).isEqualTo(3.0); assertThat(box.max().y()).isEqualTo(4.0); - assertThat(minBuffer.order()).isEqualTo(ByteOrder.LITTLE_ENDIAN); - assertThat(maxBuffer.order()).isEqualTo(ByteOrder.LITTLE_ENDIAN); } @Test @@ -85,8 +83,6 @@ public void testCreateFromBigEndianByteBuffers() { assertThat(box.min().y()).isEqualTo(20.0); assertThat(box.max().x()).isEqualTo(30.0); assertThat(box.max().y()).isEqualTo(40.0); - assertThat(minBuffer.order()).isEqualTo(ByteOrder.BIG_ENDIAN); - assertThat(maxBuffer.order()).isEqualTo(ByteOrder.BIG_ENDIAN); } @Test @@ -121,7 +117,9 @@ public void testToString() { GeospatialBound min = GeospatialBound.createXY(1.0, 2.0); GeospatialBound max = GeospatialBound.createXY(3.0, 4.0); BoundingBox box = new BoundingBox(min, max); - assertThat(box.toString()).isEqualTo("BoundingBox{min={x=1.0, y=2.0}, max={x=3.0, y=4.0}}"); + assertThat(box.toString()) + .isEqualTo( + "BoundingBox{min=GeospatialBound(x=1.0, y=2.0), max=GeospatialBound(x=3.0, y=4.0)}"); } @Test From 41336838f774c83a2e91fa8031756bb2d7fdea56 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Mon, 22 Sep 2025 14:59:44 +0800 Subject: [PATCH 05/15] Handle Z and M dimensions when checking for bounding box intersects --- .../GeospatialPredicateEvaluators.java | 75 ++++++++-- .../TestGeospatialPredicateEvaluators.java | 128 ++++++++++++++++-- 2 files changed, 181 insertions(+), 22 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java index e14712734e83..8b6e2cf223d4 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java @@ -101,24 +101,71 @@ public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { * @return true if the bounding boxes intersect */ static boolean intersectsWithWrapAround(BoundingBox bbox1, BoundingBox bbox2) { - // Let's check y first, and if y does not intersect, we can return false - if (bbox1.min().y() > bbox2.max().y() || bbox1.max().y() < bbox2.min().y()) { + // Check Z dimension (elevation) if both boxes have Z coordinates - no wrap-around + if (bbox1.min().hasZ() && bbox1.max().hasZ() && bbox2.min().hasZ() && bbox2.max().hasZ()) { + if (!intersects(bbox1.min().z(), bbox1.max().z(), bbox2.min().z(), bbox2.max().z())) { + return false; + } + } + + // Check M dimension (measure) if both boxes have M coordinates - no wrap-around + if (bbox1.min().hasM() && bbox1.max().hasM() && bbox2.min().hasM() && bbox2.max().hasM()) { + if (!intersects(bbox1.min().m(), bbox1.max().m(), bbox2.min().m(), bbox2.max().m())) { + return false; + } + } + + // Check Y dimension (latitude/northing) - no wrap-around + if (!intersects(bbox1.min().y(), bbox1.max().y(), bbox2.min().y(), bbox2.max().y())) { return false; } - // Now check x, need to take wrap-around into account - if (bbox1.min().x() <= bbox1.max().x() && bbox2.min().x() <= bbox2.max().x()) { - // No wrap-around - return bbox1.min().x() <= bbox2.max().x() && bbox1.max().x() >= bbox2.min().x(); - } else if (bbox1.min().x() > bbox1.max().x() && bbox2.min().x() <= bbox2.max().x()) { - // bbox1 wraps around the antimeridian, bbox2 does not - return bbox1.min().x() <= bbox2.max().x() || bbox1.max().x() >= bbox2.min().x(); - } else if (bbox1.min().x() <= bbox1.max().x() && bbox2.min().x() > bbox2.max().x()) { - // bbox2 wraps around the antimeridian, bbox1 does not - return bbox2.min().x() <= bbox1.max().x() || bbox2.max().x() >= bbox1.min().x(); - } else { - // Both wrap around the antimeridian, they must intersect + // Check X dimension (longitude/easting) - with wrap-around support + return intersectsWithWrapAround( + bbox1.min().x(), bbox1.max().x(), bbox2.min().x(), bbox2.max().x()); + } + + /** + * Check if two intervals intersect using regular interval logic. Two intervals [min1, max1] and + * [min2, max2] intersect if min1 <= max2 AND max1 >= min2. + * + * @param min1 minimum of first interval + * @param max1 maximum of first interval + * @param min2 minimum of second interval + * @param max2 maximum of second interval + * @return true if the intervals intersect + */ + private static boolean intersects(double min1, double max1, double min2, double max2) { + return min1 <= max2 && max1 >= min2; + } + + /** + * Check if two intervals intersect with wrap-around support for longitude/X dimension. Handles + * antimeridian crossing where min > max indicates wrapping around the globe. + * + * @param min1 minimum of first interval (may be > max1 if wrapping) + * @param max1 maximum of first interval (may be < min1 if wrapping) + * @param min2 minimum of second interval (may be > max2 if wrapping) + * @param max2 maximum of second interval (may be < min2 if wrapping) + * @return true if the intervals intersect + */ + private static boolean intersectsWithWrapAround( + double min1, double max1, double min2, double max2) { + boolean interval1WrapsAround = min1 > max1; + boolean interval2WrapsAround = min2 > max2; + + if (!interval1WrapsAround && !interval2WrapsAround) { + // No wrap-around in either interval - use regular intersection + return intersects(min1, max1, min2, max2); + } else if (interval1WrapsAround && interval2WrapsAround) { + // Both intervals wrap around - they must intersect somewhere return true; + } else if (interval1WrapsAround) { + // interval1 wraps around, interval2 does not + return min1 <= max2 || max1 >= min2; + } else { + // interval2 wraps around, interval1 does not + return min2 <= max1 || max2 >= min1; } } } diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java index e3c2abbd1f86..226587ac7b15 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java @@ -134,7 +134,6 @@ public void testBoxesWithZCoordinate() { GeospatialPredicateEvaluators.create(geometryType); // Two boxes with Z coordinates that overlap in X and Y but not in Z - // Note: The current implementation only checks X and Y coordinates GeospatialBound min1 = GeospatialBound.createXYZ(0.0, 0.0, 0.0); GeospatialBound max1 = GeospatialBound.createXYZ(2.0, 2.0, 1.0); BoundingBox box1 = new BoundingBox(min1, max1); @@ -143,9 +142,16 @@ public void testBoxesWithZCoordinate() { GeospatialBound max2 = GeospatialBound.createXYZ(3.0, 3.0, 3.0); BoundingBox box2 = new BoundingBox(min2, max2); - // They should intersect because the current implementation only checks X and Y - assertThat(evaluator.intersects(box1, box2)).isTrue(); - assertThat(evaluator.intersects(box2, box1)).isTrue(); + GeospatialBound min3 = GeospatialBound.createXYZ(1.0, 1.0, 1.0); + GeospatialBound max3 = GeospatialBound.createXYZ(3.0, 3.0, 3.0); + BoundingBox box3 = new BoundingBox(min3, max3); + + assertThat(evaluator.intersects(box1, box2)).isFalse(); + assertThat(evaluator.intersects(box2, box1)).isFalse(); + assertThat(evaluator.intersects(box1, box3)).isTrue(); + assertThat(evaluator.intersects(box3, box1)).isTrue(); + assertThat(evaluator.intersects(box2, box3)).isTrue(); + assertThat(evaluator.intersects(box3, box2)).isTrue(); } @Test @@ -155,7 +161,6 @@ public void testBoxesWithMCoordinate() { GeospatialPredicateEvaluators.create(geometryType); // Two boxes with M coordinates that overlap in X and Y but not in M - // Note: The current implementation only checks X and Y coordinates GeospatialBound min1 = GeospatialBound.createXYM(0.0, 0.0, 0.0); GeospatialBound max1 = GeospatialBound.createXYM(2.0, 2.0, 1.0); BoundingBox box1 = new BoundingBox(min1, max1); @@ -164,9 +169,16 @@ public void testBoxesWithMCoordinate() { GeospatialBound max2 = GeospatialBound.createXYM(3.0, 3.0, 3.0); BoundingBox box2 = new BoundingBox(min2, max2); - // They should intersect because the current implementation only checks X and Y - assertThat(evaluator.intersects(box1, box2)).isTrue(); - assertThat(evaluator.intersects(box2, box1)).isTrue(); + GeospatialBound min3 = GeospatialBound.createXYM(1.0, 1.0, 1.0); + GeospatialBound max3 = GeospatialBound.createXYM(3.0, 3.0, 3.0); + BoundingBox box3 = new BoundingBox(min3, max3); + + assertThat(evaluator.intersects(box1, box2)).isFalse(); + assertThat(evaluator.intersects(box2, box1)).isFalse(); + assertThat(evaluator.intersects(box1, box3)).isTrue(); + assertThat(evaluator.intersects(box3, box1)).isTrue(); + assertThat(evaluator.intersects(box2, box3)).isTrue(); + assertThat(evaluator.intersects(box3, box2)).isTrue(); } @Test @@ -410,6 +422,106 @@ public void testExtremeLongitudeBoundaries() { assertThat(evaluator.intersects(box3, box2)).isTrue(); } + @Test + public void testBoxesWithXYZMCoordinates() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // Two boxes with all XYZM coordinates that overlap in X, Y, Z but not in M + GeospatialBound min1 = GeospatialBound.createXYZM(0.0, 0.0, 0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXYZM(2.0, 2.0, 2.0, 1.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXYZM(1.0, 1.0, 1.0, 2.0); + GeospatialBound max2 = GeospatialBound.createXYZM(3.0, 3.0, 3.0, 3.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // They should NOT intersect because M dimensions don't overlap + assertThat(evaluator.intersects(box1, box2)).isFalse(); + assertThat(evaluator.intersects(box2, box1)).isFalse(); + } + + @Test + public void testBoxesWithXYZMCoordinatesIntersecting() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // Two boxes with all XYZM coordinates that overlap in all dimensions + GeospatialBound min1 = GeospatialBound.createXYZM(0.0, 0.0, 0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXYZM(2.0, 2.0, 2.0, 2.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXYZM(1.0, 1.0, 1.0, 1.0); + GeospatialBound max2 = GeospatialBound.createXYZM(3.0, 3.0, 3.0, 3.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // They should intersect because all dimensions overlap + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testMixedDimensionsXYvsXYZ() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // One box with XY coordinates, another with XYZ coordinates + GeospatialBound min1 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(2.0, 2.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXYZ(1.0, 1.0, 100.0); + GeospatialBound max2 = GeospatialBound.createXYZ(3.0, 3.0, 200.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // They should intersect because Z dimension is ignored when not present in both + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testMixedDimensionsXYvsXYM() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // One box with XY coordinates, another with XYM coordinates + GeospatialBound min1 = GeospatialBound.createXY(0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXY(2.0, 2.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXYM(1.0, 1.0, 100.0); + GeospatialBound max2 = GeospatialBound.createXYM(3.0, 3.0, 200.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // They should intersect because M dimension is ignored when not present in both + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + + @Test + public void testMixedDimensionsXYZvsXYM() { + Type geometryType = Types.GeometryType.crs84(); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geometryType); + + // One box with XYZ coordinates, another with XYM coordinates + GeospatialBound min1 = GeospatialBound.createXYZ(0.0, 0.0, 0.0); + GeospatialBound max1 = GeospatialBound.createXYZ(2.0, 2.0, 2.0); + BoundingBox box1 = new BoundingBox(min1, max1); + + GeospatialBound min2 = GeospatialBound.createXYM(1.0, 1.0, 100.0); + GeospatialBound max2 = GeospatialBound.createXYM(3.0, 3.0, 200.0); + BoundingBox box2 = new BoundingBox(min2, max2); + + // They should intersect because both Z and M dimensions are ignored when not present in both + assertThat(evaluator.intersects(box1, box2)).isTrue(); + assertThat(evaluator.intersects(box2, box1)).isTrue(); + } + @Test public void testSphericalGeographyType() { Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); From e7770b41ac921f318bd3436d5b8ed11bb40dac73 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Tue, 23 Sep 2025 13:00:48 +0800 Subject: [PATCH 06/15] Fix toByteBuffer and fromByteBuffer --- .../iceberg/geospatial/BoundingBox.java | 49 ++++++----- .../iceberg/geospatial/GeospatialBound.java | 87 +++++++++---------- .../iceberg/geospatial/TestBoundingBox.java | 24 ----- 3 files changed, 68 insertions(+), 92 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java index b823ff1c8f1e..a8056e424f60 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java @@ -22,6 +22,7 @@ import java.nio.ByteOrder; import java.util.Objects; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; /** * Represents a geospatial bounding box composed of minimum and maximum bounds. @@ -50,20 +51,41 @@ public static BoundingBox fromByteBuffers(ByteBuffer min, ByteBuffer max) { * @return a BoundingBox instance */ public static BoundingBox fromByteBuffer(ByteBuffer buffer) { - buffer.order(ByteOrder.LITTLE_ENDIAN); + Preconditions.checkArgument(buffer.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); int minLen = buffer.getInt(); - ByteBuffer min = buffer.slice(); + ByteBuffer min = buffer.slice().order(ByteOrder.LITTLE_ENDIAN); min.limit(minLen); buffer.position(buffer.position() + minLen); int maxLen = buffer.getInt(); - ByteBuffer max = buffer.slice(); + ByteBuffer max = buffer.slice().order(ByteOrder.LITTLE_ENDIAN); max.limit(maxLen); return fromByteBuffers(min, max); } + /** + * Serializes this bounding box to a byte buffer. The serialized byte buffer could be deserialized + * using {@link #fromByteBuffer(ByteBuffer)}. + * + * @return a byte buffer containing the serialized bounding box + */ + public ByteBuffer toByteBuffer() { + ByteBuffer minBuffer = min.toByteBuffer(); + ByteBuffer maxBuffer = max.toByteBuffer(); + + int totalSize = Integer.BYTES + minBuffer.remaining() + Integer.BYTES + maxBuffer.remaining(); + ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); + + buffer.putInt(minBuffer.remaining()); + buffer.put(minBuffer); + buffer.putInt(maxBuffer.remaining()); + buffer.put(maxBuffer); + buffer.flip(); + return buffer; + } + public BoundingBox(GeospatialBound min, GeospatialBound max) { this.min = min; this.max = max; @@ -90,27 +112,6 @@ public GeospatialBound max() { return max; } - /** - * Serializes this bounding box to a byte buffer. The serialized byte buffer could be deserialized - * using {@link #fromByteBuffer(ByteBuffer)}. - * - * @return a byte buffer containing the serialized bounding box - */ - public ByteBuffer toByteBuffer() { - ByteBuffer minBuffer = min.toByteBuffer(); - ByteBuffer maxBuffer = max.toByteBuffer(); - - int totalSize = Integer.BYTES + minBuffer.remaining() + Integer.BYTES + maxBuffer.remaining(); - ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); - - buffer.putInt(minBuffer.remaining()); - buffer.put(minBuffer); - buffer.putInt(maxBuffer.remaining()); - buffer.put(maxBuffer); - buffer.flip(); - return buffer; - } - @Override public boolean equals(Object other) { if (this == other) { diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java index 3772487476c0..febd158db2f2 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java @@ -59,8 +59,7 @@ public class GeospatialBound { * @throws IllegalArgumentException if the buffer has an invalid size */ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { - // Save original position and byte order to restore them later - buffer.order(ByteOrder.LITTLE_ENDIAN); + Preconditions.checkArgument(buffer.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); int size = buffer.remaining(); Preconditions.checkArgument( size == 2 * Double.BYTES || size == 3 * Double.BYTES || size == 4 * Double.BYTES, @@ -88,6 +87,47 @@ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { } } + /** + * Serializes this geospatial bound to a byte buffer according to Iceberg spec. + * + *

Following the Iceberg spec, the bound is serialized based on which coordinates are set: - + * x:y (2 doubles) when both z and m are unset - x:y:z (3 doubles) when only m is unset - + * x:y:NaN:m (4 doubles) when only z is unset - x:y:z:m (4 doubles) when all coordinates are set + * + * @return A ByteBuffer containing the serialized geospatial bound + */ + public ByteBuffer toByteBuffer() { + // Calculate size based on which coordinates are present + int size; + if (!hasZ() && !hasM()) { + // Just x and y + size = 2 * Double.BYTES; + } else if (hasZ() && !hasM()) { + // x, y, and z (no m) + size = 3 * Double.BYTES; + } else { + // x, y, z (or NaN), and m + size = 4 * Double.BYTES; + } + + ByteBuffer buffer = ByteBuffer.allocate(size).order(ByteOrder.LITTLE_ENDIAN); + buffer.putDouble(x); + buffer.putDouble(y); + + if (hasZ() || hasM()) { + // If we have z or m or both, we need to include z (could be NaN) + buffer.putDouble(z); + } + + if (hasM()) { + // If we have m, include it + buffer.putDouble(m); + } + + buffer.flip(); + return buffer; + } + /** * Parses a geospatial bound from a byte array according to Iceberg spec. * @@ -96,7 +136,7 @@ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { * @throws IllegalArgumentException if the byte array has an invalid length */ public static GeospatialBound fromByteArray(byte[] bytes) { - return fromByteBuffer(ByteBuffer.wrap(bytes)); + return fromByteBuffer(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)); } /** @@ -230,47 +270,6 @@ public boolean hasM() { return !Double.isNaN(m); } - /** - * Serializes this geospatial bound to a byte buffer according to Iceberg spec. - * - *

Following the Iceberg spec, the bound is serialized based on which coordinates are set: - - * x:y (2 doubles) when both z and m are unset - x:y:z (3 doubles) when only m is unset - - * x:y:NaN:m (4 doubles) when only z is unset - x:y:z:m (4 doubles) when all coordinates are set - * - * @return A ByteBuffer containing the serialized geospatial bound - */ - public ByteBuffer toByteBuffer() { - // Calculate size based on which coordinates are present - int size; - if (!hasZ() && !hasM()) { - // Just x and y - size = 2 * Double.BYTES; - } else if (hasZ() && !hasM()) { - // x, y, and z (no m) - size = 3 * Double.BYTES; - } else { - // x, y, z (or NaN), and m - size = 4 * Double.BYTES; - } - - ByteBuffer buffer = ByteBuffer.allocate(size).order(ByteOrder.LITTLE_ENDIAN); - buffer.putDouble(x); - buffer.putDouble(y); - - if (hasZ() || hasM()) { - // If we have z or m or both, we need to include z (could be NaN) - buffer.putDouble(z); - } - - if (hasM()) { - // If we have m, include it - buffer.putDouble(m); - } - - buffer.flip(); - return buffer; - } - @Override public String toString() { return "GeospatialBound(" + simpleString() + ")"; diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java index 23e9ee0968e7..af2328cce890 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java @@ -62,29 +62,6 @@ public void testCreateFromByteBuffers() { assertThat(box.max().y()).isEqualTo(4.0); } - @Test - public void testCreateFromBigEndianByteBuffers() { - // Create byte buffers for XY bounds - ByteBuffer minBuffer = ByteBuffer.allocate(16); - minBuffer.order(ByteOrder.LITTLE_ENDIAN); - minBuffer.putDouble(0, 10.0); // x - minBuffer.putDouble(8, 20.0); // y - minBuffer.order(ByteOrder.BIG_ENDIAN); - - ByteBuffer maxBuffer = ByteBuffer.allocate(16); - maxBuffer.order(ByteOrder.LITTLE_ENDIAN); - maxBuffer.putDouble(0, 30.0); // x - maxBuffer.putDouble(8, 40.0); // y - maxBuffer.order(ByteOrder.BIG_ENDIAN); - - BoundingBox box = BoundingBox.fromByteBuffers(minBuffer, maxBuffer); - - assertThat(box.min().x()).isEqualTo(10.0); - assertThat(box.min().y()).isEqualTo(20.0); - assertThat(box.max().x()).isEqualTo(30.0); - assertThat(box.max().y()).isEqualTo(40.0); - } - @Test public void testEqualsAndHashCode() { GeospatialBound min1 = GeospatialBound.createXY(1.0, 2.0); @@ -105,7 +82,6 @@ public void testEqualsAndHashCode() { assertThat(box1).isEqualTo(box2); assertThat(box1).isNotEqualTo(box3); assertThat(box1).isNotEqualTo(null); - assertThat(box1).isNotEqualTo("not a box"); // Test hashCode assertThat(box1.hashCode()).isEqualTo(box2.hashCode()); From 52e7a999ffc0594ab66366483e251da9fc4a20a2 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Tue, 23 Sep 2025 13:01:04 +0800 Subject: [PATCH 07/15] Rename tests for geometries --- .../TestGeospatialPredicateEvaluators.java | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java index 226587ac7b15..d424bc8a743c 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java @@ -38,7 +38,25 @@ public void testGeometryType() { } @Test - public void testOverlappingBoxesIntersect() { + public void testSphericalGeographyType() { + Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geographyType); + + assertThat(evaluator).isInstanceOf(GeospatialPredicateEvaluators.GeographyEvaluator.class); + } + + @Test + public void testUnsupportedType() { + Type stringType = Types.StringType.get(); + + assertThatThrownBy(() -> GeospatialPredicateEvaluators.create(stringType)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining("Unsupported type for BoundingBox"); + } + + @Test + public void testOverlappingGeometryBoxesIntersect() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); @@ -56,7 +74,7 @@ public void testOverlappingBoxesIntersect() { } @Test - public void testNonOverlappingBoxesDontIntersect() { + public void testNonOverlappingGeometryBoxesDontIntersect() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); @@ -74,7 +92,7 @@ public void testNonOverlappingBoxesDontIntersect() { } @Test - public void testBoxesTouchingAtCornerIntersect() { + public void testGeometryBoxesTouchingAtCornerIntersect() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); @@ -92,7 +110,7 @@ public void testBoxesTouchingAtCornerIntersect() { } @Test - public void testBoxesTouchingAtEdgeIntersect() { + public void testGeometryBoxesTouchingAtEdgeIntersect() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); @@ -110,7 +128,7 @@ public void testBoxesTouchingAtEdgeIntersect() { } @Test - public void testBoxContainedWithinAnotherIntersects() { + public void testGeometryBoxContainedWithinAnotherIntersects() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); @@ -128,7 +146,7 @@ public void testBoxContainedWithinAnotherIntersects() { } @Test - public void testBoxesWithZCoordinate() { + public void testGeometryBoxesWithZCoordinate() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); @@ -155,7 +173,7 @@ public void testBoxesWithZCoordinate() { } @Test - public void testBoxesWithMCoordinate() { + public void testGeometryBoxesWithMCoordinate() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); @@ -237,7 +255,7 @@ public void testGeometryWrapAroundOnB() { } @Test - public void testBothGeometryWrappingAround() { + public void testBothGeometriesWrappingAround() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); @@ -390,7 +408,7 @@ public void testInvalidGeographyLongitude() { } @Test - public void testExtremeLongitudeBoundaries() { + public void testExtremeGeographyLongitudeBoundaries() { // Tests valid boxes at the extreme boundaries of longitude Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = @@ -521,22 +539,4 @@ public void testMixedDimensionsXYZvsXYM() { assertThat(evaluator.intersects(box1, box2)).isTrue(); assertThat(evaluator.intersects(box2, box1)).isTrue(); } - - @Test - public void testSphericalGeographyType() { - Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); - GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = - GeospatialPredicateEvaluators.create(geographyType); - - assertThat(evaluator).isInstanceOf(GeospatialPredicateEvaluators.GeographyEvaluator.class); - } - - @Test - public void testUnsupportedType() { - Type stringType = Types.StringType.get(); - - assertThatThrownBy(() -> GeospatialPredicateEvaluators.create(stringType)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageContaining("Unsupported type for BoundingBox"); - } } From 5fa18f2fed565ba9e3dec56e3ca6a52e693041c4 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Tue, 23 Sep 2025 16:09:40 +0800 Subject: [PATCH 08/15] Fix review comments --- .../main/java/org/apache/iceberg/geospatial/BoundingBox.java | 3 ++- .../java/org/apache/iceberg/geospatial/GeospatialBound.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java index a8056e424f60..363f7cfa33ea 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java @@ -51,7 +51,8 @@ public static BoundingBox fromByteBuffers(ByteBuffer min, ByteBuffer max) { * @return a BoundingBox instance */ public static BoundingBox fromByteBuffer(ByteBuffer buffer) { - Preconditions.checkArgument(buffer.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + Preconditions.checkArgument( + buffer.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); int minLen = buffer.getInt(); ByteBuffer min = buffer.slice().order(ByteOrder.LITTLE_ENDIAN); diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java index febd158db2f2..efaff9be4fab 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java @@ -59,7 +59,8 @@ public class GeospatialBound { * @throws IllegalArgumentException if the buffer has an invalid size */ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { - Preconditions.checkArgument(buffer.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + Preconditions.checkArgument( + buffer.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); int size = buffer.remaining(); Preconditions.checkArgument( size == 2 * Double.BYTES || size == 3 * Double.BYTES || size == 4 * Double.BYTES, From 71931376ea63574a3d1a07d509c03b94015ec7b0 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 16 Oct 2025 17:13:40 +0800 Subject: [PATCH 09/15] Remove support for geometry X wraparound --- .../GeospatialPredicateEvaluators.java | 132 ++++++++++-------- .../TestGeospatialPredicateEvaluators.java | 67 +-------- 2 files changed, 80 insertions(+), 119 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java index 8b6e2cf223d4..9df0f5e3a0c3 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java @@ -72,57 +72,45 @@ public static GeographyEvaluator create(Types.GeographyType type) { } public static class GeometryEvaluator implements GeospatialPredicateEvaluator { - @Override - public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { - return intersectsWithWrapAround(bbox1, bbox2); - } /** - * Check if two bounding boxes intersect, taking wrap-around into account. - * - *

Wraparound (or antimeridian crossing) occurs when a geography crosses the 180°/-180° - * longitude line on a map. In these cases, the minimum X value is greater than the maximum X - * value (xmin > xmax). This represents a bounding box that wraps around the globe. - * - *

For example, a bounding box with xmin=170° and xmax=-170° represents an area that spans - * from 170° east to 190° east (or equivalently, -170° west). This is important for geometries - * that cross the antimeridian, like a path from Japan to Alaska. - * - *

When xmin > xmax, a point matches if its X coordinate is either X ≥ xmin OR X ≤ xmax, - * rather than the usual X ≥ xmin AND X ≤ xmax. In geographic terms, if the westernmost - * longitude is greater than the easternmost longitude, this indicates an antimeridian crossing. - * - *

The Iceberg specification does not explicitly rule out the use of wrap-around in bounding - * boxes for geometry types, so we handle wrap-around for both geography and geometry bounding - * boxes. + * Check if two bounding boxes intersect * * @param bbox1 the first bounding box * @param bbox2 the second bounding box * @return true if the bounding boxes intersect */ - static boolean intersectsWithWrapAround(BoundingBox bbox1, BoundingBox bbox2) { + @Override + public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { + if (!intersectsYZM(bbox1, bbox2)) { + return false; + } + + // Check X dimension (longitude/easting) - no wrap-around + return rangeIntersects(bbox1.min().x(), bbox1.max().x(), bbox2.min().x(), bbox2.max().x()); + } + + static boolean intersectsYZM(BoundingBox bbox1, BoundingBox bbox2) { // Check Z dimension (elevation) if both boxes have Z coordinates - no wrap-around if (bbox1.min().hasZ() && bbox1.max().hasZ() && bbox2.min().hasZ() && bbox2.max().hasZ()) { - if (!intersects(bbox1.min().z(), bbox1.max().z(), bbox2.min().z(), bbox2.max().z())) { + if (!rangeIntersects(bbox1.min().z(), bbox1.max().z(), bbox2.min().z(), bbox2.max().z())) { return false; } } // Check M dimension (measure) if both boxes have M coordinates - no wrap-around if (bbox1.min().hasM() && bbox1.max().hasM() && bbox2.min().hasM() && bbox2.max().hasM()) { - if (!intersects(bbox1.min().m(), bbox1.max().m(), bbox2.min().m(), bbox2.max().m())) { + if (!rangeIntersects(bbox1.min().m(), bbox1.max().m(), bbox2.min().m(), bbox2.max().m())) { return false; } } // Check Y dimension (latitude/northing) - no wrap-around - if (!intersects(bbox1.min().y(), bbox1.max().y(), bbox2.min().y(), bbox2.max().y())) { + if (!rangeIntersects(bbox1.min().y(), bbox1.max().y(), bbox2.min().y(), bbox2.max().y())) { return false; } - // Check X dimension (longitude/easting) - with wrap-around support - return intersectsWithWrapAround( - bbox1.min().x(), bbox1.max().x(), bbox2.min().x(), bbox2.max().x()); + return true; } /** @@ -135,47 +123,43 @@ static boolean intersectsWithWrapAround(BoundingBox bbox1, BoundingBox bbox2) { * @param max2 maximum of second interval * @return true if the intervals intersect */ - private static boolean intersects(double min1, double max1, double min2, double max2) { + static boolean rangeIntersects(double min1, double max1, double min2, double max2) { return min1 <= max2 && max1 >= min2; } + } + public static class GeographyEvaluator implements GeospatialPredicateEvaluator { /** - * Check if two intervals intersect with wrap-around support for longitude/X dimension. Handles - * antimeridian crossing where min > max indicates wrapping around the globe. + * Check if two bounding boxes intersect, taking wrap-around into account. * - * @param min1 minimum of first interval (may be > max1 if wrapping) - * @param max1 maximum of first interval (may be < min1 if wrapping) - * @param min2 minimum of second interval (may be > max2 if wrapping) - * @param max2 maximum of second interval (may be < min2 if wrapping) - * @return true if the intervals intersect + *

Wraparound (or antimeridian crossing) occurs when a geography crosses the 180°/-180° + * longitude line on a map. In these cases, the minimum X value is greater than the maximum X + * value (xmin > xmax). This represents a bounding box that wraps around the globe. + * + *

For example, a bounding box with xmin=170° and xmax=-170° represents an area that spans + * from 170° east to 190° east (or equivalently, -170° west). This is important for geometries + * that cross the antimeridian, like a path from Japan to Alaska. + * + *

When xmin > xmax, a point matches if its X coordinate is either X ≥ xmin OR X ≤ xmax, + * rather than the usual X ≥ xmin AND X ≤ xmax. In geographic terms, if the westernmost + * longitude is greater than the easternmost longitude, this indicates an antimeridian crossing. + * + * @param bbox1 the first bounding box + * @param bbox2 the second bounding box + * @return true if the bounding boxes intersect */ - private static boolean intersectsWithWrapAround( - double min1, double max1, double min2, double max2) { - boolean interval1WrapsAround = min1 > max1; - boolean interval2WrapsAround = min2 > max2; - - if (!interval1WrapsAround && !interval2WrapsAround) { - // No wrap-around in either interval - use regular intersection - return intersects(min1, max1, min2, max2); - } else if (interval1WrapsAround && interval2WrapsAround) { - // Both intervals wrap around - they must intersect somewhere - return true; - } else if (interval1WrapsAround) { - // interval1 wraps around, interval2 does not - return min1 <= max2 || max1 >= min2; - } else { - // interval2 wraps around, interval1 does not - return min2 <= max1 || max2 >= min1; - } - } - } - - public static class GeographyEvaluator implements GeospatialPredicateEvaluator { @Override public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { validateBoundingBox(bbox1); validateBoundingBox(bbox2); - return GeometryEvaluator.intersectsWithWrapAround(bbox1, bbox2); + + if (!GeometryEvaluator.intersectsYZM(bbox1, bbox2)) { + return false; + } + + // Check X dimension (longitude/easting) - with wrap-around + return rangeIntersectsWithWrapAround( + bbox1.min().x(), bbox1.max().x(), bbox2.min().x(), bbox2.max().x()); } /** @@ -196,5 +180,35 @@ private void validateBoundingBox(BoundingBox bbox) { "Longitude out of range: %s", bbox); } + + /** + * Check if two intervals intersect with wrap-around support for longitude/X dimension. Handles + * antimeridian crossing where min > max indicates wrapping around the globe. + * + * @param min1 minimum of first interval (may be > max1 if wrapping) + * @param max1 maximum of first interval (may be < min1 if wrapping) + * @param min2 minimum of second interval (may be > max2 if wrapping) + * @param max2 maximum of second interval (may be < min2 if wrapping) + * @return true if the intervals intersect + */ + private static boolean rangeIntersectsWithWrapAround( + double min1, double max1, double min2, double max2) { + boolean interval1WrapsAround = min1 > max1; + boolean interval2WrapsAround = min2 > max2; + + if (!interval1WrapsAround && !interval2WrapsAround) { + // No wrap-around in either interval - use regular intersection + return GeometryEvaluator.rangeIntersects(min1, max1, min2, max2); + } else if (interval1WrapsAround && interval2WrapsAround) { + // Both intervals wrap around - they must intersect somewhere + return true; + } else if (interval1WrapsAround) { + // interval1 wraps around, interval2 does not + return min1 <= max2 || max1 >= min2; + } else { + // interval2 wraps around, interval1 does not + return min2 <= max1 || max2 >= min1; + } + } } } diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java index d424bc8a743c..7e6cae7f7efd 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java @@ -200,78 +200,25 @@ public void testGeometryBoxesWithMCoordinate() { } @Test - public void testGeometryWrapAroundOnA() { + public void testGeometryBoxesWithEmptyXRange() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); - // First box wraps around antimeridian (min.x > max.x), second doesn't GeospatialBound min1 = GeospatialBound.createXY(170.0, 0.0); GeospatialBound max1 = GeospatialBound.createXY(-170.0, 10.0); BoundingBox box1 = new BoundingBox(min1, max1); - - // Box that overlaps with the part after the wrap around GeospatialBound min2 = GeospatialBound.createXY(-175.0, 5.0); GeospatialBound max2 = GeospatialBound.createXY(-160.0, 15.0); BoundingBox box2 = new BoundingBox(min2, max2); - - assertThat(evaluator.intersects(box1, box2)).isTrue(); - assertThat(evaluator.intersects(box2, box1)).isTrue(); - - // Box that overlaps with the part before the wrap around - GeospatialBound min3 = GeospatialBound.createXY(160.0, 5.0); - GeospatialBound max3 = GeospatialBound.createXY(175.0, 15.0); + GeospatialBound min3 = GeospatialBound.createXY(160.0, 0.0); + GeospatialBound max3 = GeospatialBound.createXY(-160.0, 10.0); BoundingBox box3 = new BoundingBox(min3, max3); - assertThat(evaluator.intersects(box1, box3)).isTrue(); - assertThat(evaluator.intersects(box3, box1)).isTrue(); - - // Box that doesn't overlap with either part - GeospatialBound min4 = GeospatialBound.createXY(-150.0, 20.0); - GeospatialBound max4 = GeospatialBound.createXY(-140.0, 30.0); - BoundingBox box4 = new BoundingBox(min4, max4); - - assertThat(evaluator.intersects(box1, box4)).isFalse(); - assertThat(evaluator.intersects(box4, box1)).isFalse(); - } - - @Test - public void testGeometryWrapAroundOnB() { - Type geometryType = Types.GeometryType.crs84(); - GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = - GeospatialPredicateEvaluators.create(geometryType); - - // First box doesn't wrap around, second does (min.x > max.x) - GeospatialBound min1 = GeospatialBound.createXY(-175.0, 5.0); - GeospatialBound max1 = GeospatialBound.createXY(-160.0, 15.0); - BoundingBox box1 = new BoundingBox(min1, max1); - - GeospatialBound min2 = GeospatialBound.createXY(170.0, 0.0); - GeospatialBound max2 = GeospatialBound.createXY(-170.0, 10.0); - BoundingBox box2 = new BoundingBox(min2, max2); - - assertThat(evaluator.intersects(box1, box2)).isTrue(); - assertThat(evaluator.intersects(box2, box1)).isTrue(); - } - - @Test - public void testBothGeometriesWrappingAround() { - Type geometryType = Types.GeometryType.crs84(); - GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = - GeospatialPredicateEvaluators.create(geometryType); - - // Both boxes wrap around (min.x > max.x) - GeospatialBound min1 = GeospatialBound.createXY(170.0, 0.0); - GeospatialBound max1 = GeospatialBound.createXY(-170.0, 10.0); - BoundingBox box1 = new BoundingBox(min1, max1); - - GeospatialBound min2 = GeospatialBound.createXY(160.0, 5.0); - GeospatialBound max2 = GeospatialBound.createXY(-160.0, 15.0); - BoundingBox box2 = new BoundingBox(min2, max2); - - // When both wrap around, they must intersect - assertThat(evaluator.intersects(box1, box2)).isTrue(); - assertThat(evaluator.intersects(box2, box1)).isTrue(); + assertThat(evaluator.intersects(box1, box2)).isFalse(); + assertThat(evaluator.intersects(box2, box1)).isFalse(); + assertThat(evaluator.intersects(box1, box3)).isFalse(); + assertThat(evaluator.intersects(box3, box1)).isFalse(); } @Test From acc7034cc898bb7359ac31a44cd77703a90fdde4 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 23 Oct 2025 16:21:27 +0800 Subject: [PATCH 10/15] Fix review comments --- .../iceberg/geospatial/BoundingBox.java | 2 +- .../iceberg/geospatial/GeospatialBound.java | 23 ++-- .../GeospatialPredicateEvaluators.java | 101 +++++++++++------- .../geospatial/TestGeospatialBound.java | 1 + .../TestGeospatialPredicateEvaluators.java | 55 ++++++---- 5 files changed, 119 insertions(+), 63 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java index 363f7cfa33ea..1ab49ae197ef 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java @@ -52,7 +52,7 @@ public static BoundingBox fromByteBuffers(ByteBuffer min, ByteBuffer max) { */ public static BoundingBox fromByteBuffer(ByteBuffer buffer) { Preconditions.checkArgument( - buffer.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + buffer.order() == ByteOrder.LITTLE_ENDIAN, "Invalid byte order: big endian"); int minLen = buffer.getInt(); ByteBuffer min = buffer.slice().order(ByteOrder.LITTLE_ENDIAN); diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java index efaff9be4fab..f06678d08008 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java @@ -48,9 +48,13 @@ public class GeospatialBound { /** * Parses a geospatial bound from a byte buffer according to Iceberg spec. * - *

Based on the buffer size, this method determines which coordinates are present: - 16 bytes - * (2 doubles): x and y only - 24 bytes (3 doubles): x, y, and z - 32 bytes (4 doubles): x, y, z - * (might be NaN), and m + *

Based on the buffer size, this method determines which coordinates are present: + * + *

* *

The ordinates are encoded as 8-byte little-endian IEEE 754 values. * @@ -60,7 +64,7 @@ public class GeospatialBound { */ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { Preconditions.checkArgument( - buffer.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + buffer.order() == ByteOrder.LITTLE_ENDIAN, "Invalid byte order: big endian"); int size = buffer.remaining(); Preconditions.checkArgument( size == 2 * Double.BYTES || size == 3 * Double.BYTES || size == 4 * Double.BYTES, @@ -91,9 +95,14 @@ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { /** * Serializes this geospatial bound to a byte buffer according to Iceberg spec. * - *

Following the Iceberg spec, the bound is serialized based on which coordinates are set: - - * x:y (2 doubles) when both z and m are unset - x:y:z (3 doubles) when only m is unset - - * x:y:NaN:m (4 doubles) when only z is unset - x:y:z:m (4 doubles) when all coordinates are set + *

Following the Iceberg spec, the bound is serialized based on which coordinates are set: + * + *

* * @return A ByteBuffer containing the serialized geospatial bound */ diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java index 9df0f5e3a0c3..908ce5fa03a7 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java @@ -82,6 +82,9 @@ public static class GeometryEvaluator implements GeospatialPredicateEvaluator { */ @Override public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { + validateBoundingBox(bbox1); + validateBoundingBox(bbox2); + if (!intersectsYZM(bbox1, bbox2)) { return false; } @@ -90,41 +93,21 @@ public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { return rangeIntersects(bbox1.min().x(), bbox1.max().x(), bbox2.min().x(), bbox2.max().x()); } - static boolean intersectsYZM(BoundingBox bbox1, BoundingBox bbox2) { - // Check Z dimension (elevation) if both boxes have Z coordinates - no wrap-around - if (bbox1.min().hasZ() && bbox1.max().hasZ() && bbox2.min().hasZ() && bbox2.max().hasZ()) { - if (!rangeIntersects(bbox1.min().z(), bbox1.max().z(), bbox2.min().z(), bbox2.max().z())) { - return false; - } - } - - // Check M dimension (measure) if both boxes have M coordinates - no wrap-around - if (bbox1.min().hasM() && bbox1.max().hasM() && bbox2.min().hasM() && bbox2.max().hasM()) { - if (!rangeIntersects(bbox1.min().m(), bbox1.max().m(), bbox2.min().m(), bbox2.max().m())) { - return false; - } - } - - // Check Y dimension (latitude/northing) - no wrap-around - if (!rangeIntersects(bbox1.min().y(), bbox1.max().y(), bbox2.min().y(), bbox2.max().y())) { - return false; - } - - return true; - } - /** - * Check if two intervals intersect using regular interval logic. Two intervals [min1, max1] and - * [min2, max2] intersect if min1 <= max2 AND max1 >= min2. + * For geometry types, xmin must not be greater than xmax, ymin must not be greater than ymax. * - * @param min1 minimum of first interval - * @param max1 maximum of first interval - * @param min2 minimum of second interval - * @param max2 maximum of second interval - * @return true if the intervals intersect + * @param bbox the bounding box to validate + * @throws IllegalArgumentException if the bounding box is invalid */ - static boolean rangeIntersects(double min1, double max1, double min2, double max2) { - return min1 <= max2 && max1 >= min2; + private void validateBoundingBox(BoundingBox bbox) { + Preconditions.checkArgument( + bbox.min().x() <= bbox.max().x(), + "Invalid X range: %s. xmin cannot be greater than xmax", + bbox); + Preconditions.checkArgument( + bbox.min().y() <= bbox.max().y(), + "Invalid Y range: %s. ymin cannot be greater than ymax", + bbox); } } @@ -153,7 +136,7 @@ public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { validateBoundingBox(bbox1); validateBoundingBox(bbox2); - if (!GeometryEvaluator.intersectsYZM(bbox1, bbox2)) { + if (!intersectsYZM(bbox1, bbox2)) { return false; } @@ -171,13 +154,22 @@ public boolean intersects(BoundingBox bbox1, BoundingBox bbox2) { */ private void validateBoundingBox(BoundingBox bbox) { Preconditions.checkArgument( - bbox.min().y() >= -90.0d && bbox.max().y() <= 90.0d, "Latitude out of range: %s", bbox); + bbox.min().y() >= -90.0d + && bbox.min().y() <= 90.0d + && bbox.max().y() >= -90.0d + && bbox.max().y() <= 90.0d, + "Invalid latitude: %s. Out of range: [-90°, 90°]", + bbox); Preconditions.checkArgument( bbox.min().x() >= -180.0d && bbox.min().x() <= 180.0d && bbox.max().x() >= -180.0d && bbox.max().x() <= 180.0d, - "Longitude out of range: %s", + "Invalid longitude: %s. Out of range: [-180°, 180°]", + bbox); + Preconditions.checkArgument( + bbox.min().y() <= bbox.max().y(), + "Invalid latitude range: %s. ymin cannot be greater than ymax", bbox); } @@ -198,7 +190,7 @@ private static boolean rangeIntersectsWithWrapAround( if (!interval1WrapsAround && !interval2WrapsAround) { // No wrap-around in either interval - use regular intersection - return GeometryEvaluator.rangeIntersects(min1, max1, min2, max2); + return rangeIntersects(min1, max1, min2, max2); } else if (interval1WrapsAround && interval2WrapsAround) { // Both intervals wrap around - they must intersect somewhere return true; @@ -211,4 +203,41 @@ private static boolean rangeIntersectsWithWrapAround( } } } + + private static boolean intersectsYZM(BoundingBox bbox1, BoundingBox bbox2) { + // Check Z dimension (elevation) if both boxes have Z coordinates - no wrap-around + if (bbox1.min().hasZ() && bbox1.max().hasZ() && bbox2.min().hasZ() && bbox2.max().hasZ()) { + if (!rangeIntersects(bbox1.min().z(), bbox1.max().z(), bbox2.min().z(), bbox2.max().z())) { + return false; + } + } + + // Check M dimension (measure) if both boxes have M coordinates - no wrap-around + if (bbox1.min().hasM() && bbox1.max().hasM() && bbox2.min().hasM() && bbox2.max().hasM()) { + if (!rangeIntersects(bbox1.min().m(), bbox1.max().m(), bbox2.min().m(), bbox2.max().m())) { + return false; + } + } + + // Check Y dimension (latitude/northing) - no wrap-around + if (!rangeIntersects(bbox1.min().y(), bbox1.max().y(), bbox2.min().y(), bbox2.max().y())) { + return false; + } + + return true; + } + + /** + * Check if two intervals intersect using regular interval logic. Two intervals [min1, max1] and + * [min2, max2] intersect if min1 <= max2 AND max1 >= min2. + * + * @param min1 minimum of first interval + * @param max1 maximum of first interval + * @param min2 minimum of second interval + * @param max2 maximum of second interval + * @return true if the intervals intersect + */ + private static boolean rangeIntersects(double min1, double max1, double min2, double max2) { + return min1 <= max2 && max1 >= min2; + } } diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java index f4b3a7deebfa..920c658632dc 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java @@ -165,6 +165,7 @@ public void testSerde() { assertThat(xyz.hasZ()).isTrue(); assertThat(xyz.hasM()).isFalse(); assertThat(ByteBuffers.toByteArray(xyz.toByteBuffer())).isEqualTo(xyzBytes); + // Test XYM format (32 bytes: x:y:NaN:m) // These bytes represent x=10.0, y=13.0, z=NaN, m=20.0 byte[] xymBytes = diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java index 7e6cae7f7efd..00841441b6f1 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java @@ -200,25 +200,26 @@ public void testGeometryBoxesWithMCoordinate() { } @Test - public void testGeometryBoxesWithEmptyXRange() { + public void testGeometryBoxesWithInvalidRanges() { Type geometryType = Types.GeometryType.crs84(); GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = GeospatialPredicateEvaluators.create(geometryType); + // Invalid X range GeospatialBound min1 = GeospatialBound.createXY(170.0, 0.0); GeospatialBound max1 = GeospatialBound.createXY(-170.0, 10.0); BoundingBox box1 = new BoundingBox(min1, max1); - GeospatialBound min2 = GeospatialBound.createXY(-175.0, 5.0); - GeospatialBound max2 = GeospatialBound.createXY(-160.0, 15.0); - BoundingBox box2 = new BoundingBox(min2, max2); - GeospatialBound min3 = GeospatialBound.createXY(160.0, 0.0); - GeospatialBound max3 = GeospatialBound.createXY(-160.0, 10.0); - BoundingBox box3 = new BoundingBox(min3, max3); + assertThatThrownBy(() -> evaluator.intersects(box1, box1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Invalid X range: .* xmin cannot be greater than xmax"); - assertThat(evaluator.intersects(box1, box2)).isFalse(); - assertThat(evaluator.intersects(box2, box1)).isFalse(); - assertThat(evaluator.intersects(box1, box3)).isFalse(); - assertThat(evaluator.intersects(box3, box1)).isFalse(); + // Invalid Y range + GeospatialBound min2 = GeospatialBound.createXY(0.0, 10.0); + GeospatialBound max2 = GeospatialBound.createXY(10.0, 9.0); + BoundingBox box2 = new BoundingBox(min2, max2); + assertThatThrownBy(() -> evaluator.intersects(box2, box2)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Invalid Y range: .* ymin cannot be greater than ymax"); } @Test @@ -276,8 +277,15 @@ public void testGeographyWrapAround() { GeospatialBound max2 = GeospatialBound.createXY(-160.0, 15.0); BoundingBox box2 = new BoundingBox(min2, max2); + // Box that does not overlap with box1 + GeospatialBound min3 = GeospatialBound.createXY(-169.0, 5.0); + GeospatialBound max3 = GeospatialBound.createXY(-160.0, 15.0); + BoundingBox box3 = new BoundingBox(min3, max3); + assertThat(evaluator.intersects(box1, box2)).isTrue(); assertThat(evaluator.intersects(box2, box1)).isTrue(); + assertThat(evaluator.intersects(box1, box3)).isFalse(); + assertThat(evaluator.intersects(box3, box1)).isFalse(); } @Test @@ -296,25 +304,34 @@ public void testInvalidGeographyLatitude() { GeospatialBound max2 = GeospatialBound.createXY(10.0, 91.0); BoundingBox box2 = new BoundingBox(min2, max2); + // Box with min latitude > max longitude + GeospatialBound min3 = GeospatialBound.createXY(0.0, 10.0); + GeospatialBound max3 = GeospatialBound.createXY(10.0, 9.0); + BoundingBox box3 = new BoundingBox(min3, max3); + GeospatialBound validMin = GeospatialBound.createXY(0.0, 0.0); GeospatialBound validMax = GeospatialBound.createXY(10.0, 10.0); BoundingBox validBox = new BoundingBox(validMin, validMax); assertThatThrownBy(() -> evaluator.intersects(box1, validBox)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Latitude out of range"); + .hasMessageMatching("Invalid latitude: .* Out of range: \\[-90°, 90°\\]"); assertThatThrownBy(() -> evaluator.intersects(validBox, box1)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Latitude out of range"); + .hasMessageMatching("Invalid latitude: .* Out of range: \\[-90°, 90°\\]"); assertThatThrownBy(() -> evaluator.intersects(box2, validBox)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Latitude out of range"); + .hasMessageMatching("Invalid latitude: .* Out of range: \\[-90°, 90°\\]"); assertThatThrownBy(() -> evaluator.intersects(validBox, box2)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Latitude out of range"); + .hasMessageMatching("Invalid latitude: .* Out of range: \\[-90°, 90°\\]"); + + assertThatThrownBy(() -> evaluator.intersects(validBox, box3)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Invalid latitude range: .* ymin cannot be greater than ymax"); } @Test @@ -339,19 +356,19 @@ public void testInvalidGeographyLongitude() { assertThatThrownBy(() -> evaluator.intersects(box1, validBox)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Longitude out of range"); + .hasMessageMatching("Invalid longitude: .* Out of range: \\[-180°, 180°\\]"); assertThatThrownBy(() -> evaluator.intersects(validBox, box1)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Longitude out of range"); + .hasMessageMatching("Invalid longitude: .* Out of range: \\[-180°, 180°\\]"); assertThatThrownBy(() -> evaluator.intersects(box2, validBox)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Longitude out of range"); + .hasMessageMatching("Invalid longitude: .* Out of range: \\[-180°, 180°\\]"); assertThatThrownBy(() -> evaluator.intersects(validBox, box2)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Longitude out of range"); + .hasMessageMatching("Invalid longitude: .* Out of range: \\[-180°, 180°\\]"); } @Test From e8e28e1dff624b2e9685d37efb4357020e19bb2c Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Mon, 27 Oct 2025 10:47:23 +0800 Subject: [PATCH 11/15] Fix review comments by avoiding mutating the input ByteBuffer in fromByteBuffer methods, also removed geospatial type specific create functions for predicate evaluators. --- .../iceberg/geospatial/BoundingBox.java | 22 +++++++++++------ .../iceberg/geospatial/GeospatialBound.java | 24 +++++++++---------- .../GeospatialPredicateEvaluators.java | 23 ++---------------- .../iceberg/geospatial/TestBoundingBox.java | 14 +++++++++++ .../geospatial/TestGeospatialBound.java | 23 ++++++++++++++++++ 5 files changed, 66 insertions(+), 40 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java index 1ab49ae197ef..0556a247f5d0 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java @@ -51,16 +51,24 @@ public static BoundingBox fromByteBuffers(ByteBuffer min, ByteBuffer max) { * @return a BoundingBox instance */ public static BoundingBox fromByteBuffer(ByteBuffer buffer) { - Preconditions.checkArgument( - buffer.order() == ByteOrder.LITTLE_ENDIAN, "Invalid byte order: big endian"); + ByteBuffer tmp = buffer.duplicate(); + tmp.order(ByteOrder.LITTLE_ENDIAN); - int minLen = buffer.getInt(); - ByteBuffer min = buffer.slice().order(ByteOrder.LITTLE_ENDIAN); + int minLen = tmp.getInt(); + Preconditions.checkArgument( + minLen == 2 * Double.BYTES || minLen == 3 * Double.BYTES || minLen == 4 * Double.BYTES, + "Invalid geo spatial lower bound buffer size for: %s. Valid sizes are 16, 24, or 32 bytes.", + minLen); + ByteBuffer min = tmp.slice().order(ByteOrder.LITTLE_ENDIAN); min.limit(minLen); - buffer.position(buffer.position() + minLen); + tmp.position(tmp.position() + minLen); - int maxLen = buffer.getInt(); - ByteBuffer max = buffer.slice().order(ByteOrder.LITTLE_ENDIAN); + int maxLen = tmp.getInt(); + Preconditions.checkArgument( + maxLen == 2 * Double.BYTES || maxLen == 3 * Double.BYTES || maxLen == 4 * Double.BYTES, + "Invalid geo spatial upper bound buffer size: %s. Valid sizes are 16, 24, or 32 bytes.", + maxLen); + ByteBuffer max = tmp.slice().order(ByteOrder.LITTLE_ENDIAN); max.limit(maxLen); return fromByteBuffers(min, max); diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java index f06678d08008..b128ba0ac9da 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java @@ -63,9 +63,9 @@ public class GeospatialBound { * @throws IllegalArgumentException if the buffer has an invalid size */ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { - Preconditions.checkArgument( - buffer.order() == ByteOrder.LITTLE_ENDIAN, "Invalid byte order: big endian"); - int size = buffer.remaining(); + ByteBuffer tmp = buffer.duplicate(); + tmp.order(ByteOrder.LITTLE_ENDIAN); + int size = tmp.remaining(); Preconditions.checkArgument( size == 2 * Double.BYTES || size == 3 * Double.BYTES || size == 4 * Double.BYTES, "Invalid geo spatial bound buffer size: %s. Valid sizes are 16, 24, or 32 bytes.", @@ -73,21 +73,21 @@ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { if (size == 2 * Double.BYTES) { // x:y format (2 doubles) - double coordX = buffer.getDouble(); - double coordY = buffer.getDouble(); + double coordX = tmp.getDouble(); + double coordY = tmp.getDouble(); return createXY(coordX, coordY); } else if (size == 3 * Double.BYTES) { // x:y:z format (3 doubles) - double coordX = buffer.getDouble(); - double coordY = buffer.getDouble(); - double coordZ = buffer.getDouble(); + double coordX = tmp.getDouble(); + double coordY = tmp.getDouble(); + double coordZ = tmp.getDouble(); return createXYZ(coordX, coordY, coordZ); } else { // x:y:z:m format (4 doubles) - z might be NaN - double coordX = buffer.getDouble(); - double coordY = buffer.getDouble(); - double coordZ = buffer.getDouble(); - double coordM = buffer.getDouble(); + double coordX = tmp.getDouble(); + double coordY = tmp.getDouble(); + double coordZ = tmp.getDouble(); + double coordM = tmp.getDouble(); return new GeospatialBound(coordX, coordY, coordZ, coordM); } } diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java index 908ce5fa03a7..d6754e1a9d98 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java @@ -20,7 +20,6 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; public class GeospatialPredicateEvaluators { private GeospatialPredicateEvaluators() {} @@ -45,32 +44,14 @@ public interface GeospatialPredicateEvaluator { public static GeospatialPredicateEvaluator create(Type type) { switch (type.typeId()) { case GEOMETRY: - return create((Types.GeometryType) type); + return new GeometryEvaluator(); case GEOGRAPHY: - return create((Types.GeographyType) type); + return new GeographyEvaluator(); default: throw new UnsupportedOperationException("Unsupported type for BoundingBox: " + type); } } - /** - * Create an evaluator for evaluating bounding box relationship for planar geometries - * - * @return the evaluator - */ - public static GeometryEvaluator create(Types.GeometryType type) { - return new GeometryEvaluator(); - } - - /** - * Create an evaluator for evaluating bounding box relationship for geographies - * - * @return the evaluator - */ - public static GeographyEvaluator create(Types.GeographyType type) { - return new GeographyEvaluator(); - } - public static class GeometryEvaluator implements GeospatialPredicateEvaluator { /** diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java index af2328cce890..d788596e1cb1 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java @@ -98,6 +98,20 @@ public void testToString() { "BoundingBox{min=GeospatialBound(x=1.0, y=2.0), max=GeospatialBound(x=3.0, y=4.0)}"); } + @Test + public void testFromByteBuffer() { + GeospatialBound min = GeospatialBound.createXY(1.0, 2.0); + GeospatialBound max = GeospatialBound.createXY(3.0, 4.0); + BoundingBox box = new BoundingBox(min, max); + ByteBuffer buffer = box.toByteBuffer(); + for (ByteOrder byteOrder : new ByteOrder[] {ByteOrder.BIG_ENDIAN, ByteOrder.LITTLE_ENDIAN}) { + buffer.order(byteOrder); + assertThat(BoundingBox.fromByteBuffer(buffer)).isEqualTo(box); + assertThat(buffer.position()).isEqualTo(0); + assertThat(buffer.order()).isEqualTo(byteOrder); + } + } + @Test public void testRoundTripSerDe() { GeospatialBound min = GeospatialBound.createXY(1.0, 2.0); diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java index 920c658632dc..292c780836ca 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java @@ -21,6 +21,7 @@ import static org.assertj.core.api.Assertions.assertThat; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import org.apache.iceberg.util.ByteBuffers; import org.junit.jupiter.api.Test; @@ -203,6 +204,28 @@ public void testSerde() { assertThat(ByteBuffers.toByteArray(xyzm.toByteBuffer())).isEqualTo(xyzmBytes); } + @Test + public void testFromByteBuffer() { + // Test XY format (16 bytes: x:y) + // These bytes represent x=10.0, y=13.0 + byte[] xyBytes = + new byte[] { + 0, 0, 0, 0, 0, 0, 36, 64, // 10.0 in little-endian IEEE 754 + 0, 0, 0, 0, 0, 0, 42, 64 // 13.0 in little-endian IEEE 754 + }; + ByteBuffer xyBuffer = ByteBuffer.wrap(xyBytes); + for (ByteOrder endianness : new ByteOrder[] {ByteOrder.BIG_ENDIAN, ByteOrder.LITTLE_ENDIAN}) { + xyBuffer.order(endianness); + GeospatialBound xy = GeospatialBound.fromByteBuffer(xyBuffer); + assertThat(xy.x()).isEqualTo(10.0); + assertThat(xy.y()).isEqualTo(13.0); + assertThat(xy.hasZ()).isFalse(); + assertThat(xy.hasM()).isFalse(); + assertThat(xyBuffer.position()).isEqualTo(0); + assertThat(xyBuffer.order()).isEqualTo(endianness); + } + } + private GeospatialBound roundTripSerDe(GeospatialBound original) { ByteBuffer buffer = original.toByteBuffer(); return GeospatialBound.fromByteBuffer(buffer); From 43144b10ccd0b6fdf72d007b2063e94ffeaa77dd Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 30 Oct 2025 17:04:21 +0800 Subject: [PATCH 12/15] More review comment fixes --- .../iceberg/geospatial/GeospatialBound.java | 2 +- .../GeospatialPredicateEvaluators.java | 17 ++++++++ .../TestGeospatialPredicateEvaluators.java | 39 +++++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java index b128ba0ac9da..e674d28db97d 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java @@ -146,7 +146,7 @@ public ByteBuffer toByteBuffer() { * @throws IllegalArgumentException if the byte array has an invalid length */ public static GeospatialBound fromByteArray(byte[] bytes) { - return fromByteBuffer(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)); + return fromByteBuffer(ByteBuffer.wrap(bytes)); } /** diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java index d6754e1a9d98..9eca7ee796ab 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java @@ -89,6 +89,7 @@ private void validateBoundingBox(BoundingBox bbox) { bbox.min().y() <= bbox.max().y(), "Invalid Y range: %s. ymin cannot be greater than ymax", bbox); + validateZMRange(bbox); } } @@ -152,6 +153,7 @@ private void validateBoundingBox(BoundingBox bbox) { bbox.min().y() <= bbox.max().y(), "Invalid latitude range: %s. ymin cannot be greater than ymax", bbox); + validateZMRange(bbox); } /** @@ -208,6 +210,21 @@ private static boolean intersectsYZM(BoundingBox bbox1, BoundingBox bbox2) { return true; } + private static void validateZMRange(BoundingBox bbox) { + if (bbox.min().hasZ() && bbox.max().hasZ()) { + Preconditions.checkArgument( + bbox.min().z() <= bbox.max().z(), + "Invalid Z range: %s. zmin cannot be greater than zmax", + bbox); + } + if (bbox.min().hasM() && bbox.max().hasM()) { + Preconditions.checkArgument( + bbox.min().m() <= bbox.max().m(), + "Invalid M range: %s. mmin cannot be greater than mmax", + bbox); + } + } + /** * Check if two intervals intersect using regular interval logic. Two intervals [min1, max1] and * [min2, max2] intersect if min1 <= max2 AND max1 >= min2. diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java index 00841441b6f1..81a6a19d0bb7 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialPredicateEvaluators.java @@ -220,6 +220,22 @@ public void testGeometryBoxesWithInvalidRanges() { assertThatThrownBy(() -> evaluator.intersects(box2, box2)) .isInstanceOf(IllegalArgumentException.class) .hasMessageMatching("Invalid Y range: .* ymin cannot be greater than ymax"); + + // Invalid Z range + GeospatialBound min3 = GeospatialBound.createXYZ(0.0, 0.0, 10.0); + GeospatialBound max3 = GeospatialBound.createXYZ(10.0, 10.0, 9.0); + BoundingBox box3 = new BoundingBox(min3, max3); + assertThatThrownBy(() -> evaluator.intersects(box3, box3)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Invalid Z range: .* zmin cannot be greater than zmax"); + + // Invalid M range + GeospatialBound min4 = GeospatialBound.createXYZM(0.0, 0.0, 0, 10.0); + GeospatialBound max4 = GeospatialBound.createXYZM(10.0, 10.0, 0, 9.0); + BoundingBox box4 = new BoundingBox(min4, max4); + assertThatThrownBy(() -> evaluator.intersects(box4, box4)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Invalid M range: .* mmin cannot be greater than mmax"); } @Test @@ -371,6 +387,29 @@ public void testInvalidGeographyLongitude() { .hasMessageMatching("Invalid longitude: .* Out of range: \\[-180°, 180°\\]"); } + @Test + public void testInvalidGeographyZMRanges() { + Type geographyType = Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL); + GeospatialPredicateEvaluators.GeospatialPredicateEvaluator evaluator = + GeospatialPredicateEvaluators.create(geographyType); + + // Invalid Z range + GeospatialBound min1 = GeospatialBound.createXYZ(0.0, 0.0, 10.0); + GeospatialBound max1 = GeospatialBound.createXYZ(10.0, 10.0, 9.0); + BoundingBox box1 = new BoundingBox(min1, max1); + assertThatThrownBy(() -> evaluator.intersects(box1, box1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Invalid Z range: .* zmin cannot be greater than zmax"); + + // Invalid M range + GeospatialBound min2 = GeospatialBound.createXYZM(0.0, 0.0, 0, 10.0); + GeospatialBound max2 = GeospatialBound.createXYZM(10.0, 10.0, 0, 9.0); + BoundingBox box2 = new BoundingBox(min2, max2); + assertThatThrownBy(() -> evaluator.intersects(box2, box2)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Invalid M range: .* mmin cannot be greater than mmax"); + } + @Test public void testExtremeGeographyLongitudeBoundaries() { // Tests valid boxes at the extreme boundaries of longitude From 5bfbe13674bc4614a32fb5096e4a0728784b19a3 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Tue, 11 Nov 2025 09:22:06 +0800 Subject: [PATCH 13/15] Check the byte order of input buffers when serializing/deserializing geometry bounds --- .../apache/iceberg/geospatial/BoundingBox.java | 2 ++ .../iceberg/geospatial/GeospatialBound.java | 4 +++- .../iceberg/geospatial/TestBoundingBox.java | 8 ++------ .../geospatial/TestGeospatialBound.java | 18 +++++++----------- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java index 0556a247f5d0..72996d2e7680 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java @@ -51,6 +51,8 @@ public static BoundingBox fromByteBuffers(ByteBuffer min, ByteBuffer max) { * @return a BoundingBox instance */ public static BoundingBox fromByteBuffer(ByteBuffer buffer) { + Preconditions.checkArgument( + buffer.order() == ByteOrder.LITTLE_ENDIAN, "Invalid byte order: big endian"); ByteBuffer tmp = buffer.duplicate(); tmp.order(ByteOrder.LITTLE_ENDIAN); diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java index e674d28db97d..3425a4663801 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java @@ -63,6 +63,8 @@ public class GeospatialBound { * @throws IllegalArgumentException if the buffer has an invalid size */ public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { + Preconditions.checkArgument( + buffer.order() == ByteOrder.LITTLE_ENDIAN, "Invalid byte order: big endian"); ByteBuffer tmp = buffer.duplicate(); tmp.order(ByteOrder.LITTLE_ENDIAN); int size = tmp.remaining(); @@ -146,7 +148,7 @@ public ByteBuffer toByteBuffer() { * @throws IllegalArgumentException if the byte array has an invalid length */ public static GeospatialBound fromByteArray(byte[] bytes) { - return fromByteBuffer(ByteBuffer.wrap(bytes)); + return fromByteBuffer(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)); } /** diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java index d788596e1cb1..84a9e10546c4 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java @@ -104,12 +104,8 @@ public void testFromByteBuffer() { GeospatialBound max = GeospatialBound.createXY(3.0, 4.0); BoundingBox box = new BoundingBox(min, max); ByteBuffer buffer = box.toByteBuffer(); - for (ByteOrder byteOrder : new ByteOrder[] {ByteOrder.BIG_ENDIAN, ByteOrder.LITTLE_ENDIAN}) { - buffer.order(byteOrder); - assertThat(BoundingBox.fromByteBuffer(buffer)).isEqualTo(box); - assertThat(buffer.position()).isEqualTo(0); - assertThat(buffer.order()).isEqualTo(byteOrder); - } + assertThat(BoundingBox.fromByteBuffer(buffer)).isEqualTo(box); + assertThat(buffer.position()).isEqualTo(0); } @Test diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java index 292c780836ca..8a6a6325cc85 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestGeospatialBound.java @@ -213,17 +213,13 @@ public void testFromByteBuffer() { 0, 0, 0, 0, 0, 0, 36, 64, // 10.0 in little-endian IEEE 754 0, 0, 0, 0, 0, 0, 42, 64 // 13.0 in little-endian IEEE 754 }; - ByteBuffer xyBuffer = ByteBuffer.wrap(xyBytes); - for (ByteOrder endianness : new ByteOrder[] {ByteOrder.BIG_ENDIAN, ByteOrder.LITTLE_ENDIAN}) { - xyBuffer.order(endianness); - GeospatialBound xy = GeospatialBound.fromByteBuffer(xyBuffer); - assertThat(xy.x()).isEqualTo(10.0); - assertThat(xy.y()).isEqualTo(13.0); - assertThat(xy.hasZ()).isFalse(); - assertThat(xy.hasM()).isFalse(); - assertThat(xyBuffer.position()).isEqualTo(0); - assertThat(xyBuffer.order()).isEqualTo(endianness); - } + ByteBuffer xyBuffer = ByteBuffer.wrap(xyBytes).order(ByteOrder.LITTLE_ENDIAN); + GeospatialBound xy = GeospatialBound.fromByteBuffer(xyBuffer); + assertThat(xy.x()).isEqualTo(10.0); + assertThat(xy.y()).isEqualTo(13.0); + assertThat(xy.hasZ()).isFalse(); + assertThat(xy.hasM()).isFalse(); + assertThat(xyBuffer.position()).isEqualTo(0); } private GeospatialBound roundTripSerDe(GeospatialBound original) { From 7108a5060f2a241141bfd9f6aaf136cb067944b8 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Tue, 11 Nov 2025 09:40:17 +0800 Subject: [PATCH 14/15] Fix docstring --- .../iceberg/geospatial/GeospatialPredicateEvaluators.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java index 9eca7ee796ab..4e699e301e0a 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java @@ -99,13 +99,13 @@ public static class GeographyEvaluator implements GeospatialPredicateEvaluator { * *

Wraparound (or antimeridian crossing) occurs when a geography crosses the 180°/-180° * longitude line on a map. In these cases, the minimum X value is greater than the maximum X - * value (xmin > xmax). This represents a bounding box that wraps around the globe. + * value (xmin > xmax). This represents a bounding box that wraps around the globe. * *

For example, a bounding box with xmin=170° and xmax=-170° represents an area that spans * from 170° east to 190° east (or equivalently, -170° west). This is important for geometries * that cross the antimeridian, like a path from Japan to Alaska. * - *

When xmin > xmax, a point matches if its X coordinate is either X ≥ xmin OR X ≤ xmax, + *

When xmin > xmax, a point matches if its X coordinate is either X ≥ xmin OR X ≤ xmax, * rather than the usual X ≥ xmin AND X ≤ xmax. In geographic terms, if the westernmost * longitude is greater than the easternmost longitude, this indicates an antimeridian crossing. * From 1a9132d67be8c92b32fa7b425d523225062dc3ff Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Sat, 15 Nov 2025 12:15:23 +0800 Subject: [PATCH 15/15] Add another sanity check: the input buffer of BoundingBox.fromByteBuffer should have zero position --- .../org/apache/iceberg/geospatial/BoundingBox.java | 1 + .../apache/iceberg/geospatial/TestBoundingBox.java | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java index 72996d2e7680..cbf3c699ce1f 100644 --- a/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java +++ b/api/src/main/java/org/apache/iceberg/geospatial/BoundingBox.java @@ -51,6 +51,7 @@ public static BoundingBox fromByteBuffers(ByteBuffer min, ByteBuffer max) { * @return a BoundingBox instance */ public static BoundingBox fromByteBuffer(ByteBuffer buffer) { + Preconditions.checkArgument(buffer.position() == 0, "Input ByteBuffer must have position 0"); Preconditions.checkArgument( buffer.order() == ByteOrder.LITTLE_ENDIAN, "Invalid byte order: big endian"); ByteBuffer tmp = buffer.duplicate(); diff --git a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java index 84a9e10546c4..aa79da3d48ba 100644 --- a/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java +++ b/api/src/test/java/org/apache/iceberg/geospatial/TestBoundingBox.java @@ -19,6 +19,7 @@ package org.apache.iceberg.geospatial; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -108,6 +109,19 @@ public void testFromByteBuffer() { assertThat(buffer.position()).isEqualTo(0); } + @Test + public void testFromByteBufferWithNonZeroPosition() { + BoundingBox box = + new BoundingBox(GeospatialBound.createXY(1, 2), GeospatialBound.createXY(3, 4)); + ByteBuffer buffer = box.toByteBuffer(); + ByteBuffer largerBuffer = ByteBuffer.allocate(buffer.capacity() + 4); + largerBuffer.position(4); + largerBuffer.put(buffer); + largerBuffer.position(4); + assertThatThrownBy(() -> BoundingBox.fromByteBuffer(largerBuffer)) + .hasMessageMatching("Input ByteBuffer must have position 0"); + } + @Test public void testRoundTripSerDe() { GeospatialBound min = GeospatialBound.createXY(1.0, 2.0);