Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions core/trino-main/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,6 @@
<artifactId>stream</artifactId>
</dependency>

<dependency>
<groupId>com.esri.geometry</groupId>
<artifactId>esri-geometry-api</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
*/
package io.trino.operator;

import com.esri.core.geometry.ogc.OGCGeometry;
import com.esri.core.geometry.ogc.OGCPoint;
import io.airlift.slice.Slice;
import io.trino.Session;
import io.trino.geospatial.Rectangle;
Expand All @@ -29,6 +27,8 @@
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.Point;
import org.locationtech.jts.index.strtree.STRtree;

import java.util.List;
Expand All @@ -40,7 +40,8 @@
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verifyNotNull;
import static io.airlift.slice.SizeOf.instanceSize;
import static io.trino.geospatial.serde.GeometrySerde.deserialize;
import static io.trino.geospatial.GeometryUtils.estimateMemorySize;
import static io.trino.geospatial.serde.JtsGeometrySerde.deserialize;
import static io.trino.operator.SyntheticAddress.decodePosition;
import static io.trino.operator.SyntheticAddress.decodeSliceIndex;
import static io.trino.operator.join.JoinUtils.channelsToPages;
Expand All @@ -67,20 +68,20 @@ public static final class GeometryWithPosition
{
private static final int INSTANCE_SIZE = instanceSize(GeometryWithPosition.class);

private final OGCGeometry ogcGeometry;
private final Geometry geometry;
private final int partition;
private final int position;

public GeometryWithPosition(OGCGeometry ogcGeometry, int partition, int position)
public GeometryWithPosition(Geometry geometry, int partition, int position)
{
this.ogcGeometry = requireNonNull(ogcGeometry, "ogcGeometry is null");
this.geometry = requireNonNull(geometry, "geometry is null");
this.partition = partition;
this.position = position;
}

public OGCGeometry getGeometry()
public Geometry getGeometry()
{
return ogcGeometry;
return geometry;
}

public int getPartition()
Expand All @@ -95,7 +96,7 @@ public int getPosition()

public long getEstimatedMemorySizeInBytes()
{
return INSTANCE_SIZE + ogcGeometry.estimateMemorySize();
return INSTANCE_SIZE + estimateMemorySize(geometry);
}
Comment on lines 97 to 100
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

fd -type f -name "PagesRTreeIndex.java" 2>/dev/null | head -5

Repository: trinodb/trino

Length of output: 39


🏁 Script executed:

# First, let's find and examine the file
find . -name "PagesRTreeIndex.java" -type f 2>/dev/null

Repository: trinodb/trino

Length of output: 127


🏁 Script executed:

# Let's search for the file using git
git ls-files | grep -i "PagesRTreeIndex"

Repository: trinodb/trino

Length of output: 125


🏁 Script executed:

wc -l core/trino-main/src/main/java/io/trino/operator/PagesRTreeIndex.java

Repository: trinodb/trino

Length of output: 129


🏁 Script executed:

cat -n core/trino-main/src/main/java/io/trino/operator/PagesRTreeIndex.java

Repository: trinodb/trino

Length of output: 11294


🏁 Script executed:

# Search for other memory size estimates for geometries in the codebase
rg -i "geometry.*memory\|memory.*geometry" --type java | head -20

Repository: trinodb/trino

Length of output: 39


🏁 Script executed:

# Search for JtsGeometrySerde which is imported
find . -name "*JtsGeometrySerde*" -type f 2>/dev/null

Repository: trinodb/trino

Length of output: 251


🏁 Script executed:

# Look for getNumPoints usage in the codebase
rg "getNumPoints" --type java

Repository: trinodb/trino

Length of output: 891


🏁 Script executed:

wc -l ./lib/trino-geospatial-toolkit/src/main/java/io/trino/geospatial/serde/JtsGeometrySerde.java

Repository: trinodb/trino

Length of output: 153


🏁 Script executed:

cat -n ./lib/trino-geospatial-toolkit/src/main/java/io/trino/geospatial/serde/JtsGeometrySerde.java | head -150

Repository: trinodb/trino

Length of output: 6703


🏁 Script executed:

cat -n ./lib/trino-geospatial-toolkit/src/main/java/io/trino/geospatial/serde/JtsGeometrySerde.java | tail -50

Repository: trinodb/trino

Length of output: 1910


🏁 Script executed:

# Check if there's any pattern for retaining serialized slice sizes in the codebase
rg "slice.*length\|getSliceLength\|estimatedSize" --type java | grep -i geometry | head -10

Repository: trinodb/trino

Length of output: 39


🏁 Script executed:

# Look at how geometries are stored in the spatial index
rg "STRtree\|rtree" --type java -B 2 -A 2 | head -40

Repository: trinodb/trino

Length of output: 39


🌐 Web query:

JTS Geometry getNumPoints() what does it return vertices coordinate points

💡 Result:

In JTS, Geometry.getNumPoints() returns the count of the geometry’s vertices—i.e., the total number of coordinate positions stored by the geometry (not the number of distinct/unique points). For composite geometries (e.g., Multi*, GeometryCollection), this is effectively the sum of the vertex counts of the components. [1]

Implications:

  • Point1 (or 0 if empty).
  • LineString → number of coordinates in the line.
  • Polygon → counts vertices in the shell and holes; ring-closure coordinates (where the last coordinate equals the first) are included because they are vertices in the coordinate sequence. [1]

Source: JTS Geometry Javadoc: “Returns the count of this Geometry’s vertices” / “the number of vertices in this Geometry.” [1]


🌐 Web query:

LocationTech JTS Geometry memory object graph retained size structure

💡 Result:

In JTS (LocationTech) the retained size you see in a heap/object-graph tool for a Geometry is driven mostly by (a) the geometry topology objects (Polygon → rings, collections → components) and (b) the coordinate storage (CoordinateSequence implementation), plus a few always-present base fields.

1) Base Geometry object graph (common to all)

Geometry holds: an Envelope field (protected Envelope envelope), a GeometryFactory reference (protected final GeometryFactory factory), an int SRID, and optional userData. [1]

So the retained graph typically starts:
GeometryEnvelope (may be null until computed) + GeometryFactory + userData.

2) Concrete geometry topology objects

  • Point: Point stores exactly one CoordinateSequence reference: private CoordinateSequence coordinates; [2]
    Graph: PointCoordinateSequence → underlying coordinate storage.
  • LineString / LinearRing: LineString stores protected CoordinateSequence points; [3]
    Graph: LineStringCoordinateSequence → underlying coordinate storage.
  • Polygon: Polygon stores protected LinearRing shell and protected LinearRing[] holes [4].
    Graph: Polygonshell (LinearRing) → (LineString.points) → CoordinateSequence → storage, and similarly for each hole.

3) Coordinate storage is usually the biggest driver

JTS uses CoordinateSequence to hold vertices. Two common implementations have very different object graphs:

A) CoordinateArraySequence (default)
Backed by a Coordinate[] coordinates array. [5]
Graph cost: one object for the sequence + one Java array + one Coordinate object per vertex.

B) PackedCoordinateSequence (memory-lean for many vertices)
Backed by a primitive array (double[] coords in PackedCoordinateSequence.Double, or float[] in .Float). [6]
Graph cost: one object for the sequence + one primitive array (much less per-vertex overhead).

Important retained-size detail: PackedCoordinateSequence also has a transient SoftReference cache (coordRef) to a materialized Coordinate[] created by toCoordinateArray(). [6] If that cache has been populated and not cleared, your geometry’s retained graph can temporarily include the cached Coordinate[] plus all the per-vertex Coordinate objects (even though the “real” storage is packed).


Sources: Geometry base fields [1], Point coordinate sequence field [2], LineString points field [3], Polygon shell/holes fields [4], CoordinateArraySequence backing array [5], PackedCoordinateSequence packed arrays + soft cache [6].


🏁 Script executed:

# Search for usage of getEstimatedMemorySizeInBytes
rg "getEstimatedMemorySizeInBytes" --type java

Repository: trinodb/trino

Length of output: 369


🏁 Script executed:

# Check if there's memory tracking or operator memory accounting that uses this
rg "PagesRTreeIndex\|GeometryWithPosition" --type java -B 2 -A 2 | head -60

Repository: trinodb/trino

Length of output: 39


🏁 Script executed:

find . -name "PagesSpatialIndexSupplier.java" -type f 2>/dev/null

Repository: trinodb/trino

Length of output: 137


🏁 Script executed:

cat -n core/trino-main/src/main/java/io/trino/operator/PagesSpatialIndexSupplier.java

Repository: trinodb/trino

Length of output: 9260


This retained-size estimate significantly underestimates JTS geometry memory, especially for composite shapes.

getNumPoints() * 24 accounts only for raw coordinate payload but misses the object graph for coordinate sequences, linear rings, component geometries, and envelope caching. For a Polygon with holes, this omits multiple LinearRing objects and their ring arrays; for MultiPolygon or GeometryCollection, it ignores all component Geometry objects. Since this feeds operator memory accounting (via PagesSpatialIndexSupplier.getEstimatedSize()), large multipart geometries will be materially under-reported and can exceed memory limits without triggering spill-over.

Either retain the serialized slice size alongside the geometry (available at deserialization in buildRTree() line 114) or use a deeper retained-size walk of the JTS object graph.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@core/trino-main/src/main/java/io/trino/operator/PagesRTreeIndex.java` around
lines 96 - 100, The current getEstimatedMemorySizeInBytes() in PagesRTreeIndex
underestimates memory by only using geometry.getNumPoints(); update it to
account for full retained size by either (A) storing and returning the
serialized Slice size captured during deserialization in buildRTree() (reuse the
existing serialized slice retained at build time and return INSTANCE_SIZE +
serializedSlice.length()), or (B) implement a deep retained-size traversal of
the JTS object graph starting from geometry (walk Geometry, LinearRing,
CoordinateSequence, component Geometries, and cached Envelope) and sum
object+array sizes, then return INSTANCE_SIZE + deepSize; ensure
PagesSpatialIndexSupplier.getEstimatedSize() reads this corrected value so
operator accounting reflects multipart/composite geometries accurately.

}

Expand Down Expand Up @@ -124,12 +125,9 @@ public PagesRTreeIndex(
checkArgument(!(constantRadius.isPresent() && radiusChannel.isPresent()), "Radius channel and constant radius are mutually exclusive");
}

private static Envelope getEnvelope(OGCGeometry ogcGeometry)
private static Envelope getEnvelope(Geometry geometry)
{
com.esri.core.geometry.Envelope env = new com.esri.core.geometry.Envelope();
ogcGeometry.getEsriGeometry().queryEnvelope(env);

return new Envelope(env.getXMin(), env.getXMax(), env.getYMin(), env.getYMax());
return geometry.getEnvelopeInternal();
}

/**
Expand All @@ -153,21 +151,21 @@ public int[] findJoinPositions(int position, Page probe, int probeGeometryChanne
int probePartition = probePartitionChannel.isPresent() ? INTEGER.getInt(probe.getBlock(probePartitionChannel.getAsInt()), position) : -1;

Slice slice = probeGeometryBlock.getSlice(probePosition);
OGCGeometry probeGeometry = deserialize(slice);
Geometry probeGeometry = deserialize(slice);
verifyNotNull(probeGeometry);
if (probeGeometry.isEmpty()) {
return EMPTY_ADDRESSES;
}

boolean probeIsPoint = probeGeometry instanceof OGCPoint;
boolean probeIsPoint = probeGeometry instanceof Point;

IntArrayList matchingPositions = new IntArrayList();

Envelope envelope = getEnvelope(probeGeometry);
rtree.query(envelope, item -> {
GeometryWithPosition geometryWithPosition = (GeometryWithPosition) item;
OGCGeometry buildGeometry = geometryWithPosition.getGeometry();
if (partitions.isEmpty() || (probePartition == geometryWithPosition.getPartition() && (probeIsPoint || (buildGeometry instanceof OGCPoint) || testReferencePoint(envelope, buildGeometry, probePartition)))) {
Geometry buildGeometry = geometryWithPosition.getGeometry();
if (partitions.isEmpty() || (probePartition == geometryWithPosition.getPartition() && (probeIsPoint || (buildGeometry instanceof Point) || testReferencePoint(envelope, buildGeometry, probePartition)))) {
if (radiusChannel == -1 && constantRadius.isEmpty()) {
if (spatialRelationshipTest.apply(buildGeometry, probeGeometry, OptionalDouble.empty())) {
matchingPositions.add(geometryWithPosition.getPosition());
Expand All @@ -188,7 +186,7 @@ public int[] findJoinPositions(int position, Page probe, int probeGeometryChanne
return matchingPositions.toIntArray();
}

private boolean testReferencePoint(Envelope probeEnvelope, OGCGeometry buildGeometry, int partition)
private boolean testReferencePoint(Envelope probeEnvelope, Geometry buildGeometry, int partition)
{
Envelope buildEnvelope = getEnvelope(buildGeometry);
Envelope intersection = buildEnvelope.intersection(probeEnvelope);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@
*/
package io.trino.operator;

import com.esri.core.geometry.Geometry;
import com.esri.core.geometry.GeometryCursor;
import com.esri.core.geometry.Operator;
import com.esri.core.geometry.OperatorFactoryLocal;
import com.esri.core.geometry.ogc.OGCGeometry;
import io.airlift.slice.Slice;
import io.airlift.units.DataSize;
import io.trino.Session;
Expand All @@ -30,6 +25,7 @@
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.index.strtree.AbstractNode;
import org.locationtech.jts.index.strtree.ItemBoundable;
import org.locationtech.jts.index.strtree.STRtree;
Expand All @@ -43,7 +39,7 @@

import static com.google.common.base.Verify.verifyNotNull;
import static io.airlift.slice.SizeOf.instanceSize;
import static io.trino.geospatial.serde.GeometrySerde.deserialize;
import static io.trino.geospatial.serde.JtsGeometrySerde.deserialize;
import static io.trino.operator.PagesSpatialIndex.EMPTY_INDEX;
import static io.trino.operator.SyntheticAddress.decodePosition;
import static io.trino.operator.SyntheticAddress.decodeSliceIndex;
Expand Down Expand Up @@ -101,7 +97,6 @@ public PagesSpatialIndexSupplier(
private static STRtree buildRTree(LongArrayList addresses, List<ObjectArrayList<Block>> channels, int geometryChannel, OptionalInt radiusChannel, OptionalDouble constantRadius, OptionalInt partitionChannel)
{
STRtree rtree = new STRtree();
Operator relateOperator = OperatorFactoryLocal.getInstance().getOperator(Operator.Type.Relate);

for (int position = 0; position < addresses.size(); position++) {
long pageAddress = addresses.getLong(position);
Expand All @@ -117,9 +112,9 @@ private static STRtree buildRTree(LongArrayList addresses, List<ObjectArrayList<
}

Slice slice = block.getSlice(valueBlockPosition);
OGCGeometry ogcGeometry = deserialize(slice);
verifyNotNull(ogcGeometry);
if (ogcGeometry.isEmpty()) {
Geometry geometry = deserialize(slice);
verifyNotNull(geometry);
if (geometry.isEmpty()) {
continue;
}

Expand All @@ -135,30 +130,30 @@ else if (radiusChannel.isPresent()) {
continue;
}

if (radiusChannel.isEmpty() && constantRadius.isEmpty()) {
// If radius is supplied, this is a distance query, for which our acceleration won't help.
accelerateGeometry(ogcGeometry, relateOperator);
}

int partition = -1;
if (partitionChannel.isPresent()) {
Block partitionBlock = channels.get(partitionChannel.getAsInt()).get(blockIndex);
partition = INTEGER.getInt(partitionBlock, blockPosition);
}

rtree.insert(getEnvelope(ogcGeometry, radius), new GeometryWithPosition(ogcGeometry, partition, position));
rtree.insert(getEnvelope(geometry, radius), new GeometryWithPosition(geometry, partition, position));
}

rtree.build();
return rtree;
}

private static Envelope getEnvelope(OGCGeometry ogcGeometry, double radius)
private static Envelope getEnvelope(Geometry geometry, double radius)
{
com.esri.core.geometry.Envelope envelope = new com.esri.core.geometry.Envelope();
ogcGeometry.getEsriGeometry().queryEnvelope(envelope);

return new Envelope(envelope.getXMin() - radius, envelope.getXMax() + radius, envelope.getYMin() - radius, envelope.getYMax() + radius);
Envelope envelope = geometry.getEnvelopeInternal();
if (radius == 0.0) {
return envelope;
}
return new Envelope(
envelope.getMinX() - radius,
envelope.getMaxX() + radius,
envelope.getMinY() - radius,
envelope.getMaxY() + radius);
}

private long computeMemorySizeInBytes(AbstractNode root)
Expand All @@ -174,19 +169,6 @@ private long computeMemorySizeInBytes(ItemBoundable item)
return ENVELOPE_INSTANCE_SIZE + ((GeometryWithPosition) item.getItem()).getEstimatedMemorySizeInBytes();
}

private static void accelerateGeometry(OGCGeometry ogcGeometry, Operator relateOperator)
{
// Recurse into GeometryCollections
GeometryCursor cursor = ogcGeometry.getEsriGeometryCursor();
while (true) {
Geometry esriGeometry = cursor.next();
if (esriGeometry == null) {
break;
}
relateOperator.accelerateGeometry(esriGeometry, null, Geometry.GeometryAccelerationDegree.enumMild);
}
}

// doesn't include memory used by channels and addresses which are shared with PagesIndex
public DataSize getEstimatedSize()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
*/
package io.trino.operator;

import com.esri.core.geometry.ogc.OGCGeometry;
import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.ListenableFuture;
import io.airlift.slice.Slice;
Expand All @@ -24,6 +23,7 @@
import io.trino.spi.type.Type;
import io.trino.sql.gen.JoinFilterFunctionCompiler.JoinFilterFunctionFactory;
import io.trino.sql.planner.plan.PlanNodeId;
import org.locationtech.jts.geom.Geometry;

import java.util.HashMap;
import java.util.List;
Expand All @@ -42,7 +42,7 @@ public class SpatialIndexBuilderOperator
@FunctionalInterface
public interface SpatialPredicate
{
boolean apply(OGCGeometry probe, OGCGeometry build, OptionalDouble radius);
boolean apply(Geometry probe, Geometry build, OptionalDouble radius);
}

public static final class SpatialIndexBuilderOperatorFactory
Expand Down
11 changes: 0 additions & 11 deletions lib/trino-geospatial-toolkit/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@
</properties>

<dependencies>
<dependency>
<groupId>com.esri.geometry</groupId>
<artifactId>esri-geometry-api</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
Expand All @@ -47,12 +42,6 @@
<artifactId>trino-spi</artifactId>
</dependency>

<dependency>
<groupId>jakarta.annotation</groupId>
<artifactId>jakarta.annotation-api</artifactId>
<optional>true</optional>
</dependency>

<dependency>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ private static GeometryType getForInternalLibraryName(String type)
return switch (type) {
case "Point" -> POINT;
case "MultiPoint" -> MULTI_POINT;
case "LineString" -> LINE_STRING;
case "LineString", "LinearRing" -> LINE_STRING;
case "MultiLineString" -> MULTI_LINE_STRING;
case "Polygon" -> POLYGON;
case "MultiPolygon" -> MULTI_POLYGON;
Expand Down
Loading