Skip to content

[C++][Gandiva] Projector fails with SIGSEGV if calling function from math.h on AArch64 #40839

@DenisTarasyuk

Description

@DenisTarasyuk

Using Java to build a Projector and evaluate it. The Projector calls the function sinh with a double-precision floating-point argument.
Running built jar inside docker on ubuntu:latest aarch64 image. Using openjdk 11 jdk and temurin 11 jdk.
On openjdk I am getting valid result.
On temurin jdk I am getting SIGSEGV (0xb) at pc=0x0000000000000000, pid=6522, tid=6523
Same reproduces on aarch64 docker image openjdk:11
Issue seems to be related to libc or some java libs dependencies.
Also reproduces for functions: sqrt,cbrt,exp,log,log10,sin,asin,cos,acos,tan,atan,cosh,tanh,atan2,cot,radians,degrees,power.
I have used a debugger to investigate the LLVM-generated projector code and found that, when I get the SIGSEGV, the generated function has code like this:

expr_0_0:
0000ffff4cd52000:   stp     x30, x21, [sp, #-32]!  
0000ffff4cd52004:   ldr     x8, [x1, #24]
0000ffff4cd52008:   cmp     x6, #0x1
0000ffff4cd5200c:   ldr     x9, [x0, #24]
0000ffff4cd52010:   stp     x20, x19, [sp, #16]
0000ffff4cd52014:   csinc   x19, x6, xzr, gt
0000ffff4cd52018:   ldr     x20, [x0]
0000ffff4cd5201c:   add     x21, x9, x8, lsl #3
0000ffff4cd52020:   ldr     x0, [x21], #8
0000ffff4cd52024:   bl      0xffff4cd5204c
0000ffff4cd52028:   bl      0xffff4cd52060
0000ffff4cd5202c:   bl      0xffff4cd52074
0000ffff4cd52030:   subs    x19, x19, #0x1
0000ffff4cd52034:   str     d0, [x20], #8
0000ffff4cd52038:   b.ne    0xffff4cd52020 <expr_0_0+32>  // b.any
0000ffff4cd5203c:   ldp     x20, x19, [sp, #16]
0000ffff4cd52040:   mov     w0, wzr
0000ffff4cd52044:   ldp     x30, x21, [sp], #32
0000ffff4cd52048:   ret     
0000ffff4cd5204c:   movz    x16, #0x0, lsl #48
0000ffff4cd52050:   movk    x16, #0x0, lsl #32
0000ffff4cd52054:   movk    x16, #0x0, lsl #16
0000ffff4cd52058:   movk    x16, #0x0
0000ffff4cd5205c:   br      x16
0000ffff4cd52060:   movz    x16, #0x0, lsl #48
0000ffff4cd52064:   movk    x16, #0x0, lsl #32
0000ffff4cd52068:   movk    x16, #0x0, lsl #16
0000ffff4cd5206c:   movk    x16, #0x0
0000ffff4cd52070:   br      x16
0000ffff4cd52074:   movz    x16, #0x0, lsl #48
0000ffff4cd52078:   movk    x16, #0x0, lsl #32
0000ffff4cd5207c:   movk    x16, #0x0, lsl #16
0000ffff4cd52080:   movk    x16, #0x0
0000ffff4cd52084:   br      x16

Here #0x0 is where the actual addresses of the external functions should be, but they are not populated, so the code jumps to 0x0000000000000000.
Here is example of valid llvm function that works:

expr_0_0:
0000ffff86635000:   stp     x30, x21, [sp, #-32]!
0000ffff86635004:   ldr     x8, [x1, #24]
0000ffff86635008:   cmp     x6, #0x1
0000ffff8663500c:   ldr     x9, [x0, #24]
0000ffff86635010:   stp     x20, x19, [sp, #16]
0000ffff86635014:   csinc   x19, x6, xzr, gt
0000ffff86635018:   ldr     x20, [x0]
0000ffff8663501c:   add     x21, x9, x8, lsl #3
0000ffff86635020:   ldr     x0, [x21], #8
0000ffff86635024:   bl      0xffff8663504c
0000ffff86635028:   bl      0xffff86635060
0000ffff8663502c:   bl      0xffff86635074
0000ffff86635030:   subs    x19, x19, #0x1
0000ffff86635034:   str     d0, [x20], #8
0000ffff86635038:   b.ne    0xffff86635020 <expr_0_0+32>  // b.any
0000ffff8663503c:   ldp     x20, x19, [sp, #16]
0000ffff86635040:   mov     w0, wzr
0000ffff86635044:   ldp     x30, x21, [sp], #32
0000ffff86635048:   ret     
0000ffff8663504c:   movz    x16, #0x0, lsl #48
0000ffff86635050:   movk    x16, #0xffff, lsl #32
0000ffff86635054:   movk    x16, #0x8661, lsl #16
0000ffff86635058:   movk    x16, #0xa4a0
0000ffff8663505c:   br      x16
0000ffff86635060:   movz    x16, #0x0, lsl #48
0000ffff86635064:   movk    x16, #0xffff, lsl #32
0000ffff86635068:   movk    x16, #0x8664, lsl #16
0000ffff8663506c:   movk    x16, #0xe6c0
0000ffff86635070:   br      x16
0000ffff86635074:   movz    x16, #0x0, lsl #48
0000ffff86635078:   movk    x16, #0xffff, lsl #32
0000ffff8663507c:   movk    x16, #0x8661, lsl #16
0000ffff86635080:   movk    x16, #0xb4d0
0000ffff86635084:   br      x16

This does not seem to happen on x86. Both openjdk and temurin work there.
Here is the example Java code I used to reproduce it:


    package org.example;
    import com.google.common.collect.Lists;
    import java.math.BigDecimal;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    import org.apache.arrow.gandiva.evaluator.Projector;
    import org.apache.arrow.gandiva.exceptions.GandivaException;
    import org.apache.arrow.gandiva.expression.ExpressionTree;
    import org.apache.arrow.gandiva.expression.TreeBuilder;
    import org.apache.arrow.memory.ArrowBuf;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.DecimalVector;
    import org.apache.arrow.vector.Float8Vector;
    import org.apache.arrow.vector.ValueVector;
    import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
    import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
    import org.apache.arrow.vector.types.FloatingPointPrecision;
    import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
    import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
    import org.apache.arrow.vector.types.pojo.Field;
	       import org.apache.arrow.vector.types.pojo.Schema;

    /**
     * Minimal reproducer for the reported crash: builds a Gandiva {@link Projector} that applies
     * {@code sinh} to a nullable double-precision field and prints the projected values.
     */
    public class Main {

    /**
     * Builds the {@code sinh(a)} projector, evaluates it over four sample values and prints
     * one {@code "value: ..."} line per row.
     *
     * @param _args command-line arguments; unused
     * @throws GandivaException propagated from the Gandiva projector calls
     */
    public static void main(String[] _args) throws GandivaException {
        FloatingPoint fPoint64 = new FloatingPoint(FloatingPointPrecision.DOUBLE);
        Field aFp = Field.nullable("a", fPoint64);
        List<Field> args = Lists.newArrayList(aFp);

        List<ExpressionTree> exprs = new ArrayList<>(
                Arrays.asList(
                        TreeBuilder.makeExpression(
                                TreeBuilder.makeFunction("sinh",
                                        Lists.newArrayList(TreeBuilder.makeField(aFp)),
                                        fPoint64),
                                Field.nullable("f_field", fPoint64))
                )
        );

        Schema schema = new Schema(args);

        // try-with-resources guarantees the allocator is closed even when
        // Projector.make(...) itself fails, which the previous layout did not.
        try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
            Projector eval = Projector.make(schema, exprs);

            List<ValueVector> output = null;
            ArrowRecordBatch batch = null;
            try {
                String[] aValues = new String[]{"1.23", "1.58", "-1.23", "-1.58"};
                // Derive the row count from the input so the two cannot drift apart.
                int numRows = aValues.length;

                Float8Vector valuesa = floatVector(allocator, aValues);
                batch =
                        new ArrowRecordBatch(
                                numRows,
                                Lists.newArrayList(new ArrowFieldNode(numRows, 0)),
                                Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer()));

                // Allocate output vectors.
                output = new ArrayList<>(
                        Arrays.asList(
                                new Float8Vector("f_field", allocator)
                        )
                );
                for (ValueVector v : output) {
                    v.allocateNew();
                }

                // Evaluate expressions.
                eval.evaluate(batch, output);

                // Print the projected values of every output vector.
                for (ValueVector v : output) {
                    Float8Vector resultVector = (Float8Vector) v;
                    for (int i = 0; i < numRows; i++) {
                        System.out.println("value: " + resultVector.getObject(i));
                    }
                }
            } finally {
                // Free buffers before the allocator is closed by try-with-resources.
                if (batch != null) {
                    releaseRecordBatch(batch);
                }
                if (output != null) {
                    releaseValueVectors(output);
                }
                eval.close();
            }
        }
    }

    /**
     * Creates a {@link Float8Vector} holding the given decimal strings parsed as doubles.
     *
     * <p>The vector name is {@code "double"} followed by a random suffix, and the value count
     * is set to {@code values.length}.
     *
     * @param allocator allocator backing the new vector
     * @param values decimal string representations of the values to store
     * @return the populated vector
     * @throws NumberFormatException if an element of {@code values} is not a parsable double
     */
    static Float8Vector floatVector(BufferAllocator allocator, String[] values) {
        Float8Vector vector = new Float8Vector("double" + Math.random(), allocator);
        vector.allocateNew();
        for (int i = 0; i < values.length; i++) {
            // Double.parseDouble replaces the deprecated new Double(String) constructor
            // and avoids a needless boxing step.
            vector.setSafe(i, Double.parseDouble(values[i]));
        }

        vector.setValueCount(values.length);
        return vector;
    }

    /**
     * Closes the record batch and additionally releases each of its buffers once.
     *
     * @param recordBatch the batch whose buffers should be released
     */
    static void releaseRecordBatch(ArrowRecordBatch recordBatch) {
        // There are 2 references to the buffers
        // One in the recordBatch - release that by calling close()
        // One in the allocator - release that explicitly
        List<ArrowBuf> buffers = recordBatch.getBuffers();
        recordBatch.close();
        for (ArrowBuf buf : buffers) {
            buf.getReferenceManager().release();
        }
    }

    /**
     * Closes every vector in the given list.
     *
     * @param valueVectors the vectors to close
     */
    static void releaseValueVectors(List<ValueVector> valueVectors) {
        for (ValueVector valueVector : valueVectors) {
            valueVector.close();
        }
    }
}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Build file for the reproducer: compiles for Java 11 and packages a shaded jar
         whose manifest main class is org.example.Main. -->
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>test</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Single version shared by the three Arrow artifacts declared below. -->
        <arrow.version>14.0.2</arrow.version>
    </properties>
    <dependencies>

        <!-- NOTE(review): json-simple is not imported by the Main class shown in this
             report; presumably a leftover - confirm before relying on it. -->
        <dependency>
            <groupId>com.googlecode.json-simple</groupId>
            <artifactId>json-simple</artifactId>
            <version>1.1</version>
        </dependency>
        <!-- Gandiva Java bindings (Projector, TreeBuilder, ExpressionTree). -->
        <dependency>
            <groupId>org.apache.arrow.gandiva</groupId>
            <artifactId>arrow-gandiva</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <!-- Arrow vectors, record batches and schema types used by Main. -->
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-vector</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-memory-netty</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <!-- Test-scoped only; no tests are included in this report. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.4</version>
            <scope>test</scope>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <!-- Runs the shade goal during the package phase and writes
                 org.example.Main into the jar manifest as the main class. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.5.2</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>org.example.Main</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>

Here is ldd output:
openjdk

	linux-vdso.so.1 (0x0000ffffa48da000)
	libz.so.1 => /lib/aarch64-linux-gnu/libz.so.1 (0x0000ffffa4850000)
	libjli.so => /usr/lib/jvm/java-11-openjdk-arm64/bin/../lib/jli/libjli.so (0x0000ffffa4820000)
	libc.so.6 => /lib/aarch64-linux-gnu/libc.so.6 (0x0000ffffa4670000)
	/lib/ld-linux-aarch64.so.1 (0x0000ffffa48a1000)

temurin jdk

	linux-vdso.so.1 (0x0000ffff9f62d000)
	libpthread.so.0 => /lib/aarch64-linux-gnu/libpthread.so.0 (0x0000ffff9f5b0000)
	libjli.so => /usr/lib/jvm/temurin-11-jdk-arm64/bin/../lib/jli/libjli.so (0x0000ffff9f570000)
	libdl.so.2 => /lib/aarch64-linux-gnu/libdl.so.2 (0x0000ffff9f550000)
	libc.so.6 => /lib/aarch64-linux-gnu/libc.so.6 (0x0000ffff9f3a0000)
	/lib/ld-linux-aarch64.so.1 (0x0000ffff9f5f4000)

Component(s)

C++ - Gandiva

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions