From c57d988602810906d263802afcee9ff6d16b98a8 Mon Sep 17 00:00:00 2001
From: Yichao Yu <yyc1992@gmail.com>
Date: Mon, 26 Sep 2016 20:29:34 -0400
Subject: [PATCH] Fix ARM and AArch64 ABI

Also add related ccall test and test both mutable and immutable types.
---
 src/ccall.cpp        |  42 +++++++-
 src/ccalltest.c      |  29 ++++++
 src/julia_internal.h |  21 ++++
 test/ccall.jl        | 238 ++++++++++++++++++++++++++++++++++---------
 4 files changed, 279 insertions(+), 51 deletions(-)

diff --git a/src/ccall.cpp b/src/ccall.cpp
index 2c3182d1c1ca17..9eac3006824ea6 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -409,10 +409,26 @@ Value *llvm_type_rewrite(Value *v, Type *from_type, Type *target_type,
     // one or both of from_type and target_type is a VectorType or AggregateType
     // LLVM doesn't allow us to cast these values directly, so
     // we need to use this alloca copy trick instead
-    // NOTE: it is assumed that the ABI has ensured that sizeof(from_type) == sizeof(target_type)
-    Value *mem = emit_static_alloca(target_type, ctx);
-    builder.CreateStore(v, builder.CreatePointerCast(mem, from_type->getPointerTo()));
-    return builder.CreateLoad(mem);
+    // On ARM and AArch64, the ABI requires casting through memory to different
+    // sizes.
+    Value *from;
+    Value *to;
+    const DataLayout &DL =
+#if JL_LLVM_VERSION >= 30600
+        jl_ExecutionEngine->getDataLayout();
+#else
+    *jl_ExecutionEngine->getDataLayout();
+#endif
+    if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) {
+        to = emit_static_alloca(target_type, ctx);
+        from = builder.CreatePointerCast(to, from_type->getPointerTo());
+    }
+    else {
+        from = emit_static_alloca(from_type, ctx);
+        to = builder.CreatePointerCast(from, target_type->getPointerTo());
+    }
+    builder.CreateStore(v, from);
+    return builder.CreateLoad(to);
 }
 
 // --- argument passing and scratch space utilities ---
@@ -1839,8 +1855,24 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx)
             jl_cgval_t newst = emit_new_struct(rt, 1, NULL, ctx); // emit a new, empty struct
             assert(newst.typ != NULL && "Type was not concrete");
             assert(newst.isboxed);
+            size_t rtsz = jl_datatype_size(rt);
+            assert(rtsz > 0);
+            int boxalign = jl_gc_alignment(rtsz);
+#ifndef NDEBUG
+            const DataLayout &DL =
+#if JL_LLVM_VERSION >= 30600
+                jl_ExecutionEngine->getDataLayout();
+#else
+            *jl_ExecutionEngine->getDataLayout();
+#endif
+            // ARM and AArch64 can use a LLVM type larger than the julia
+            // type. However, the LLVM type size should be no larger than
+            // the GC allocation size. (multiple of `sizeof(void*)`)
+            assert(DL.getTypeStoreSize(lrt) <= LLT_ALIGN(jl_datatype_size(rt),
+                                                         boxalign));
+#endif
             // copy the data from the return value to the new struct
-            tbaa_decorate(newst.tbaa, builder.CreateAlignedStore(result, emit_bitcast(newst.V, prt->getPointerTo()), 16)); // julia gc is aligned 16
+            tbaa_decorate(newst.tbaa, builder.CreateAlignedStore(result, emit_bitcast(newst.V, prt->getPointerTo()), boxalign));
             return newst;
         }
         else if (jlrt != prt) {
diff --git a/src/ccalltest.c b/src/ccalltest.c
index 02e40f4fb9c1ac..9f7d632458f33f 100644
--- a/src/ccalltest.c
+++ b/src/ccalltest.c
@@ -191,6 +191,17 @@ typedef struct {
     double a,b,c;
 } struct16;
 
+typedef struct {
+    int8_t a;  // 8-bit field
+    int16_t b; // 16-bit field, wider than `a`
+} struct17; // passed to and returned from test_17 by value
+
+typedef struct {
+    int8_t a;
+    int8_t b;
+    int8_t c;
+} struct18; // three 8-bit fields; passed to and returned from test_18 by value
+
 typedef struct {
     jint x;
     jint y;
@@ -467,6 +478,24 @@ JL_DLLEXPORT struct16 test_16(struct16 a, float b) {
     return a;
 }
 
+JL_DLLEXPORT struct17 test_17(struct17 a, int8_t b) {
+    // Unpack a struct with non-obvious packing requirements; returns the copy with a.a += b and a.b -= 2*b
+    if (verbose) fprintf(stderr,"%d %d & %d\n", (int)a.a, (int)a.b, (int)b); // log the inputs when `verbose` is set
+    a.a += b*1;
+    a.b -= b*2;
+    return a; // `a` was received by value, so this hands back the modified copy
+}
+
+JL_DLLEXPORT struct18 test_18(struct18 a, int8_t b) {
+    // Unpack a struct with non-obvious packing requirements; returns the copy with a.a += b, a.b -= 2*b, a.c += 3*b
+    if (verbose) fprintf(stderr,"%d %d %d & %d\n", // log the inputs when `verbose` is set
+                         (int)a.a, (int)a.b, (int)a.c, (int)b);
+    a.a += b*1;
+    a.b -= b*2;
+    a.c += b*3;
+    return a; // `a` was received by value, so this hands back the modified copy
+}
+
 // Note for AArch64:
 // `i128` is a native type on aarch64 so the type here is wrong.
 // However, it happens to have the same calling convention with `[2 x i64]`
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 42f495dfad49c0..0b46f698185e37 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -92,6 +92,27 @@ static const int jl_gc_sizeclasses[JL_GC_N_POOLS] = {
 //    64,   32,  160,   64,   16,   64,  112,  128, bytes lost
 };
 
+STATIC_INLINE int jl_gc_alignment(size_t sz) // alignment (bytes) for an object of `sz` bytes; presumably that of its GC pool slot -- confirm
+{
+    if (sz == 0)
+        return sizeof(void*); // zero-sized objects: pointer alignment
+#ifdef _P64
+    (void)sz; // size is irrelevant here: every non-empty object reports 16
+    return 16;
+#elif defined(_CPU_ARM_) || defined(_CPU_PPC_)
+    return sz <= 4 ? 8 : 16; // inclusive bound, same as the szclass 8 case in the generic branch
+#else
+    // szclass 8: sizes up to and including 4 bytes
+    if (sz <= 4)
+        return 8;
+    // szclass 12: sizes up to and including 8 bytes; must not be reported as 16-aligned
+    if (sz <= 8)
+        return 4;
+    // szclass 16+
+    return 16;
+#endif
+}
+
 STATIC_INLINE int JL_CONST_FUNC jl_gc_szclass(size_t sz)
 {
 #ifdef _P64
diff --git a/test/ccall.jl b/test/ccall.jl
index 56a6b1bf3f5be3..3fb2ecf54edad8 100644
--- a/test/ccall.jl
+++ b/test/ccall.jl
@@ -111,14 +111,19 @@ type Struct1
     x::Float32
     y::Float64
 end
+immutable Struct1I
+    x::Float32
+    y::Float64
+end
 copy(a::Struct1) = Struct1(a.x, a.y)
+copy(a::Struct1I) = a
 
-let a, b, a2, x
-    a = Struct1(352.39422f23, 19.287577)
+function test_struct1{Struct}(::Type{Struct})
+    a = Struct(352.39422f23, 19.287577)
     b = Float32(123.456)
 
     a2 = copy(a)
-    x = ccall((:test_1, libccalltest), Struct1, (Struct1, Float32), a2, b)
+    x = ccall((:test_1, libccalltest), Struct, (Struct, Float32), a2, b)
 
     @test a2.x == a.x && a2.y == a.y
     @test !(a2 === x)
@@ -126,6 +131,8 @@ let a, b, a2, x
     @test x.x ≈ a.x + 1*b
     @test x.y ≈ a.y - 2*b
 end
+test_struct1(Struct1)
+test_struct1(Struct1I)
 
 let a, b, x, y
     a = Complex{Int32}(Int32(10),Int32(31))
@@ -161,17 +168,24 @@ type Struct4
     y::Int32
     z::Int32
 end
+immutable Struct4I
+    x::Int32
+    y::Int32
+    z::Int32
+end
 
-let a, b, x
-    a = Struct4(-512275808,882558299,-2133022131)
+function test_struct4{Struct}(::Type{Struct})
+    a = Struct(-512275808,882558299,-2133022131)
     b = Int32(42)
 
-    x = ccall((:test_4, libccalltest), Struct4, (Struct4,Int32), a, b)
+    x = ccall((:test_4, libccalltest), Struct, (Struct, Int32), a, b)
 
     @test x.x == a.x+b*1
     @test x.y == a.y-b*2
     @test x.z == a.z+b*3
 end
+test_struct4(Struct4)
+test_struct4(Struct4I)
 
 type Struct5
     x::Int32
@@ -179,80 +193,113 @@ type Struct5
     z::Int32
     a::Int32
 end
+immutable Struct5I
+    x::Int32
+    y::Int32
+    z::Int32
+    a::Int32
+end
 
-let a, b, x
-    a = Struct5(1771319039, 406394736, -1269509787, -745020976)
+function test_struct5{Struct}(::Type{Struct})
+    a = Struct(1771319039, 406394736, -1269509787, -745020976)
     b = Int32(42)
 
-    x = ccall((:test_5, libccalltest), Struct5, (Struct5,Int32), a, b)
+    x = ccall((:test_5, libccalltest), Struct, (Struct, Int32), a, b)
 
     @test x.x == a.x+b*1
     @test x.y == a.y-b*2
     @test x.z == a.z+b*3
     @test x.a == a.a-b*4
 end
+test_struct5(Struct5)
+test_struct5(Struct5I)
 
 type Struct6
     x::Int64
     y::Int64
     z::Int64
 end
+immutable Struct6I
+    x::Int64
+    y::Int64
+    z::Int64
+end
 
-let a, b, x
-    a = Struct6(-654017936452753226, -5573248801240918230, -983717165097205098)
+function test_struct6{Struct}(::Type{Struct})
+    a = Struct(-654017936452753226, -5573248801240918230, -983717165097205098)
     b = Int64(42)
 
-    x = ccall((:test_6, libccalltest), Struct6, (Struct6, Int64), a, b)
+    x = ccall((:test_6, libccalltest), Struct, (Struct, Int64), a, b)
 
     @test x.x == a.x+b*1
     @test x.y == a.y-b*2
     @test x.z == a.z+b*3
 end
+test_struct6(Struct6)
+test_struct6(Struct6I)
 
 type Struct7
     x::Int64
     y::Cchar
 end
+immutable Struct7I
+    x::Int64
+    y::Cchar
+end
 
-let a, b, x
-    a = Struct7(-384082741977533896, 'h')
+function test_struct7{Struct}(::Type{Struct})
+    a = Struct(-384082741977533896, 'h')
     b = Int8(42)
 
-    x = ccall((:test_7, libccalltest), Struct7, (Struct7,Int8), a, b)
+    x = ccall((:test_7, libccalltest), Struct, (Struct, Int8), a, b)
 
     @test x.x == a.x+Int(b)*1
     @test x.y == a.y-Int(b)*2
 end
+test_struct7(Struct7)
+test_struct7(Struct7I)
 
 type Struct8
     x::Int32
     y::Cchar
 end
+immutable Struct8I
+    x::Int32
+    y::Cchar
+end
 
-let a, b, x
-    a = Struct8(-384082896, 'h')
+function test_struct8{Struct}(::Type{Struct})
+    a = Struct(-384082896, 'h')
     b = Int8(42)
 
-    r8 = ccall((:test_8, libccalltest), Struct8, (Struct8,Int8), a, b)
+    r8 = ccall((:test_8, libccalltest), Struct, (Struct, Int8), a, b)
 
     @test r8.x == a.x+b*1
     @test r8.y == a.y-b*2
 end
+test_struct8(Struct8)
+test_struct8(Struct8I)
 
 type Struct9
     x::Int32
     y::Int16
 end
+immutable Struct9I
+    x::Int32
+    y::Int16
+end
 
-let a, b, x
-    a = Struct9(-394092996, -3840)
+function test_struct9{Struct}(::Type{Struct})
+    a = Struct(-394092996, -3840)
     b = Int16(42)
 
-    x = ccall((:test_9, libccalltest), Struct9, (Struct9,Int16), a, b)
+    x = ccall((:test_9, libccalltest), Struct, (Struct, Int16), a, b)
 
     @test x.x == a.x+b*1
     @test x.y == a.y-b*2
 end
+test_struct9(Struct9)
+test_struct9(Struct9I)
 
 type Struct10
     x::Cchar
@@ -260,89 +307,125 @@ type Struct10
     z::Cchar
     a::Cchar
 end
+immutable Struct10I
+    x::Cchar
+    y::Cchar
+    z::Cchar
+    a::Cchar
+end
 
-let a, b, x
-    a = Struct10('0', '1', '2', '3')
+function test_struct10{Struct}(::Type{Struct})
+    a = Struct('0', '1', '2', '3')
     b = Int8(2)
 
-    x = ccall((:test_10, libccalltest), Struct10, (Struct10,Int8), a, b)
+    x = ccall((:test_10, libccalltest), Struct, (Struct, Int8), a, b)
 
     @test x.x == a.x+b*1
     @test x.y == a.y-b*2
     @test x.z == a.z+b*3
     @test x.a == a.a-b*4
 end
+test_struct10(Struct10)
+test_struct10(Struct10I)
 
 type Struct11
     x::Complex64
 end
+immutable Struct11I
+    x::Complex64
+end
 
-let a, b, x
-    a = Struct11(0.8877077f0 + 0.4591081f0im)
+function test_struct11{Struct}(::Type{Struct})
+    a = Struct(0.8877077f0 + 0.4591081f0im)
     b = Float32(42)
 
-    x = ccall((:test_11, libccalltest), Struct11, (Struct11,Float32), a, b)
+    x = ccall((:test_11, libccalltest), Struct, (Struct, Float32), a, b)
 
     @test x.x ≈ a.x + b*1 - b*2im
 end
+test_struct11(Struct11)
+test_struct11(Struct11I)
 
 type Struct12
     x::Complex64
     y::Complex64
 end
+immutable Struct12I
+    x::Complex64
+    y::Complex64
+end
 
-let a, b, x
-    a = Struct12(0.8877077f5 + 0.4591081f2im, 0.0004842868f0 - 6982.3265f3im)
+function test_struct12{Struct}(::Type{Struct})
+    a = Struct(0.8877077f5 + 0.4591081f2im, 0.0004842868f0 - 6982.3265f3im)
     b = Float32(42)
 
-    x = ccall((:test_12, libccalltest), Struct12, (Struct12,Float32), a, b)
+    x = ccall((:test_12, libccalltest), Struct, (Struct, Float32), a, b)
 
     @test x.x ≈ a.x + b*1 - b*2im
     @test x.y ≈ a.y + b*3 - b*4im
 end
+test_struct12(Struct12)
+test_struct12(Struct12I)
 
 type Struct13
     x::Complex128
 end
+immutable Struct13I
+    x::Complex128
+end
 
-let a, b, x
-    a = Struct13(42968.97560380495 - 803.0576845153616im)
+function test_struct13{Struct}(::Type{Struct})
+    a = Struct(42968.97560380495 - 803.0576845153616im)
     b = Float64(42)
 
-    x = ccall((:test_13, libccalltest), Struct13, (Struct13,Float64), a, b)
+    x = ccall((:test_13, libccalltest), Struct, (Struct, Float64), a, b)
 
     @test x.x ≈ a.x + b*1 - b*2im
 end
+test_struct13(Struct13)
+test_struct13(Struct13I)
 
 type Struct14
     x::Float32
     y::Float32
 end
+immutable Struct14I
+    x::Float32
+    y::Float32
+end
 
-let a, b, x
-    a = Struct14(0.024138331f0, 0.89759064f32)
+function test_struct14{Struct}(::Type{Struct})
+    a = Struct(0.024138331f0, 0.89759064f32)
     b = Float32(42)
 
-    x = ccall((:test_14, libccalltest), Struct14, (Struct14,Float32), a, b)
+    x = ccall((:test_14, libccalltest), Struct, (Struct, Float32), a, b)
 
     @test x.x ≈ a.x + b*1
     @test x.y ≈ a.y - b*2
 end
+test_struct14(Struct14)
+test_struct14(Struct14I)
 
 type Struct15
     x::Float64
     y::Float64
 end
+immutable Struct15I
+    x::Float64
+    y::Float64
+end
 
-let a, b, x
-    a = Struct15(4.180997967273657, -0.404218594294923)
+function test_struct15{Struct}(::Type{Struct})
+    a = Struct(4.180997967273657, -0.404218594294923)
     b = Float64(42)
 
-    x = ccall((:test_15, libccalltest), Struct15, (Struct15,Float64), a, b)
+    x = ccall((:test_15, libccalltest), Struct, (Struct, Float64), a, b)
 
     @test x.x ≈ a.x + b*1
     @test x.y ≈ a.y - b*2
 end
+test_struct15(Struct15)
+test_struct15(Struct15I)
 
 type Struct16
     x::Float32
@@ -352,13 +435,21 @@ type Struct16
     b::Float64
     c::Float64
 end
+immutable Struct16I
+    x::Float32
+    y::Float32
+    z::Float32
+    a::Float64
+    b::Float64
+    c::Float64
+end
 
-let a, b, x
-    a = Struct16(0.1604656f0, 0.6297606f0, 0.83588994f0,
-                 0.6460273620993535, 0.9472692581106656, 0.47328535437352093)
+function test_struct16{Struct}(::Type{Struct})
+    a = Struct(0.1604656f0, 0.6297606f0, 0.83588994f0,
+               0.6460273620993535, 0.9472692581106656, 0.47328535437352093)
     b = Float32(42)
 
-    x = ccall((:test_16, libccalltest), Struct16, (Struct16,Float32), a, b)
+    x = ccall((:test_16, libccalltest), Struct, (Struct, Float32), a, b)
 
     @test x.x ≈ a.x + b*1
     @test x.y ≈ a.y - b*2
@@ -367,6 +458,53 @@ let a, b, x
     @test x.b ≈ a.b + b*5
     @test x.c ≈ a.c - b*6
 end
+test_struct16(Struct16)
+test_struct16(Struct16I)
+
+type Struct17
+    a::Int8
+    b::Int16
+end
+immutable Struct17I
+    a::Int8
+    b::Int16
+end
+
+function test_struct17{Struct}(::Type{Struct})  # checks a ccall to test_17 using the given struct type
+    a = Struct(2, 10)
+    b = Int8(2)
+
+    x = ccall((:test_17, libccalltest), Struct, (Struct, Int8), a, b)  # Struct is both the argument and the return type
+
+    @test x.a == a.a + b * 1  # mirrors `a.a += b*1` in test_17
+    @test x.b == a.b - b * 2  # mirrors `a.b -= b*2` in test_17
+end
+test_struct17(Struct17)
+test_struct17(Struct17I)
+
+type Struct18
+    a::Int8
+    b::Int8
+    c::Int8
+end
+immutable Struct18I
+    a::Int8
+    b::Int8
+    c::Int8
+end
+
+function test_struct18{Struct}(::Type{Struct})  # checks a ccall to test_18 using the given struct type
+    a = Struct(2, 10, -3)
+    b = Int8(2)
+
+    x = ccall((:test_18, libccalltest), Struct, (Struct, Int8), a, b)  # Struct is both the argument and the return type
+
+    @test x.a == a.a + b * 1  # mirrors `a.a += b*1` in test_18
+    @test x.b == a.b - b * 2  # mirrors `a.b -= b*2` in test_18
+    @test x.c == a.c + b * 3  # mirrors `a.c += b*3` in test_18
+end
+test_struct18(Struct18)
+test_struct18(Struct18I)
 
 let a, b, x
     a = Int128(0x7f00123456789abc)<<64 + typemax(UInt64)
@@ -383,19 +521,27 @@ type Struct_Big
     y::Int
     z::Int8
 end
+immutable Struct_BigI
+    x::Int
+    y::Int
+    z::Int8
+end
 copy(a::Struct_Big) = Struct_Big(a.x, a.y, a.z)
+copy(a::Struct_BigI) = a
 
-let a, a2, x
-    a = Struct_Big(424,-5,Int8('Z'))
+function test_struct_big{Struct}(::Type{Struct})
+    a = Struct(424,-5,Int8('Z'))
     a2 = copy(a)
 
-    x = ccall((:test_big, libccalltest), Struct_Big, (Struct_Big,), a2)
+    x = ccall((:test_big, libccalltest), Struct, (Struct,), a2)
 
     @test a2.x == a.x && a2.y == a.y && a2.z == a.z
     @test x.x == a.x + 1
     @test x.y == a.y - 2
     @test x.z == a.z - Int('A')
 end
+test_struct_big(Struct_Big)
+test_struct_big(Struct_BigI)
 
 const Struct_huge1a = NTuple{8, Int64}
 const Struct_huge1b = NTuple{9, Int64}