-
Notifications
You must be signed in to change notification settings - Fork 212
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add addition, subtraction, multiplication, and compare operations for f128
#606
Conversation
2e9bb45
to
7871c81
Compare
Powerpc seems to be hitting a SIGILL on Also looks like symbol names may be different, I need to double check https://gcc.gnu.org/wiki/Ieee128PowerPC |
Ok, some more info. Compiling C for 32-bit powerpc seems to always emit Compiling C for powerpc64 seems to use an ifunc resolver for |
Some asm for anyone who understands it better: Assembly generated from Rust (incorrect result)0000000000010d00 <.add_entry>:
10d00: 7c 08 02 a6 mflr r0
10d04: f8 21 ff 91 stdu r1,-112(r1)
10d08: f8 01 00 80 std r0,128(r1)
10d0c: 4b ff c6 95 bl d3a0 <00000143.plt_call.__addkf3>
10d10: e8 41 00 28 ld r2,40(r1)
10d14: 38 21 00 70 addi r1,r1,112
10d18: e8 01 00 10 ld r0,16(r1)
10d1c: 7c 08 03 a6 mtlr r0
10d20: 4e 80 00 20 blr
000000000000d3a0 <00000143.plt_call.__addkf3>:
d3a0: f8 41 00 28 std r2,40(r1)
d3a4: 3d 62 ff ff addis r11,r2,-1
d3a8: e9 8b 7f 58 ld r12,32600(r11)
d3ac: 7d 89 03 a6 mtctr r12
d3b0: e8 4b 7f 60 ld r2,32608(r11)
d3b4: 4e 80 04 20 bctr
0000000000056790 <.__addkf3_resolve>:
56790: 81 2d 8f 9c lwz r9,-28772(r13)
56794: 75 29 00 40 andis. r9,r9,64
56798: 41 82 00 18 beq 567b0 <.__addkf3_resolve+0x20>
5679c: e8 62 80 10 ld r3,-32752(r2)
567a0: 4e 80 00 20 blr
567a4: 60 00 00 00 nop
567a8: 60 00 00 00 nop
567ac: 60 42 00 00 ori r2,r2,0
567b0: e8 62 80 18 ld r3,-32744(r2)
567b4: 4e 80 00 20 blr
...
567c4: 60 00 00 00 nop
567c8: 60 00 00 00 nop
567cc: 60 42 00 00 ori r2,r2,0
000000000005e6e0 <.__addkf3_hw>:
5e6e0: fc 42 18 08 xsaddqp v2,v2,v3
5e6e4: 4e 80 00 20 blr
...
5e6f4: 60 00 00 00 nop
5e6f8: 60 00 00 00 nop
5e6fc: 60 00 00 00 nop
0000000000057050 <.__addkf3_sw>:
57050: fb 41 ff d0 std r26,-48(r1)
57054: fb 61 ff d8 std r27,-40(r1)
57058: fb 81 ff e0 std r28,-32(r1)
5705c: fb a1 ff e8 std r29,-24(r1)
57060: fb c1 ff f0 std r30,-16(r1)
57064: fb e1 ff f8 std r31,-8(r1)
57068: f8 21 ff 41 stdu r1,-192(r1)
5706c: 39 21 00 70 addi r9,r1,112
57070: 39 41 00 70 addi r10,r1,112
// ... full sw implementation Assembly generated from C (correct result)0000000010000af8 <.add_entry>:
10000af8: 7c 08 02 a6 mflr r0
10000afc: f8 01 00 10 std r0,16(r1)
10000b00: fb e1 ff f8 std r31,-8(r1)
10000b04: f8 21 ff 81 stdu r1,-128(r1)
10000b08: 7c 3f 0b 78 mr r31,r1
10000b0c: 39 20 00 30 li r9,48
10000b10: 39 5f 00 80 addi r10,r31,128
10000b14: 7c 4a 4f 99 stxvd2x vs34,r10,r9
10000b18: 39 20 00 40 li r9,64
10000b1c: 39 5f 00 80 addi r10,r31,128
10000b20: 7c 6a 4f 99 stxvd2x vs35,r10,r9
10000b24: 39 20 00 40 li r9,64
10000b28: 39 5f 00 80 addi r10,r31,128
10000b2c: 7c 6a 4e 99 lxvd2x vs35,r10,r9
10000b30: 39 20 00 30 li r9,48
10000b34: 39 5f 00 80 addi r10,r31,128
10000b38: 7c 4a 4e 99 lxvd2x vs34,r10,r9
10000b3c: 4b ff fb a5 bl 100006e0 <00000019.plt_call.__addkf3>
10000b40: e8 41 00 28 ld r2,40(r1)
10000b44: f0 02 14 96 xxmr vs0,vs34
10000b48: f0 40 04 91 xxmr vs34,vs0
10000b4c: 38 3f 00 80 addi r1,r31,128
10000b50: e8 01 00 10 ld r0,16(r1)
10000b54: 7c 08 03 a6 mtlr r0
10000b58: eb e1 ff f8 ld r31,-8(r1)
10000b5c: 4e 80 00 20 blr
10000b60: 00 00 00 00 .long 0x0
10000b64: 00 00 00 01 .long 0x1
10000b68: 80 01 00 01 lwz r0,1(r1)
0000000010000c40 <.__addkf3_resolve>:
10000c40: 81 2d 8f 9c lwz r9,-28772(r13)
10000c44: 75 29 00 40 andis. r9,r9,64
10000c48: 41 82 00 18 beq 10000c60 <.__addkf3_resolve+0x20>
10000c4c: e8 62 80 10 ld r3,-32752(r2)
10000c50: 4e 80 00 20 blr
10000c54: 60 00 00 00 nop
10000c58: 60 00 00 00 nop
10000c5c: 60 42 00 00 ori r2,r2,0
10000c60: e8 62 80 18 ld r3,-32744(r2)
10000c64: 4e 80 00 20 blr
...
10000c74: 60 00 00 00 nop
10000c78: 60 00 00 00 nop
10000c7c: 60 42 00 00 ori r2,r2,0
0000000010008b90 <.__addkf3_hw>:
10008b90: fc 42 18 08 xsaddqp v2,v2,v3
10008b94: 4e 80 00 20 blr
...
10008ba4: 60 00 00 00 nop
10008ba8: 60 00 00 00 nop
10008bac: 60 00 00 00 nop
0000000010001500 <.__addkf3_sw>:
10001500: fb 41 ff d0 std r26,-48(r1)
10001504: fb 61 ff d8 std r27,-40(r1)
10001508: fb 81 ff e0 std r28,-32(r1)
1000150c: fb a1 ff e8 std r29,-24(r1)
10001510: fb c1 ff f0 std r30,-16(r1)
10001514: fb e1 ff f8 std r31,-8(r1)
10001518: f8 21 ff 41 stdu r1,-192(r1)
1000151c: 39 21 00 70 addi r9,r1,112
10001520: 39 41 00 70 addi r10,r1,112
// ... Rust code#![feature(f128)]
#[no_mangle]
#[inline(never)]
fn add_entry(a: f128, b: f128) -> f128 {
a + b
}
fn main() {
let a = f128::from_bits(0x0);
let b = f128::from_bits(0x1);
dbg!(a, b);
let c = add_entry(a, b);
dbg!(c);
} C code#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#define _Float128 __float128
typedef struct {
#if __BYTE_ORDER == __LITTLE_ENDIAN
uint64_t lower, upper;
#elif __BYTE_ORDER == __BIG_ENDIAN
uint64_t upper, lower;
#else
#error missing endian check
#endif
} __attribute__((aligned(_Alignof(_Float128)))) u128;
_Float128 __addkf3(_Float128, _Float128);
void f128_print(_Float128 val) {
u128 ival = *((u128 *)(&val));
printf("%#018" PRIx64 "%016" PRIx64 " %lf\n", ival.upper, ival.lower, (double)val);
}
_Float128 new_f128(uint64_t upper, uint64_t lower) {
u128 val = { .lower = lower, .upper = upper };
return *((_Float128 *)(&val));
}
_Float128 add_entry(_Float128 a, _Float128 b) {
#ifdef USE_ADDKF3
return __addkf3(a, b);
#else
return a + b;
#endif
}
int main() {
_Float128 a = new_f128(0x0000000000000000, 0x0000000000000000);
_Float128 b = new_f128(0x0000000000000000, 0x0000000000000001);
f128_print(a);
f128_print(b);
_Float128 c = add_entry(a, b);
f128_print(c);
return 0;
} |
5119b1a
to
3133d8f
Compare
I don't have any further insight on powerpc64, so I'll just disable testing against the system for now unless you have any suggestions @Amanieu. We're still testing against apfloat so our compiler-builtins is correct, but using the system symbols may cause precision issues for users. I think we can probably just address this more after everything merges. This PR is probably best reviewed by-commit. |
@lu-zero Maybe you have some insight into the powerpc issues? |
Discussed some more at https://rust-lang.zulipchat.com/#narrow/stream/122651-general/topic/f128.20system.20libraries.20noncompliant.20platforms/near/438486364, @ecnelises is going to take a look at some point. This PR doesn't break anything new so I don't think it is blocked, opened rust-lang/rust#125109 to track the issue further. |
@Amanieu I can build and try on real hardware. We can ask for real runners, I think. |
@lu-zero could you try the code at #606 (comment) on powerpc64? I put more about how I am building at rust-lang/rust#125109 Less pressing but it would also be nice if you have a way to confirm that |
so this is what I'm getting.
|
Well that is interesting. That is 64-bit, correct? |
Yes, tested on power9, used as ppc64le |
LE seems to work fine in qemu, the green test in CI isn't using the fallback. Only |
Sadly less and less people care about BE and it is fairly annoying to set up a BE environment =/ |
Ah, well thanks for taking a look! I'll dig a little bit more but I think we are probably okay to not worry about it unless rust-lang/rust unit f128 unit tests have failures (we currently can't test much without this) |
Either qemu gets fixed for BE or we'll need a BE runner. |
I don't think the ppc64 BE is a qemu issue since the C version works fine. 32-bit hopefully should be just qemu. Agreed that native runners wouldn't be a bad thing. |
`MinInt` contains the basic methods that are only needed by integers involved in widening operations, i.e. big integers. `Int` retains all other operations and convenience methods.
Change float test macros to fall back to testing against `rustc_apfloat` when system implementations are not available, rather than just skipping tests. This allows for easier debugging where operations may not be supported.
PowerPC uses `kf` instead of `tf`: <https://gcc.gnu.org/wiki/Ieee128PowerPC>
3133d8f
to
6a847ab
Compare
This took a while to fully review, but LGTM! |
Thanks for taking a look! |
Division is not yet working, but all others were straigtforward to add so I split them out of #587.
This includes some bigger changes that are split per commit:
Int
intoInt
andMinInt
so we can use traits with bigint typesrustc_apfloat
rather than just being skipped. I needed this so I can actually debug.