Skip to content

Commit

Permalink
Port clzsi2 from compiler_rt, required for using std.fmt.format on so…
Browse files Browse the repository at this point in the history
…me ARM architecture.
  • Loading branch information
mlarouche committed Jan 16, 2020
1 parent 8a792db commit ccbdff2
Show file tree
Hide file tree
Showing 3 changed files with 444 additions and 1 deletion.
6 changes: 5 additions & 1 deletion lib/std/special/compiler_rt.zig
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ comptime {
@export(@import("compiler_rt/negXf2.zig").__negsf2, .{ .name = "__negsf2", .linkage = linkage });
@export(@import("compiler_rt/negXf2.zig").__negdf2, .{ .name = "__negdf2", .linkage = linkage });

@export(@import("compiler_rt/clzsi2.zig").__clzsi2, .{ .name = "__clzsi2", .linkage = linkage });

if (is_arm_arch and !is_arm_64 and !is_test) {
@export(@import("compiler_rt/arm.zig").__aeabi_unwind_cpp_pr0, .{ .name = "__aeabi_unwind_cpp_pr0", .linkage = strong_linkage });
@export(@import("compiler_rt/arm.zig").__aeabi_unwind_cpp_pr1, .{ .name = "__aeabi_unwind_cpp_pr1", .linkage = linkage });
Expand Down Expand Up @@ -177,7 +179,9 @@ comptime {
@export(@import("compiler_rt/arm.zig").__aeabi_memclr, .{ .name = "__aeabi_memclr4", .linkage = linkage });
@export(@import("compiler_rt/arm.zig").__aeabi_memclr, .{ .name = "__aeabi_memclr8", .linkage = linkage });

@export(@import("compiler_rt/arm.zig").__aeabi_read_tp, .{ .name = "__aeabi_read_tp", .linkage = linkage });
if (builtin.os == .linux) {
@export(@import("compiler_rt/arm.zig").__aeabi_read_tp, .{ .name = "__aeabi_read_tp", .linkage = linkage });
}

@export(@import("compiler_rt/extendXfYf2.zig").__aeabi_f2d, .{ .name = "__aeabi_f2d", .linkage = linkage });
@export(@import("compiler_rt/floatsiXf.zig").__aeabi_i2d, .{ .name = "__aeabi_i2d", .linkage = linkage });
Expand Down
150 changes: 150 additions & 0 deletions lib/std/special/compiler_rt/clzsi2.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Ported from:
//
// https://github.com/llvm-mirror/compiler-rt/blob/f0745e8476f069296a7c71accedd061dce4cdf79/lib/builtins/clzsi2.c
// https://github.com/llvm-mirror/compiler-rt/blob/f0745e8476f069296a7c71accedd061dce4cdf79/lib/builtins/arm/clzsi2.S
const builtin = @import("builtin");

// Portable count-leading-zeros for a 32-bit value.
//
// Narrows the search window by halves (16, 8, 4, 2 bits): whenever the upper
// part of the current window is empty its width is added to the running count,
// otherwise the upper part is shifted down so it becomes the new window.
//
// Precondition: a != 0
fn __clzsi2_generic(a: i32) callconv(.C) i32 {
    @setRuntimeSafety(builtin.is_test);

    var bits: c_uint = @bitCast(c_uint, a);
    var zeros: c_int = 0;

    // 32-bit window: if the top 16 bits are clear they are all leading zeros.
    const skip16: c_int = if ((bits & 0xFFFF0000) == 0) 16 else 0;
    bits >>= @intCast(u5, 16 - skip16); // bits is now within [0, 0xFFFF]
    zeros += skip16; // zeros is 0 or 16

    // 16-bit window.
    const skip8: c_int = if ((bits & 0xFF00) == 0) 8 else 0;
    bits >>= @intCast(u5, 8 - skip8); // bits is now within [0, 0xFF]
    zeros += skip8; // zeros is one of 0, 8, 16, 24

    // 8-bit window.
    const skip4: c_int = if ((bits & 0xF0) == 0) 4 else 0;
    bits >>= @intCast(u5, 4 - skip4); // bits is now within [0, 0xF]
    zeros += skip4; // zeros is a multiple of 4 in [0, 28]

    // 4-bit window.
    const skip2: c_int = if ((bits & 0xC) == 0) 2 else 0;
    bits >>= @intCast(u5, 2 - skip2); // bits is now within [0, 3]
    zeros += skip2; // zeros is even and within [0, 30]

    // Two bits remain:
    //   0 -> two more zeros, 1 -> one more zero, 2 or 3 -> none.
    const tail: i32 = if ((bits & 2) == 0) 2 - @intCast(i32, bits) else 0;
    return @intCast(i32, zeros) + tail;
}

// Counts the leading zero bits of `a` with a single ARM `clz` instruction.
// The argument is bound to r0 on entry and the result is read back from r0,
// so the instruction works in place on that register.
// NOTE(review): r0 is also listed as a clobber although it is already the
// output register; presumably redundant, confirm before changing.
fn __clzsi2_arm_clz(a: i32) callconv(.C) i32 {
return asm volatile(
\\ clz r0,r0
: [ret] "={r0}" (->i32)
: [a] "{r0}" (a),
: "r0"
);
}

// Counts the leading zero bits of `a` using plain ARM-state instructions
// (shifts and conditionally executed moves/adds), for use when the `clz`
// instruction is not selected.
//
// The argument is bound to r0 on entry and the result is read back from r0;
// r1 and r2 are used as scratch registers.
//
// Every `lsrs` updates the condition flags and the following `movne`/`addeq`
// consume them, so the flags register ("cc") is declared as clobbered along
// with the scratch registers. Without that declaration the compiler is free to
// assume the flags survive the assembly block.
//
// Precondition: a != 0 (see the "Assumption" note inside the assembly).
fn __clzsi2_arm32(a: i32) callconv(.C) i32 {
    return asm volatile (
        \\ // Assumption: n != 0
        \\ // r0: n
        \\ // r1: count of leading zeros in n + 1
        \\ // r2: scratch register for shifted r0
        \\ mov r1, #1
        \\
        \\ // Basic block:
        \\ // if ((r0 >> SHIFT) == 0)
        \\ // r1 += SHIFT;
        \\ // else
        \\ // r0 >>= SHIFT;
        \\ // for descending powers of two as SHIFT.
        \\ lsrs r2, r0, #16
        \\ movne r0, r2
        \\ addeq r1, #16
        \\
        \\ lsrs r2, r0, #8
        \\ movne r0, r2
        \\ addeq r1, #8
        \\
        \\ lsrs r2, r0, #4
        \\ movne r0, r2
        \\ addeq r1, #4
        \\
        \\ lsrs r2, r0, #2
        \\ movne r0, r2
        \\ addeq r1, #2
        \\
        \\ // The basic block invariants at this point are (r0 >> 2) == 0 and
        \\ // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
        \\ //
        \\ // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)f
        \\ // ---+----------------+----------------+------------+--------------
        \\ // 1 | 1 | 0 | 0 | 1
        \\ // 2 | 0 | 1 | -1 | 0
        \\ // 3 | 0 | 1 | -1 | 0
        \\ //
        \\ // The r1's initial value of 1 compensates for the 1 here.
        \\ sub r0, r1, r0, lsr #1
        : [ret] "={r0}" (->i32)
        : [a] "{r0}" (a),
        : "r0", "r1", "r2", "cc"
    );
}

// Compile-time flag selecting the `clz`-instruction implementation.
//
// * arm / armeb: true for every sub-architecture except v4t.
// * thumb / thumbeb: false for v4t, v5, v5te, v6, v6k and v6m; true otherwise.
// * any other architecture: false.
const can_use_arm_clz = switch (builtin.arch) {
    .arm, .armeb => |sub_arch| sub_arch != .v4t,
    .thumb, .thumbeb => |sub_arch| switch (sub_arch) {
        .v4t, .v5, .v5te, .v6, .v6k, .v6m => false,
        else => true,
    },
    else => false,
};

// Compile-time flag: true only when the target architecture is `arm` or
// `armeb` (i.e. not one of the thumb variants and not any other architecture).
const is_arm32_no_thumb = switch (builtin.arch) {
    .arm, .armeb => true,
    else => false,
};

// Public entry point: picks one implementation at compile time.
// Preference order: the `clz` instruction when `can_use_arm_clz` is set, then
// the ARM-state assembly routine when `is_arm32_no_thumb` is set, and the
// portable Zig version otherwise.
pub const __clzsi2 = if (can_use_arm_clz)
    __clzsi2_arm_clz
else if (is_arm32_no_thumb)
    __clzsi2_arm32
else
    __clzsi2_generic;

// Imports clzsi2_test.zig and discards the result.
// NOTE(review): presumably this exists so the test declarations in that file
// are compiled and run together with this one; confirm against the test runner.
test "test clzsi2" {
_ = @import("clzsi2_test.zig");
}
Loading

0 comments on commit ccbdff2

Please sign in to comment.