Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 47 additions & 14 deletions cranelift/codegen/meta/src/isa/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ pub(crate) fn define() -> TargetIsa {
"SSSE3: CPUID.01H:ECX.SSSE3[bit 9]",
false,
);
let has_cmpxchg16b = settings.add_bool(
"has_cmpxchg16b",
"Has support for CMPXCHG16b.",
"CMPXCHG16b: CPUID.01H:ECX.CMPXCHG16B[bit 13]",
false,
);
let has_sse41 = settings.add_bool(
"has_sse41",
"Has support for SSE4.1.",
Expand Down Expand Up @@ -106,6 +112,7 @@ pub(crate) fn define() -> TargetIsa {
false,
);

settings.add_predicate("use_cmpxchg16b", predicate!(has_cmpxchg16b));
settings.add_predicate("use_ssse3", predicate!(has_ssse3));
settings.add_predicate("use_sse41", predicate!(has_sse41));
settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42));
Expand Down Expand Up @@ -141,14 +148,30 @@ pub(crate) fn define() -> TargetIsa {
// Intel CPUs

// Netburst
settings.add_preset("nocona", "Nocona microarchitecture.", preset!(sse3));
settings.add_preset(
"nocona",
"Nocona microarchitecture.",
preset!(sse3 && has_cmpxchg16b),
);

// Intel Core 2 Solo/Duo
settings.add_preset("core2", "Core 2 microarchitecture.", preset!(sse3));
settings.add_preset("penryn", "Penryn microarchitecture.", preset!(sse41));
settings.add_preset(
"core2",
"Core 2 microarchitecture.",
preset!(sse3 && has_cmpxchg16b),
);
settings.add_preset(
"penryn",
"Penryn microarchitecture.",
preset!(sse41 && has_cmpxchg16b),
);

// Intel Atom CPUs
let atom = settings.add_preset("atom", "Atom microarchitecture.", preset!(ssse3));
let atom = settings.add_preset(
"atom",
"Atom microarchitecture.",
preset!(ssse3 && has_cmpxchg16b),
);
settings.add_preset("bonnell", "Bonnell microarchitecture.", preset!(atom));
let silvermont = settings.add_preset(
"silvermont",
Expand Down Expand Up @@ -186,7 +209,7 @@ pub(crate) fn define() -> TargetIsa {
let nehalem = settings.add_preset(
"nehalem",
"Nehalem microarchitecture.",
preset!(sse42 && has_popcnt),
preset!(sse42 && has_popcnt && has_cmpxchg16b),
);
settings.add_preset("corei7", "Core i7 microarchitecture.", preset!(nehalem));
let westmere = settings.add_preset("westmere", "Westmere microarchitecture.", preset!(nehalem));
Expand Down Expand Up @@ -229,7 +252,15 @@ pub(crate) fn define() -> TargetIsa {
let knights_landing = settings.add_preset(
"knl",
"Knights Landing microarchitecture.",
preset!(has_popcnt && has_avx512f && has_fma && has_bmi1 && has_bmi2 && has_lzcnt),
preset!(
has_popcnt
&& has_avx512f
&& has_fma
&& has_bmi1
&& has_bmi2
&& has_lzcnt
&& has_cmpxchg16b
),
);
settings.add_preset(
"knm",
Expand Down Expand Up @@ -312,22 +343,22 @@ pub(crate) fn define() -> TargetIsa {
settings.add_preset(
"opteron-sse3",
"Opteron microarchitecture with support for SSE3 instructions.",
preset!(sse3),
preset!(sse3 && has_cmpxchg16b),
);
settings.add_preset(
"k8-sse3",
"K8 Hammer microarchitecture with support for SSE3 instructions.",
preset!(sse3),
preset!(sse3 && has_cmpxchg16b),
);
settings.add_preset(
"athlon64-sse3",
"Athlon 64 microarchitecture with support for SSE3 instructions.",
preset!(sse3),
preset!(sse3 && has_cmpxchg16b),
);
let barcelona = settings.add_preset(
"barcelona",
"Barcelona microarchitecture.",
preset!(has_popcnt && has_lzcnt),
preset!(has_popcnt && has_lzcnt && has_cmpxchg16b),
);
settings.add_preset(
"amdfam10",
Expand All @@ -338,7 +369,7 @@ pub(crate) fn define() -> TargetIsa {
let btver1 = settings.add_preset(
"btver1",
"Bobcat microarchitecture.",
preset!(ssse3 && has_lzcnt && has_popcnt),
preset!(ssse3 && has_lzcnt && has_popcnt && has_cmpxchg16b),
);
settings.add_preset(
"btver2",
Expand All @@ -349,7 +380,7 @@ pub(crate) fn define() -> TargetIsa {
let bdver1 = settings.add_preset(
"bdver1",
"Bulldozer microarchitecture",
preset!(has_lzcnt && has_popcnt && ssse3),
preset!(has_lzcnt && has_popcnt && ssse3 && has_cmpxchg16b),
);
let bdver2 = settings.add_preset(
"bdver2",
Expand All @@ -366,7 +397,9 @@ pub(crate) fn define() -> TargetIsa {
let znver1 = settings.add_preset(
"znver1",
"Zen (first generation) microarchitecture.",
preset!(sse42 && has_popcnt && has_bmi1 && has_bmi2 && has_lzcnt && has_fma),
preset!(
sse42 && has_popcnt && has_bmi1 && has_bmi2 && has_lzcnt && has_fma && has_cmpxchg16b
),
);
let znver2 = settings.add_preset(
"znver2",
Expand Down Expand Up @@ -397,7 +430,7 @@ pub(crate) fn define() -> TargetIsa {
let x86_64_v2 = settings.add_preset(
"x86-64-v2",
"Generic x86-64 (V2) microarchitecture.",
preset!(sse42 && has_popcnt),
preset!(sse42 && has_popcnt && has_cmpxchg16b),
);
let x86_64_v3 = settings.add_preset(
"x84_64_v3",
Expand Down
35 changes: 19 additions & 16 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3637,18 +3637,19 @@ pub(crate) fn define(
let AtomicMem = &TypeVar::new(
"AtomicMem",
"Any type that can be stored in memory, which can be used in an atomic operation",
TypeSetBuilder::new().ints(8..64).build(),
TypeSetBuilder::new().ints(8..128).build(),
);

ig.push(
Inst::new(
"atomic_rmw",
r#"
Atomically read-modify-write memory at `p`, with second operand `x`. The old value is
returned. `p` has the type of the target word size, and `x` may be an integer type of
8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned value is the
same as the type of `x`. This operation is sequentially consistent and creates
happens-before edges that order normal (non-atomic) loads and stores.
returned. `p` has the type of the target word size, and `x` may be any integer type; note
that some targets require specific target features to be enabled in order to support 128-bit
integer atomics. The type of the returned value is the same as the type of `x`. This
operation is sequentially consistent and creates happens-before edges that order normal
(non-atomic) loads and stores.
"#,
&formats.atomic_rmw,
)
Expand All @@ -3673,11 +3674,11 @@ pub(crate) fn define(
Perform an atomic compare-and-swap operation on memory at `p`, with expected value `e`,
storing `x` if the value at `p` equals `e`. The old value at `p` is returned,
regardless of whether the operation succeeds or fails. `p` has the type of the target
word size, and `x` and `e` must have the same type and the same size, which may be an
integer type of 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned
value is the same as the type of `x` and `e`. This operation is sequentially
consistent and creates happens-before edges that order normal (non-atomic) loads and
stores.
word size, and `x` and `e` must have the same type and the same size, which may be any
integer type; note that some targets require specific target features to be enabled in order
to support 128-bit integer atomics. The type of the returned value is the same as the type
of `x` and `e`. This operation is sequentially consistent and creates happens-before edges
that order normal (non-atomic) loads and stores.
"#,
&formats.atomic_cas,
)
Expand All @@ -3702,9 +3703,10 @@ pub(crate) fn define(
Atomically load from memory at `p`.

This is a polymorphic instruction that can load any value type which has a memory
representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
This operation is sequentially consistent and creates happens-before edges that order
normal (non-atomic) loads and stores.
representation. It can only be used for integer types; note that some targets require
specific target features to be enabled in order to support 128-bit integer atomics. This
operation is sequentially consistent and creates happens-before edges that order normal
(non-atomic) loads and stores.
"#,
&formats.load_no_offset,
)
Expand All @@ -3726,9 +3728,10 @@ pub(crate) fn define(
Atomically store `x` to memory at `p`.

This is a polymorphic instruction that can store any value type with a memory
representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
This operation is sequentially consistent and creates happens-before edges that order
normal (non-atomic) loads and stores.
representation. It can only be used for integer types; note that some targets require
specific target features to be enabled in order to support 128-bit integer atomics. This
operation is sequentially consistent and creates happens-before edges that order normal
(non-atomic) loads and stores.
"#,
&formats.store_no_offset,
)
Expand Down
107 changes: 107 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,24 @@
(mem SyntheticAmode)
(dst_old WritableReg))

;; A standard (native) `lock cmpxchg16b (amode)`, with register
;; conventions:
;;
;; `mem` (read) address
;; %rbx (low), %rcx (high) (read) replacement value
;; %rax (low), %rdx (high) (modified) in: expected value, out: value that was actually at `mem`
;; %rflags is written. Do not assume anything about it after the instruction.
;;
;; The instruction "succeeded" iff the bits of %rax and %rdx
;; afterwards are the same as they were before.
(LockCmpxchg16b (replacement_low Reg)
(replacement_high Reg)
(expected_low Reg)
(expected_high Reg)
(mem BoxSyntheticAmode)
(dst_old_low WritableReg)
(dst_old_high WritableReg))

;; A synthetic instruction, based on a loop around a native `lock
;; cmpxchg` instruction.
;;
Expand Down Expand Up @@ -696,6 +714,46 @@
(temp WritableReg)
(dst_old WritableReg))

;; A synthetic instruction, based on a loop around a native `lock
;; cmpxchg16b` instruction.
;;
;; This is the same as `AtomicRmwSeq`, but for 128-bit integers.
;;
;; For `MachAtomicRmwOp::Xchg`, use `Atomic128XchgSeq` instead.
;;
;; This instruction sequence has fixed register uses as follows:
;; - %rax (low), %rdx (high) (written) the old value at `mem`
;; - %rbx (low), %rcx (high) (written) used as temp registers to hold
;; the replacement value
;; - %rflags is written. Do not assume anything about it after the
;; instruction.
(Atomic128RmwSeq (op MachAtomicRmwOp)
(mem BoxSyntheticAmode)
(operand_low Reg)
(operand_high Reg)
(temp_low WritableReg)
(temp_high WritableReg)
(dst_old_low WritableReg)
(dst_old_high WritableReg))

;; A synthetic instruction, based on a loop around a native `lock
;; cmpxchg16b` instruction.
;;
;; This is the same as `Atomic128RmwSeq`, but only for `MachAtomicRmwOp::Xchg`. As
;; the replacement value is the same every time, this instruction doesn't
;; require any temporary registers.
;;
;; This instruction sequence has fixed register uses as follows:
;; - %rax (low), %rdx (high) (written) the old value at `mem`
;; - %rbx (low), %rcx (high) (read) the replacement value
;; - %rflags is written. Do not assume anything about it after the
;; instruction.
(Atomic128XchgSeq (mem SyntheticAmode)
(operand_low Reg)
(operand_high Reg)
(dst_old_low WritableReg)
(dst_old_high WritableReg))

;; A memory fence (mfence, lfence or sfence).
(Fence (kind FenceKind))

Expand Down Expand Up @@ -762,6 +820,11 @@
(type BoxCallIndInfo extern (enum))
(type BoxReturnCallInfo extern (enum))
(type BoxReturnCallIndInfo extern (enum))
(type BoxSyntheticAmode extern (enum))

;; Wrap a `SyntheticAmode` into a `BoxSyntheticAmode`. The constructor is
;; implemented externally (in Rust) and is registered below as an implicit
;; conversion, so a `SyntheticAmode` can be passed directly wherever a
;; `BoxSyntheticAmode` is expected.
(decl pure box_synthetic_amode (SyntheticAmode) BoxSyntheticAmode)
(extern constructor box_synthetic_amode box_synthetic_amode)
(convert SyntheticAmode BoxSyntheticAmode box_synthetic_amode)

;; Get the `OperandSize` for a given `Type`, rounding smaller types up to 32 bits.
(decl operand_size_of_type_32_64 (Type) OperandSize)
Expand Down Expand Up @@ -1862,6 +1925,9 @@
(decl pure use_avx2 () bool)
(extern constructor use_avx2 use_avx2)

;; Externally implemented, pure query returning a `bool`.
;; NOTE(review): presumably reports whether the `use_cmpxchg16b` ISA predicate
;; is enabled for the current target -- confirm against the Rust-side
;; implementation.
(decl pure use_cmpxchg16b () bool)
(extern constructor use_cmpxchg16b use_cmpxchg16b)

;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;

;; Extract a constant `Imm8Reg.Imm8` from a value operand.
Expand Down Expand Up @@ -5214,13 +5280,54 @@
(_ Unit (emit (MInst.LockCmpxchg ty replacement expected addr dst))))
dst))

;; Helper for emitting a `MInst.LockCmpxchg16b` instruction.
;;
;; Arguments, in order: the expected value, the replacement value and the
;; memory address. Both values are register pairs; index 0 is passed as the
;; low half and index 1 as the high half. Two fresh temporaries receive the
;; instruction's `dst_old_low` / `dst_old_high` outputs and are returned as a
;; (low, high) register pair.
(decl x64_cmpxchg16b (ValueRegs ValueRegs SyntheticAmode) ValueRegs)
(rule (x64_cmpxchg16b expected replacement addr)
(let ((expected_low Gpr (value_regs_get_gpr expected 0))
(expected_high Gpr (value_regs_get_gpr expected 1))
(replacement_low Gpr (value_regs_get_gpr replacement 0))
(replacement_high Gpr (value_regs_get_gpr replacement 1))
(dst_low WritableGpr (temp_writable_gpr))
(dst_high WritableGpr (temp_writable_gpr))
;; Note the operand order of the instruction: replacement first, then expected.
(_ Unit (emit (MInst.LockCmpxchg16b replacement_low replacement_high expected_low expected_high addr dst_low dst_high))))
(value_regs dst_low dst_high)))

;; Helper for emitting a `MInst.AtomicRmwSeq` instruction.
;;
;; Takes the access type, the read-modify-write operation, the memory address
;; and the operand register. Allocates one temporary for the sequence's scratch
;; register and one for its `dst_old` output, and returns the latter.
(decl x64_atomic_rmw_seq (Type MachAtomicRmwOp SyntheticAmode Gpr) Gpr)
(rule (x64_atomic_rmw_seq ty op mem input)
(let ((dst WritableGpr (temp_writable_gpr))
(tmp WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.AtomicRmwSeq ty op mem input tmp dst))))
dst))

;; Helper for emitting a 128-bit atomic read-modify-write sequence.
;;
;; Takes the operation, the memory address and the operand as a register pair
;; (index 0 = low half, index 1 = high half). Returns the sequence's
;; `dst_old_low` / `dst_old_high` outputs as a (low, high) register pair.

(decl x64_atomic_128_rmw_seq (MachAtomicRmwOp SyntheticAmode ValueRegs) ValueRegs)
;; General case: emit `MInst.Atomic128RmwSeq`, which additionally needs a pair
;; of temporary registers.
(rule (x64_atomic_128_rmw_seq op mem input)
(let ((dst_low WritableGpr (temp_writable_gpr))
(dst_high WritableGpr (temp_writable_gpr))
(tmp_low WritableGpr (temp_writable_gpr))
(tmp_high WritableGpr (temp_writable_gpr))
(input_low Gpr (value_regs_get_gpr input 0))
(input_high Gpr (value_regs_get_gpr input 1))
(_ Unit (emit (MInst.Atomic128RmwSeq op mem input_low input_high tmp_low tmp_high dst_low dst_high))))
(value_regs dst_low dst_high)))

;; Higher-priority case: when the operation matches the
;; `mach_atomic_rmw_op_xchg` extractor, emit `MInst.Atomic128XchgSeq` instead,
;; which takes no temporary registers.
(rule 1 (x64_atomic_128_rmw_seq (mach_atomic_rmw_op_xchg) mem input)
(let ((dst_low WritableGpr (temp_writable_gpr))
(dst_high WritableGpr (temp_writable_gpr))
(input_low Gpr (value_regs_get_gpr input 0))
(input_high Gpr (value_regs_get_gpr input 1))
(_ Unit (emit (MInst.Atomic128XchgSeq mem input_low input_high dst_low dst_high))))
(value_regs dst_low dst_high)))

;; Helper for building a 128-bit atomic store out of `MInst.Atomic128XchgSeq`.
;;
;; Takes the memory address and the value to store as a register pair (index 0
;; = low half, index 1 = high half). The exchange sequence still writes its
;; `dst_old_*` outputs, so two temporaries are allocated for them, but they are
;; not returned: the instruction is wrapped as a `SideEffectNoResult.Inst`
;; rather than emitted here.
(decl x64_atomic_128_store_seq (SyntheticAmode ValueRegs) SideEffectNoResult)
(rule (x64_atomic_128_store_seq mem input)
(let ((dst_low WritableGpr (temp_writable_gpr))
(dst_high WritableGpr (temp_writable_gpr))
(input_low Gpr (value_regs_get_gpr input 0))
(input_high Gpr (value_regs_get_gpr input 1)))
(SideEffectNoResult.Inst (MInst.Atomic128XchgSeq mem input_low input_high dst_low dst_high))))

;; Argument-less extractor over `MachAtomicRmwOp`, implemented externally by
;; `mach_atomic_rmw_op_is_xchg`.
;; NOTE(review): presumably matches only the `Xchg` operation -- confirm
;; against the Rust-side implementation.
(decl mach_atomic_rmw_op_xchg () MachAtomicRmwOp)
(extern extractor mach_atomic_rmw_op_xchg mach_atomic_rmw_op_is_xchg)

;; CLIF IR has one enumeration for atomic operations (`AtomicRmwOp`) while the
;; mach backend has another (`MachAtomicRmwOp`)--this converts one to the other.
(type MachAtomicRmwOp extern (enum))
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,7 @@ pub enum CmpOpcode {
pub(crate) enum InstructionSet {
SSE,
SSE2,
CMPXCHG16b,
SSSE3,
SSE41,
SSE42,
Expand Down
Loading