Skip to content

Commit

Permalink
cmd/compile,cmd/internal/obj/riscv,runtime: use Duff's devices on ris…
Browse files Browse the repository at this point in the history
…cv64

Implement runtime.duffzero and runtime.duffcopy for riscv64.
Use obj.ADUFFZERO/obj.ADUFFCOPY for medium size, word aligned
zeroing/moving.

Change-Id: I42ec622055630c94cb77e286d8d33dbe7c9f846c
Reviewed-on: https://go-review.googlesource.com/c/go/+/237797
Run-TryBot: Cherry Zhang <[email protected]>
Reviewed-by: Joel Sing <[email protected]>
Reviewed-by: Cherry Zhang <[email protected]>
  • Loading branch information
michalderkacz authored and 4a6f656c committed Oct 28, 2020
1 parent c95bd2e commit 150d244
Show file tree
Hide file tree
Showing 9 changed files with 1,076 additions and 4 deletions.
10 changes: 9 additions & 1 deletion src/cmd/compile/internal/riscv64/ggen.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,15 @@ func zeroRange(pp *gc.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
return p
}

// TODO(jsing): Add a duff zero implementation for medium sized ranges.
if cnt <= int64(128*gc.Widthptr) {
p = pp.Appendpp(p, riscv.AADDI, obj.TYPE_CONST, 0, off, obj.TYPE_REG, riscv.REG_A0, 0)
p.Reg = riscv.REG_SP
p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
p.To.Name = obj.NAME_EXTERN
p.To.Sym = gc.Duffzero
p.To.Offset = 8 * (128 - cnt/int64(gc.Widthptr))
return p
}

// Loop, zeroing pointer width bytes at a time.
// ADD $(off), SP, T0
Expand Down
14 changes: 14 additions & 0 deletions src/cmd/compile/internal/riscv64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()

case ssa.OpRISCV64DUFFZERO:
p := s.Prog(obj.ADUFFZERO)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = gc.Duffzero
p.To.Offset = v.AuxInt

case ssa.OpRISCV64DUFFCOPY:
p := s.Prog(obj.ADUFFCOPY)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = gc.Duffcopy
p.To.Offset = v.AuxInt

default:
v.Fatalf("Unhandled op %v", v.Op)
}
Expand Down
14 changes: 14 additions & 0 deletions src/cmd/compile/internal/ssa/gen/RISCV64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,13 @@
(Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem)
(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem)

// Medium zeroing uses a Duff's device
// 8 and 128 are magic constants, see runtime/mkduff.go
(Zero [s] {t} ptr mem)
&& s%8 == 0 && s >= 16 && s <= 8*128
&& t.Alignment()%8 == 0 && !config.noDuffDevice =>
(DUFFZERO [8 * (128 - s/8)] ptr mem)

// Generic zeroing uses a loop
(Zero [s] {t} ptr mem) =>
(LoweredZero [t.Alignment()]
Expand Down Expand Up @@ -395,6 +402,13 @@
(Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)

// Medium move uses a Duff's device
// 16 and 128 are magic constants, see runtime/mkduff.go
(Move [s] {t} dst src mem)
&& s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0
&& !config.noDuffDevice && logLargeCopy(v, s) =>
(DUFFCOPY [16 * (128 - s/8)] dst src mem)

// Generic move uses a loop
(Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) =>
(LoweredMove [t.Alignment()]
Expand Down
38 changes: 38 additions & 0 deletions src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,44 @@ func init() {
{name: "CALLclosure", argLength: 3, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
{name: "CALLinter", argLength: 2, reg: callInter, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem

// duffzero
// arg0 = address of memory to zero (in X10, changed as side effect)
// arg1 = mem
// auxint = offset into duffzero code to start executing
// X1 (link register) changed because of function call
// returns mem
{
name: "DUFFZERO",
aux: "Int64",
argLength: 2,
reg: regInfo{
inputs: []regMask{regNamed["X10"]},
clobbers: regNamed["X1"] | regNamed["X10"],
},
typ: "Mem",
faultOnNilArg0: true,
},

// duffcopy
// arg0 = address of dst memory (in X11, changed as side effect)
// arg1 = address of src memory (in X10, changed as side effect)
// arg2 = mem
// auxint = offset into duffcopy code to start executing
// X1 (link register) changed because of function call
// returns mem
{
name: "DUFFCOPY",
aux: "Int64",
argLength: 3,
reg: regInfo{
inputs: []regMask{regNamed["X11"], regNamed["X10"]},
clobbers: regNamed["X1"] | regNamed["X10"] | regNamed["X11"],
},
typ: "Mem",
faultOnNilArg0: true,
faultOnNilArg1: true,
},

// Generic moves and zeros

// general unaligned zeroing
Expand Down
28 changes: 28 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions src/cmd/compile/internal/ssa/rewriteRISCV64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions src/cmd/internal/obj/riscv/obj.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func buildop(ctxt *obj.Link) {}
// lr is the link register to use for the JALR.
// p must be a CALL, JMP or RET.
func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog {
if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET {
if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET && p.As != obj.ADUFFZERO && p.As != obj.ADUFFCOPY {
ctxt.Diag("unexpected Prog in jalrToSym: %v", p)
return p
}
Expand Down Expand Up @@ -417,7 +417,7 @@ func containsCall(sym *obj.LSym) bool {
// CALLs are CALL or JAL(R) with link register LR.
for p := sym.Func().Text; p != nil; p = p.Link {
switch p.As {
case obj.ACALL:
case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
return true
case AJAL, AJALR:
if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR {
Expand Down Expand Up @@ -656,7 +656,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.From.Reg = REG_SP
}

case obj.ACALL:
case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
switch p.To.Type {
case obj.TYPE_MEM:
jalrToSym(ctxt, p, newprog, REG_LR)
Expand Down Expand Up @@ -1696,6 +1696,8 @@ var encodings = [ALAST & obj.AMask]encoding{
obj.APCDATA: pseudoOpEncoding,
obj.ATEXT: pseudoOpEncoding,
obj.ANOP: pseudoOpEncoding,
obj.ADUFFZERO: pseudoOpEncoding,
obj.ADUFFCOPY: pseudoOpEncoding,
}

// encodingForAs returns the encoding for an obj.As.
Expand Down
Loading

0 comments on commit 150d244

Please sign in to comment.