Skip to content

Commit 7492fd0

Browse files
committed
Optimisations of ALU and CPU + start of clmul[rh] support
Port (and correct) clmul support from RV32, expand to 64 bits, detect in cpu_functionblocks.si and pass to cpuexecuteSLOWPATH. Simplify wait state generator and result/frd/FPUnewflags in cpuexecuteFPU. Remove a wait state in the CPU fetch unit, plus simplify blocks. Simplify blocks in CPU load unit.
1 parent a7ff0d1 commit 7492fd0

File tree

10 files changed

+232
-33
lines changed

10 files changed

+232
-33
lines changed

README.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,16 @@ The image displayed below, shows our PAWS, for whom the project is named. From l
100100

101101

102102
## OUTRUN (DEMO)
103-
An implementation of https://www.lexaloffle.com/bbs/?tid=35767 using the GPU and DrawList2D from the PAWSv2 library, and the floating-point unit. The car is a pixelblock sprite ported from http://www.dizionariovideogiochi.it/doku.php?id=out_run with no animation. The cityscape and clouds are drawn on the two tilemaps. There are vehicles running in front of the cityscape that are just visible in one frame of the demonstration using sprites. Press FIRE2 to exit.
103+
An implementation of https://www.lexaloffle.com/bbs/?tid=35767 using the GPU and DrawList2D from the PAWSv2 library, and the floating-point unit. The car is a pixelblock sprite ported from https://www.spriters-resource.com/genesis_32x_scd/outrun/sheet/25458/ with no animation. The cityscape and clouds are drawn on the two tilemaps. There are vehicles running in front of the cityscape that are just visible in one frame of the demonstration using sprites. Press FIRE2 to exit.
104104
<br>
105105
![OUTRUN](Reference/Graphics/OUTRUN-1.jpg)
106106
![OUTRUN](Reference/Graphics/OUTRUN-2.jpg)
107107
![OUTRUN](Reference/Graphics/OUTRUN-3.jpg)
108108

109+
A full view of the cityscape backdrop (forced, not normally available), showing the background sprites.
110+
111+
![OUTRUN Full view of the cityscape](Reference/Graphics/OUTRUN-4.jpg)
112+
109113
## PACMAN (GAMES)
110114
A conversion of https://github.com/floooh/pacman.c to the PAWSv2 graphics, sound and input system.
111115
<br>

RV32/ALU.si

+1-1
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ unit aluCLMUL(
386386
uint6 stopat <:: function3[0,1] ? 32 : 31;
387387
uint6 count = uninitialised;
388388
uint1 update = uninitialised;
389-
uint32 resultNEXT <:: result ^ ( ( function3[1,1] ) ? ( sourceReg1 >> ( ( function3[0,1] ? 31 : 32 ) - count ) ) : ( sourceReg1 << count ) );
389+
uint32 resultNEXT <:: result ^ ( ( function3[1,1] ) ? ( sourceReg1 >> ( ( function3[0,1] ? 32 : 31 ) - count ) ) : ( sourceReg1 << count ) ); // NEXT ACCUMULATOR VALUE: XOR IN sourceReg1 SHIFTED FOR BIT count ( 32 BITS WIDE, MATCHING result )
390390
update := 0;
391391

392392
algorithm <autorun> {

RV64/ALU.si

+34
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,40 @@ unit aludivision(
345345
function3[1,1] ? sourceReg1 : 64hffffffffffffffff; // DIVIDE BY ZERO, dividend if remainder, -1 if quotient
346346
}
347347

348+
// ALU FOR CARRYLESS MULTIPLY FROM B-EXTENSION
// ITERATIVE, ONE BIT OF sourceReg2 PER UPDATE: result IS CLEARED ON start, THEN FOR EVERY SET BIT OF sourceReg2
// A SHIFTED COPY OF sourceReg1 IS XORED INTO result. busy IS HELD HIGH WHILE THE BIT COUNTER IS RUNNING.
//  function3[1,1]  0 = SHIFT sourceReg1 LEFT BY count, 1 = SHIFT sourceReg1 RIGHT BY ( BASE - count )
//  function3[0,1]  SELECTS THE RIGHT SHIFT BASE AND THE STOP COUNT ( 1 = 64, 0 = 63 )
//  &function3      THE COUNTER STARTS AT 1 WHEN BOTH BITS ARE SET, OTHERWISE AT 0
// PRESUMABLY function3 IS THE LOW TWO BITS OF THE INSTRUCTION funct3 ( 01 clmul, 10 clmulr, 11 clmulh ) - CONFIRM AGAINST THE CALLER
unit alucarrylessmultiply(
    input   uint1   start,
    output  uint1   busy(0),
    input   uint2   function3,
    input   uint64  sourceReg1,
    input   uint64  sourceReg2,
    output  uint64  result
) <reginputs> {
    // VALUE result TAKES IF BIT count OF sourceReg2 IS SET
    uint64  resultNEXT <:: result ^ ( ( function3[1,1] ) ? ( sourceReg1 >> ( ( function3[0,1] ? 64 : 63 ) - count ) ) : ( sourceReg1 << count ) );

    // FIRST BIT INDEX TO EXAMINE, AND THE INDEX AT WHICH THE LOOP STOPS
    // NOTE(review): WITH stopat = 63 ( function3 == 2b10 ) THE LOOP APPEARS TO VISIT BIT INDICES 0 TO 62 ONLY;
    // THE Zbc DEFINITION OF clmulr ALSO INCLUDES INDEX 63 - CONFIRM AGAINST THE SPECIFICATION AND SIMULATION
    uint7   startat <:: &function3;
    uint7   stopat <:: function3[0,1] ? 64 : 63;

    uint7   count = uninitialised;                  // CURRENT BIT INDEX INTO sourceReg2
    uint1   update = uninitialised;                 // PULSED BY THE ALGORITHM TO REQUEST ONE ACCUMULATE + COUNT STEP
    update := 0;

    algorithm <autorun> {
        while(1) {
            // RAISE busy, REQUEST AN UPDATE EVERY CYCLE UNTIL THE COUNTER REACHES stopat, THEN RELEASE busy
            if( start ) { busy = 1; while( count != stopat ) { update = 1; } busy = 0; }
        }
    }

    always_after {
        if( start ) {
            // RESET THE ACCUMULATOR AND LOAD THE FIRST BIT INDEX
            // ( startat, NOT start: start IS ALWAYS 1 HERE, WHICH WOULD SKIP BIT 0 FOR EVERY VARIANT )
            result = 0; count = startat;
        } else {
            if( update ) {
                if( sourceReg2[ count, 1 ] ) { result = resultNEXT; }
                count = count + 1;
            }
        }
    }
}
381+
348382
// INTEGER COMPARISONS
349383
unit integercomparisons(
350384
input uint64 sourceReg1,

RV64/CPU.si

+28-30
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ unit PAWSCPU(
110110
memoryinput <: memoryinput,
111111
incCSRinstret <: COMMIT,
112112
isALUM <: RV64CONTROL.IS_ALUM,
113+
isALUCLM <: RV64CONTROL.IS_ALUCLM,
113114
isCSR <: RV64CONTROL.IS_CSR,
114115
isATOMIC <: RV64CONTROL.IS_ATOMIC,
115116
FPUnewflags <: EXECUTEFPU.FPUnewflags,
@@ -372,6 +373,7 @@ unit cpuexecuteSLOWPATH(
372373
input uint1 isCSR,
373374
input uint1 isATOMIC,
374375
input uint1 isALUM,
376+
input uint1 isALUCLM,
375377
output uint8 FPUflags,
376378
input uint5 FPUnewflags,
377379
input uint1 CSRupdateFPUflags
@@ -530,31 +532,20 @@ unit cpuexecuteFPU(
530532
FPUCALC.start := fpucalc & start; CSRupdateFPUflags := 0;
531533

532534
algorithm <autorun> {
533-
// PROVIDE WAIT STATE FOR APPROPRIATE OPERATION
534535
while(1) {
535536
if( start ) {
536-
onehot( operation ) {
537-
default: {} // FPU COMPARE, MIN/MAX, CLASS, MOVE, CONVERT
538-
case 2: { while( FPUCALC.busy ) {} } // FPU CALCULATIONS
539-
}
537+
busy = 1;
538+
if( operation[2,1] ) { while( FPUCALC.busy ) {} } // WAIT STATE FOR CALCULATIONS
540539
busy = 0;
541540
CSRupdateFPUflags = 1;
542541
}
543542
}
544543
}
545544

546-
// COLLECT THE APPROPRIATE RESULT
547545
always_after {
548-
{ if( start ) { busy = 1; } }
549-
{
550-
onehot( operation ) {
551-
case 0: { result = FPUFAST.result; }
552-
case 1: { result = convertresult; }
553-
case 2: { result = FPUCALC.result; }
554-
}
555-
}
556-
{ frd = fpuconvert ? ( |converttype[0,2] ) : fpucalc ? 1 : FPUFAST.frd; }
557-
{ FPUnewflags = fpuconvert ? convertflags : fpucalc ? FPUCALC.FPUnewflags : FPUFAST.FPUnewflags; }
546+
result = operation[0,1] ? FPUFAST.result : operation[1,1] ? convertresult : FPUCALC.result; // COLLECT THE APPROPRIATE RESULT
547+
frd = fpuconvert ? ( |converttype[0,2] ) : fpucalc ? 1 : FPUFAST.frd; // WRITE TO F REGISTERS FOR TO FLOAT CONVERSIONS, CALCULATIONS AND SOME SINGLE CYCLE
548+
FPUnewflags = fpuconvert ? convertflags : fpucalc ? FPUCALC.FPUnewflags : FPUFAST.FPUnewflags; // OUTPUT NEW FPU FLAGS
558549
}
559550
}
560551

@@ -581,21 +572,24 @@ unit fetch(
581572
while(1) {
582573
if( start ) {
583574
busy = 1;
584-
while( memorybusy ) {} ++:
585-
switch( readdata[0,2] ) { // EXPAND COMPRESSED INSTRUCTION
586-
case 2b00: { instruction = { COMPRESSED00.i32, 2b11 }; } // OR KEEP 32 BIT INSTRUCTION
587-
case 2b01: { instruction = { COMPRESSED01.i32, 2b11 }; }
588-
case 2b10: { instruction = { COMPRESSED10.i32, 2b11 }; }
589-
default: { instruction = readdata; }
590-
}
591-
compressed = ( ~&readdata[0,2] ); // SET COMPRESSED FLAGS
575+
while( memorybusy ) {}
592576
busy = 0;
593577
}
594578
}
595579
}
580+
581+
always_after {
582+
switch( readdata[0,2] ) { // EXPAND COMPRESSED INSTRUCTION
583+
case 2b00: { instruction = { COMPRESSED00.i32, 2b11 }; } // OR KEEP 32 BIT INSTRUCTION
584+
case 2b01: { instruction = { COMPRESSED01.i32, 2b11 }; }
585+
case 2b10: { instruction = { COMPRESSED10.i32, 2b11 }; }
586+
default: { instruction = readdata; }
587+
}
588+
compressed = ( ~&readdata[0,2] ); // SET COMPRESSED FLAGS
589+
}
596590
}
597591

598-
// LOAD UNIT - INCLUDING SIGN EXTENSION FOR 8/16 BIT SIGNED LOADS
592+
// LOAD UNITS - INCLUDING SIGN EXTENSION FOR 8/16 BIT SIGNED LOADS
599593
unit load(
600594
input uint1 start,
601595
output! uint1 busy(0),
@@ -610,16 +604,19 @@ unit load(
610604
) <reginputs> {
611605
// SIGN FOR 8, 16, 32 BIT LOADS
612606
uint1 sign <:: ~dounsigned & ( accesssize[1,1] ? readdata[31,1] : accesssize[0,1] ? readdata[15,1] : readdata[ { byteaccess, 3b111 }, 1 ] );
613-
uint64 signextend <:: accesssize[1,1] ? { {32{sign}}, readdata } : // 32 BIT
614-
accesssize[0,1] ? { {48{sign}}, readdata[0,16] } : // 16 BIT
615-
{ {56{sign}}, readdata[ { byteaccess, 3b000 }, 8 ] }; // 8 BIT
616607
readmemory := start;
617608

618609
algorithm <autorun> {
619610
while(1) {
620-
if( start ) { busy = 1; while( memorybusy ) {} ++: memoryinput = signextend; busy = 0; }
611+
if( start ) { busy = 1; while( memorybusy ) {} ++: busy = 0; }
621612
}
622613
}
614+
615+
always_after {
616+
memoryinput = accesssize[1,1] ? { {32{sign}}, readdata } : // 32 BIT
617+
accesssize[0,1] ? { {48{sign}}, readdata[0,16] } : // 16 BIT
618+
{ {56{sign}}, readdata[ { byteaccess, 3b000 }, 8 ] }; // 8 BIT
619+
}
623620
}
624621
unit load64(
625622
input uint1 start,
@@ -645,7 +642,8 @@ unit load64(
645642
}
646643
}
647644
}
648-
// STORE UNIT FOR NON-FAST STORES ( to BRAM/SDRAM or 32 bit )
645+
646+
// STORE UNITS
649647
unit store(
650648
input uint1 start,
651649
output! uint1 busy(0),

RV64/SOFTWARE/PAWS/TEST/RISCV.P64

56.1 KB
Binary file not shown.

RV64/SOFTWARE/c/riscv-tests/makefile

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# No extra compiler flags are set for this project.
CFLAGSEXTRA=
2+
# Shared build settings; judging by the file name these are the link-time-optimisation settings (confirm in the include).
include ../makefile-lto.inc
3+
4+
# Project name and type; presumably these select the output path (the commit adds PAWS/TEST/RISCV.P64) - confirm in makefile-paws.inc.
PROJECT=RISCV
5+
PROJECTTYPE=TEST
6+
# Every C, C++, assembler and header file in this directory is listed as a project source.
PRJSRC=$(wildcard *.c) $(wildcard *.cpp) $(wildcard *.s) $(wildcard *.h)
7+
# The single object file named for this project.
PRJOBJ=riscv-tests.o
8+
9+
# Shared PAWS build rules, included last so the variables above are already set.
include ../makefile-paws.inc
+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#include <stdio.h>
2+
#include <math.h>
3+
#include <PAWSlibrary.h>
4+
5+
// https://github.com/riscv-software-src/riscv-tests/blob/master/isa/macros/scalar/test_macros.h
// IEEE-754 BIT PATTERNS FOR THE SPECIAL VALUES USED BY THE TESTS ( h = HALF, f = SINGLE, NO SUFFIX = DOUBLE )
#define qNaNh 0x7e00
#define sNaNh 0x7c01
#define nINFf 0xFF800000
#define pINFf 0x7F800000
#define qNaNf 0x7fc00000
#define sNaNf 0x7f800001
#define qNaN 0x7ff8000000000000
#define sNaN 0x7ff0000000000001

// VIEW OF ONE SINGLE-PRECISION VALUE EITHER AS A float OR AS ITS RAW BIT PATTERN
union single_bitstream {
    float single_float;
    int bitstream;
};

// ONE TEST CASE: OPERATION CODE, THE TWO OPERANDS, AND THE EXPECTED RESULT
// operation: 1 = fadd.s, 2 = fsub.s, 3 = fmul.s, 4 = fdiv.s, 5 = fsqrt.s ( op2 UNUSED ), -1 = END OF TABLE
struct singe_inputs {
    int operation;
    union single_bitstream op1;
    union single_bitstream op2;
    union single_bitstream result;
};

// TEST TABLE, TERMINATED BY operation == -1
// THE BRACE INITIALISERS SET THE float MEMBER OF EACH UNION, SO AN ENTRY THAT NEEDS AN EXACT BIT PATTERN
// ( INFINITY, NaN ) IS PATCHED AT RUN TIME FROM single_test_inputs_overrides AT THE SAME INDEX
struct singe_inputs single_test_inputs[] = {
    { 1, 2.5f, 1.0f, 3.5f },                                    //  0 fadd.s
    { 1, -1235.1f, 1.1f, -1234.0f },                            //  1
    { 1, 3.14159265f, 0.00000001f, 3.14159265f },               //  2
    { 2, 2.5f, 1.0f, 1.5f },                                    //  3 fsub.s
    { 2, -1235.1f, -1.1f, -1234.0f },                           //  4
    { 2, 3.14159265f, 0.00000001f, 3.14159265f },               //  5
    { 2, pINFf, pINFf, qNaNf },                                 //  6 Inf - Inf -> CANONICAL NaN ( BIT PATTERNS COME FROM THE OVERRIDES )
    { 3, 2.5f, 1.0f, 2.5f },                                    //  7 fmul.s
    { 3, -1235.1f, -1.1f, 1358.61f },                           //  8
    { 3, 3.14159265f, 0.00000001f, 3.14159265e-8f },            //  9
    { 4, 3.14159265f, 2.71828182f, 1.1557273520668288f },       // 10 fdiv.s
    { 4, -1234.0f, 1235.1f, -0.9991093838555584f },             // 11
    { 4, 3.14159265f, 1.0f, 3.14159265f },                      // 12
    { 5, 3.14159265f, 0, 1.7724538498928541f },                 // 13 fsqrt.s
    { 5, 10000.0f, 0, 100.0f },                                 // 14
    { 5,-1.0f, 0, qNaNf },                                      // 15 sqrt OF A NEGATIVE -> CANONICAL NaN ( RESULT FROM THE OVERRIDES )
    { 5, 171.0f, 0, 13.076696f },                               // 16
    { -1, -1, -1, -1 }                                          // 17 END OF TABLE
};

// RAW BIT PATTERN OVERRIDES, INDEXED IN STEP WITH single_test_inputs: { op1, op2, result }
// A ZERO ENTRY MEANS "KEEP THE float VALUE FROM THE TABLE"
int single_test_inputs_overrides[][3] = {
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {pINFf, pINFf, qNaNf},          //  6 Inf - Inf ( Inf - ( -Inf ) WOULD BE +Inf AND COULD NEVER MATCH qNaNf )
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {0,0,0},
    {0,0,qNaNf},                    // 15 EXPECTED RESULT OF sqrt( -1.0 )
    {0,0,0},
    {0,0,0},
    {0,0,0},
};
68+
69+
int single_tests( void ) {
70+
int test = 0;
71+
union single_bitstream result;
72+
73+
printf("\n");
74+
printf("Single-Precision Floating-Point Test");
75+
printf("\n");
76+
77+
while( single_test_inputs[ test ].operation != -1 ) {
78+
if( single_test_inputs_overrides[test][0] ) { single_test_inputs[test].op1.bitstream = single_test_inputs_overrides[test][0]; }
79+
if( single_test_inputs_overrides[test][1] ) { single_test_inputs[test].op2.bitstream = single_test_inputs_overrides[test][1]; }
80+
if( single_test_inputs_overrides[test][2] ) { single_test_inputs[test].result.bitstream = single_test_inputs_overrides[test][2]; }
81+
test++;
82+
}
83+
84+
test = 0;
85+
while( single_test_inputs[ test ].operation != -1 ) {
86+
switch( single_test_inputs[ test ].operation ) {
87+
case 1:
88+
result.single_float = single_test_inputs[test].op1.single_float + single_test_inputs[test].op2.single_float;
89+
printf("fadd.s %8.8f + %8.8f\n",
90+
single_test_inputs[test].op1.single_float,single_test_inputs[test].op2.single_float);
91+
printf(" -> %8.8f ( %8.8f )\n",result.single_float,single_test_inputs[test].result.single_float);
92+
printf(" -> 0x%8x ( 0x%8x )",result.bitstream,single_test_inputs[test].result.bitstream);
93+
break;
94+
case 2:
95+
result.single_float = single_test_inputs[test].op1.single_float - single_test_inputs[test].op2.single_float;
96+
printf("fsub.s %8.8f - %8.8f\n",
97+
single_test_inputs[test].op1.single_float,single_test_inputs[test].op2.single_float);
98+
printf(" -> %8.8f ( %8.8f )\n",result.single_float,single_test_inputs[test].result.single_float);
99+
printf(" -> 0x%8x ( 0x%8x )",result.bitstream,single_test_inputs[test].result.bitstream);
100+
break;
101+
case 3:
102+
result.single_float = single_test_inputs[test].op1.single_float * single_test_inputs[test].op2.single_float;
103+
printf("fmul.s %8.8f * %8.8f\n",
104+
single_test_inputs[test].op1.single_float,single_test_inputs[test].op2.single_float);
105+
printf(" -> %8.8f ( %8.8f )\n",result.single_float,single_test_inputs[test].result.single_float);
106+
printf(" -> 0x%8x ( 0x%8x )",result.bitstream,single_test_inputs[test].result.bitstream);
107+
break;
108+
case 4:
109+
result.single_float = single_test_inputs[test].op1.single_float / single_test_inputs[test].op2.single_float;
110+
printf("fdiv.s %8.8f / %8.8f\n",
111+
single_test_inputs[test].op1.single_float,single_test_inputs[test].op2.single_float);
112+
printf(" -> %8.8f ( %8.8f )\n",result.single_float,single_test_inputs[test].result.single_float);
113+
printf(" -> 0x%8x ( 0x%8x )",result.bitstream,single_test_inputs[test].result.bitstream);
114+
break;
115+
case 5:
116+
result.single_float = sqrtf(single_test_inputs[test].op1.single_float);
117+
printf("fsqrt.s %8.8f\n",
118+
single_test_inputs[test].op1.single_float);
119+
printf(" -> %8.8f ( %8.8f )\n",result.single_float,single_test_inputs[test].result.single_float);
120+
printf(" -> 0x%8x ( 0x%8x )",result.bitstream,single_test_inputs[test].result.bitstream);
121+
break;
122+
}
123+
switch( single_test_inputs[ test ].operation ) {
124+
case 1:
125+
case 2:
126+
case 3:
127+
case 4:
128+
case 5:
129+
if( result.single_float == single_test_inputs[test].result.single_float )
130+
printf(" PASS\n");
131+
else
132+
printf(" FAIL\n");
133+
break;
134+
}
135+
printf("\n");
136+
test++;
137+
}
138+
}
139+
140+
int main( void ) {
    // BANNER
    printf("PAWS Risc-V Test Suite\n");
    printf("Ported from https://github.com/riscv-software-src/riscv-tests\n");

    // RUN THE SINGLE-PRECISION TESTS; THE RETURN VALUE IS NOT USED HERE
    (void) single_tests();

    // PAUSE BEFORE LEAVING SO THE OUTPUT CAN BE READ
    // ( presumably 4000 ticks of a 1kHz timer - confirm against PAWSlibrary )
    sleep1khz( 4000, 0 );

    return 0;
}
149+
150+
// EXIT WILL RETURN TO BIOS

RV64/ULX3S/BUILD_ulx3s/PAWSv2-64.bit

1.78 KB
Binary file not shown.

RV64/cpu_functionblocks.si

+5-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ unit control(
2121
output uint1 IS_AUIPCLUI,
2222
output uint1 IS_JAL,
2323
output uint1 IS_ALUM,
24+
output uint1 IS_ALUCLM,
2425
output uint1 IS_CSR,
2526
output uint1 IS_ATOMIC,
2627
output uint1 IS_FPU,
@@ -66,6 +67,7 @@ unit control(
6667
IS_AUIPCLUI :> IS_AUIPCLUI,
6768
IS_JAL :> IS_JAL,
6869
IS_ALUM :> IS_ALUM,
70+
IS_ALUCLM :> IS_ALUCLM,
6971
IS_CSR :> IS_CSR,
7072
IS_ATOMIC :> IS_ATOMIC,
7173
IS_LOAD :> IS_LOAD,
@@ -244,6 +246,7 @@ unit controlflags(
244246
output uint1 IS_JAL,
245247
output uint1 IS_BRANCH,
246248
output uint1 IS_ALUM,
249+
output uint1 IS_ALUCLM,
247250
output uint1 IS_CSR,
248251
output uint1 IS_ATOMIC,
249252
output uint1 IS_FPU,
@@ -267,6 +270,7 @@ unit controlflags(
267270
IS_FAST := 1; IS_ALU := 0; IS_ALU32 := 0; IS_ALU32SIGNX := 1; IS_FPU := 0; IS_FASTFPU := 0;
268271
IS_AUIPCLUI := 0; IS_JAL := 0; IS_BRANCH := 0; IS_CSR := 0; IS_ATOMIC := 0; IS_LOAD := 0; IS_STORE := 0; IS_FENCE := 0;
269272
IS_ALUM := REGREG & ( function7 == 7b0000001 );
273+
IS_ALUCLM := REGREG & ( function7 == 7b0000101 );
270274

271275
always_after {
272276
// DETERMINE CPU CONTROL FLAGS
@@ -283,7 +287,7 @@ unit controlflags(
283287
case 5b00011: { IS_FENCE = 1; writeRegister = 0;} // FENCE[I]
284288
case 5b11100: { IS_CSR = 1; IS_FAST = 0; } // CSR ECALL EBBREAK
285289
case 5b00100: { IS_ALU = 1; } // REGISTER-IMMEDIATE ALU
286-
case 5b01100: { IS_ALU = 1; IS_FAST = ~( IS_ALUM & function3[2,1] ); } // REGISTER-REGISTER ALU ( FAST NOT DIVIDE )
290+
case 5b01100: { IS_ALU = 1; IS_FAST = ~( IS_ALUM & function3[2,1] ); } // REGISTER-REGISTER ALU ( FAST NOT DIVIDE OR CLMUL )
287291
case 5b00110: {
288292
IS_ALU = 1; IS_ALU32 = 1; // REGISTER-IMMEDIATE ALU 32BIT
289293
switch( function3 ) {

Reference/Graphics/OUTRUN-4.jpg

100 KB
Loading

0 commit comments

Comments
 (0)