Merge pull request #718 from FPGA-MAFIA/mini_core_accel

add and refactor function to mafia_accel.h library
FPGA-MAFIA · Aug 6, 2024 · 785efb6 · 785efb6
2 parents cbdeb85 + 61fb411
commit 785efb6
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 15 deletions.
diff --git a/app/defines/mafia_accel.h b/app/defines/mafia_accel.h
@@ -5,6 +5,14 @@
 // WRITE_REG: store VAL into the location that pointer REG refers to.
 // READ_REG:  load the location that pointer REG refers to into VAL
 //            (note the argument order: destination first, register second).
 // Both expand to a bare assignment expression, so each use must be terminated with ';'.
 #define WRITE_REG(REG,VAL) (*REG) = VAL
 #define READ_REG(VAL,REG)  VAL    = (*REG)
 
+// Register pointers for the INT8 multiplier selected by `index`.
+// Request registers start at CR_MEM_BASE + 0xF000, response registers at CR_MEM_BASE + 0xF010;
+// each multiplier occupies two consecutive offsets in each bank (even = operand/result, odd = operand/done).
+// `index` and the whole expansion are parenthesized so that expression arguments (e.g. i + 1)
+// and surrounding operators cannot change the computed address.
+// NOTE(review): with a stride of 2 and the banks 0x10 apart, only indices 0..7 fit before the
+// request bank runs into the response bank - presumably 8 multipliers; confirm against the RTL.
+// NOTE(review): the odd offsets are accessed through an int pointer - confirm the CR decoder expects this.
+// FIXME - possible to merge the macros. For now it's easier to debug
+#define CORE2MUL_INT8_MULTIPLICAND(index)  ((volatile int *)(CR_MEM_BASE + 0xF000 + 2*(index)))
+#define CORE2MUL_INT8_MULTIPLIER(index)    ((volatile int *)(CR_MEM_BASE + 0xF000 + 2*(index) + 1))
+#define MUL2CORE_INT8_RESULT(index)        ((volatile int *)(CR_MEM_BASE + 0xF010 + 2*(index)))
+#define MUL2CORE_INT8_DONE(index)          ((volatile int *)(CR_MEM_BASE + 0xF010 + 2*(index) + 1))
+
+// error macros
+#define MUL_INDEX_OUT_OF_RANGE  (-1)
+
 // Define 8-bit, 16-bit and 32-bit types
 typedef unsigned char uint8_t;     // 8-bit unsigned
 typedef signed char int8_t;        // 8-bit signed
@@ -14,12 +22,39 @@ typedef unsigned int uint32_t;   // 32-bit unsigned
 typedef signed int int32_t;      // 32-bit signed
 
 
-// function definitions
+/*****************************************************
+*                function definitions
+*****************************************************/
+// perceptron8_8 - one multiply-and-add step using the multiplier selected by `index`.
+//   data   - 8 bit signed input
+//   weight - 8 bit signed weight
+//   bias   - 8 bit signed bias, added in software after the multiply
+//   index  - multiplier index (selects which register set is used)
+// Returns the value read from the multiplier's result register plus bias, as a signed
+// 32 bit number (data*weight + bias; at most 17 bits are needed).
+int32_t perceptron8_8(int8_t data, int8_t weight, int8_t bias, unsigned int index) {
+
+    // hand both operands to the selected multiplier: weight first, then data
+    WRITE_REG(CORE2MUL_INT8_MULTIPLICAND(index), weight);
+    WRITE_REG(CORE2MUL_INT8_MULTIPLIER(index), data);
+
+    // busy-wait: sample the done register until it reads non-zero
+    int done;
+    do {
+        READ_REG(done, MUL2CORE_INT8_DONE(index));
+    } while (!done);
+
+    // fetch the result and fold in the bias
+    int32_t product;
+    READ_REG(product, MUL2CORE_INT8_RESULT(index));
+
+    return product + (int32_t)bias;
+}
 
-// TODO - possible refactor is needed
-// FIXME - working on multiplier 0 and 1 only
-int32_t mul_16by8(int16_t multiplier, int8_t multiplicand) {
+// multiplier   - 16 bit signed input number
+// multiplicand - 8 bit signed input number
+// mul_index0   - first multiplier index
+// mul_index1   - second multiplier index
+// returns a 32 bit sign-extended number. Note that at most 24 bits are needed.
 
+int32_t mul_16by8(int16_t multiplier, int8_t multiplicand, unsigned int mul_index0, unsigned int mul_index1) {
+
     int8_t pre_result_lsb = multiplier & 0x00ff; // extract low 8 bits from the multiplier
     int8_t pre_result_msb = (multiplier & 0xff00) >> 8; // extract high 8 bits from the multiplier
 
@@ -33,25 +68,26 @@ int32_t mul_16by8(int16_t multiplier, int8_t multiplicand) {
     }
 
     // request from multipliers
-    WRITE_REG(CR_CORE2MUL_INT8_MULTIPLICAND_0, multiplicand);
-    WRITE_REG(CR_CORE2MUL_INT8_MULTIPLIER_0, pre_result_lsb);
+    WRITE_REG(CORE2MUL_INT8_MULTIPLICAND(mul_index0), multiplicand);
+    WRITE_REG(CORE2MUL_INT8_MULTIPLIER(mul_index0), pre_result_lsb);
 
-    WRITE_REG(CR_CORE2MUL_INT8_MULTIPLICAND_1, multiplicand);
-    WRITE_REG(CR_CORE2MUL_INT8_MULTIPLIER_1, pre_result_msb);
+    WRITE_REG(CORE2MUL_INT8_MULTIPLICAND(mul_index1), multiplicand);
+    WRITE_REG(CORE2MUL_INT8_MULTIPLIER(mul_index1), pre_result_msb);
 
     int data_ready = 0;
 
+    // the second multiplier will be ready after the first, so it is enough to poll only the second
      while(!data_ready) {
-            READ_REG(data_ready, CR_MUL2CORE_INT8_DONE_1);
+            READ_REG(data_ready, MUL2CORE_INT8_DONE(mul_index1));
     }
 
     int16_t result_lsb, result_msb; 
 
-    READ_REG(result_lsb, CR_MUL2CORE_INT8_0);
-    READ_REG(result_msb, CR_MUL2CORE_INT8_1);
+    READ_REG(result_lsb, MUL2CORE_INT8_RESULT(mul_index0));
+    READ_REG(result_msb, MUL2CORE_INT8_RESULT(mul_index1));
 
     if(lsb_was_neg) {
-        result_lsb = result_lsb + (multiplicand << 7);
+        result_lsb = result_lsb + (int16_t)(multiplicand << 7);
     }
 
     int32_t result;

diff --git a/verif/mini_core_accel/tests/int16_mul_int8.c b/verif/mini_core_accel/tests/int16_mul_int8.c
@@ -1,6 +1,6 @@
 // testing int16*int8 using int8 arithmetics
 
-//./build.py -dut mini_core_accel -test int16_mul_int8 -app -hw -sim 
+//./build.py -dut mini_core_accel -test int16_mul_int8 -app -hw -sim -clean
 
 #include "mini_core_accel_defines.h"
 #include "mafia_accel.h"
@@ -24,10 +24,10 @@ int main() {
     READ_REG(result16, CR_MUL2CORE_INT8_0); 
 
     int32_t result32;
-    result32 = mul_16by8(result16, weights[1]);
+    result32 = mul_16by8(result16, weights[1], 0, 1);
 
     // used for debug purposes
-    WRITE_REG(CR_DEBUG_0, result32);  //the result is 0x180
+    WRITE_REG(CR_DEBUG_0, result32);  //the result is 0xfffff880
 
     return 0;
 }
diff --git a/verif/mini_core_accel/tests/perceptron.c b/verif/mini_core_accel/tests/perceptron.c
@@ -0,0 +1,22 @@
+#include "mini_core_accel_defines.h"
+#include "mafia_accel.h"
+
+// ./build.py -dut mini_core_accel -test perceptron -app -hw -sim -clean
+
+int main() {
+
+    // operands for the perceptron step: 0x60 is 96; 0xfb stored in an int8_t
+    // reads back as -5 on the two's-complement target this test is built for
+    int8_t sample = 0x60;
+    int8_t coeff  = 0xfb;
+    int8_t offset = 0x8;
+
+    // sample*coeff + offset, computed through multiplier index 5
+    int32_t acc = perceptron8_8(sample, coeff, offset, 5);
+
+    // used for debug purposes: expected value is 96*(-5) + 8 = -472 = 0xfffffe28
+    WRITE_REG(CR_DEBUG_0, acc);
+
+    return 0;
+}