diff --git a/src/hardware/cpu/isa.c b/src/hardware/cpu/isa.c index 4599a4a..f15c884 100644 --- a/src/hardware/cpu/isa.c +++ b/src/hardware/cpu/isa.c @@ -521,7 +521,7 @@ static void mov_handler(od_t *src_od, od_t *dst_od) { // src: register // dst: virtual address - write64bits_dram( + cpu_write64bits_dram( va2pa(dst), *(uint64_t *)src); increase_pc(); @@ -532,7 +532,7 @@ static void mov_handler(od_t *src_od, od_t *dst_od) { // src: virtual address // dst: register - *(uint64_t *)dst = read64bits_dram(va2pa(src)); + *(uint64_t *)dst = cpu_read64bits_dram(va2pa(src)); increase_pc(); cpu_flags.__flags_value = 0; return; @@ -558,7 +558,7 @@ static void push_handler(od_t *src_od, od_t *dst_od) // src: register // dst: empty cpu_reg.rsp = cpu_reg.rsp - 8; - write64bits_dram( + cpu_write64bits_dram( va2pa(cpu_reg.rsp), *(uint64_t *)src); increase_pc(); @@ -576,7 +576,7 @@ static void pop_handler(od_t *src_od, od_t *dst_od) { // src: register // dst: empty - uint64_t old_val = read64bits_dram( + uint64_t old_val = cpu_read64bits_dram( va2pa(cpu_reg.rsp)); cpu_reg.rsp = cpu_reg.rsp + 8; *(uint64_t *)src = old_val; @@ -592,7 +592,7 @@ static void leave_handler(od_t *src_od, od_t *dst_od) cpu_reg.rsp = cpu_reg.rbp; // popq %rbp - uint64_t old_val = read64bits_dram( + uint64_t old_val = cpu_read64bits_dram( va2pa(cpu_reg.rsp)); cpu_reg.rsp = cpu_reg.rsp + 8; cpu_reg.rbp = old_val; @@ -609,7 +609,7 @@ static void call_handler(od_t *src_od, od_t *dst_od) // dst: empty // push the return value cpu_reg.rsp = cpu_reg.rsp - 8; - write64bits_dram( + cpu_write64bits_dram( va2pa(cpu_reg.rsp), cpu_pc.rip + sizeof(char) * MAX_INSTRUCTION_CHAR); // jump to target function address @@ -626,7 +626,7 @@ static void ret_handler(od_t *src_od, od_t *dst_od) // src: empty // dst: empty // pop rsp - uint64_t ret_addr = read64bits_dram( + uint64_t ret_addr = cpu_read64bits_dram( va2pa(cpu_reg.rsp)); cpu_reg.rsp = cpu_reg.rsp + 8; // jump to return address @@ -707,7 +707,7 @@ static void cmp_handler(od_t *src_od, od_t *dst_od) // src: register (value: int64_t bit map) // dst: register (value: int64_t bit map) // dst = dst - src = dst + (-src) - uint64_t dval = read64bits_dram(va2pa(dst)); + uint64_t dval = cpu_read64bits_dram(va2pa(dst)); uint64_t val = dval + (~src + 1); int val_sign = ((val >> 63) & 0x1); @@ -761,7 +761,7 @@ void instruction_cycle() { // FETCH: get the instruction string by program counter char inst_str[MAX_INSTRUCTION_CHAR + 10]; - readinst_dram(va2pa(cpu_pc.rip), inst_str); + cpu_readinst_dram(va2pa(cpu_pc.rip), inst_str); debug_printf(DEBUG_INSTRUCTIONCYCLE, "%8lx %s\n", cpu_pc.rip, inst_str); diff --git a/src/hardware/cpu/sram.c b/src/hardware/cpu/sram.c index 73d4198..6aeb7d2 100644 --- a/src/hardware/cpu/sram.c +++ b/src/hardware/cpu/sram.c @@ -9,10 +9,13 @@ */ #include "headers/address.h" +#include "headers/memory.h" #include +#include #define NUM_CACHE_LINE_PER_SET (8) +// write-back and write-allocate typedef enum { CACHE_LINE_INVALID, @@ -23,8 +26,9 @@ typedef enum typedef struct { sram_cacheline_state_t state; + int time; // timer to find LRU line inside one set uint64_t tag; - uint8_t block[(1 >> SRAM_CACHE_OFFSET_LENGTH)]; + uint8_t block[(1 << SRAM_CACHE_OFFSET_LENGTH)]; } sram_cacheline_t; typedef struct @@ -34,14 +38,44 @@ typedef struct typedef struct { - sram_cacheset_t sets[(1 >> SRAM_CACHE_INDEX_LENGTH)]; + sram_cacheset_t sets[(1 << SRAM_CACHE_INDEX_LENGTH)]; } sram_cache_t; static sram_cache_t cache; -uint8_t sram_cache_read(address_t paddr) +uint8_t sram_cache_read(uint64_t paddr_value) { + address_t paddr = { + .paddr_value = paddr_value, + }; + sram_cacheset_t set = cache.sets[paddr.CI]; + + // update LRU time + sram_cacheline_t *victim = NULL; + sram_cacheline_t *invalid = NULL; + int max_time = -1; + + for (int i = 0; i < NUM_CACHE_LINE_PER_SET; ++ i) + { + set.lines[i].time ++; + + if (max_time < set.lines[i].time) + { + // select this line as victim by LRU policy + // replace it when all lines are valid + victim = &(set.lines[i]); + max_time = set.lines[i].time; + } + + if (set.lines[i].state == CACHE_LINE_INVALID) + { + // exist one invalid line as candidate for cache miss + invalid = &(set.lines[i]); + } + } + + // try cache hit for (int i = 0; i < NUM_CACHE_LINE_PER_SET; ++ i) { sram_cacheline_t line = set.lines[i]; @@ -49,19 +83,167 @@ uint8_t sram_cache_read(address_t paddr) if (line.state != CACHE_LINE_INVALID && line.tag == paddr.CT) { // cache hit - // TODO: update LRU + // update LRU time + line.time = 0; + + // find the byte return line.block[paddr.CO]; } } // cache miss: load from memory - // TODO: update LRU - // TODO: select one victim by replacement policy if set is full - return 0; + // try to find one free cache line + if (invalid != NULL) + { + // load data from DRAM to this invalid cache line + bus_read_cacheline(paddr.paddr_value, &(invalid->block)); + + // update cache line state + invalid->state = CACHE_LINE_CLEAN; + + // update LRU + invalid->time = 0; + + // update tag + invalid->tag = paddr.CT; + + return invalid->block[paddr.CO]; + } + + // no free cache line, use LRU policy + assert(victim != NULL); + + if (victim->state == CACHE_LINE_DIRTY) + { + // write back the dirty line to dram + bus_write_cacheline(paddr.paddr_value, victim); + } + // if CACHE_LINE_CLEAN discard this victim directly + // update state + victim->state = CACHE_LINE_INVALID; + + // read from dram + // load data from DRAM to this invalid cache line + bus_read_cacheline(paddr.paddr_value, &(victim->block)); + + // update cache line state + victim->state = CACHE_LINE_CLEAN; + + // update LRU + victim->time = 0; + + // update tag + victim->tag = paddr.CT; + + return victim->block[paddr.CO]; } -void sram_cache_write(address_t paddr, uint8_t data) +void sram_cache_write(uint64_t paddr_value, uint8_t data) { - return; -} \ No newline at end of file + address_t paddr = { + .paddr_value = paddr_value, + }; + + sram_cacheset_t set = cache.sets[paddr.CI]; + + // update LRU time + sram_cacheline_t *victim = NULL; + sram_cacheline_t *invalid = NULL; // for write-allocate + int max_time = -1; + + for (int i = 0; i < NUM_CACHE_LINE_PER_SET; ++ i) + { + set.lines[i].time ++; + + if (max_time < set.lines[i].time) + { + // select this line as victim by LRU policy + // replace it when all lines are valid + victim = &(set.lines[i]); + max_time = set.lines[i].time; + } + + if (set.lines[i].state == CACHE_LINE_INVALID) + { + // exist one invalid line as candidate for cache miss + invalid = &(set.lines[i]); + } + } + + // try cache hit + for (int i = 0; i < NUM_CACHE_LINE_PER_SET; ++ i) + { + sram_cacheline_t line = set.lines[i]; + + if (line.state != CACHE_LINE_INVALID && line.tag == paddr.CT) + { + // cache hit + + // update LRU time + line.time = 0; + + // find the byte + line.block[paddr.CO] = data; + + // update state + line.state = CACHE_LINE_DIRTY; + + return; + } + } + + // cache miss: load from memory + + // write-allocate + + // try to find one free cache line + if (invalid != NULL) + { + // load data from DRAM to this invalid cache line + bus_read_cacheline(paddr.paddr_value, &(invalid->block)); + + // update cache line state + invalid->state = CACHE_LINE_DIRTY; + + // update LRU + invalid->time = 0; + + // update tag + invalid->tag = paddr.CT; + + // write data + invalid->block[paddr.CO] = data; + + return; + } + + // no free cache line, use LRU policy + assert(victim != NULL); + + if (victim->state == CACHE_LINE_DIRTY) + { + // write back the dirty line to dram + bus_write_cacheline(paddr.paddr_value, victim); + } + // if CACHE_LINE_CLEAN discard this victim directly + // update state + victim->state = CACHE_LINE_INVALID; + + // read from dram + // write-allocate + // load data from DRAM to this invalid cache line + bus_read_cacheline(paddr.paddr_value, &(victim->block)); + + // update cache line state + victim->state = CACHE_LINE_DIRTY; + + // update LRU + victim->time = 0; + + // update tag + victim->tag = paddr.CT; + + victim->block[paddr.CO] = data; +} + diff --git a/src/hardware/memory/dram.c b/src/hardware/memory/dram.c index 44af720..9977f80 100644 --- a/src/hardware/memory/dram.c +++ b/src/hardware/memory/dram.c @@ -14,6 +14,10 @@ #include "headers/cpu.h" #include "headers/memory.h" #include "headers/common.h" +#include "headers/address.h" + +uint8_t sram_cache_read(uint64_t paddr); +void sram_cache_write(uint64_t paddr, uint8_t data); /* Be careful with the x86-64 little endian integer encoding @@ -22,12 +26,18 @@ e.g. write 0x00007fd357a02ae0 to cache, the memory lapping should be: */ // memory accessing used in instructions -uint64_t read64bits_dram(uint64_t paddr) +uint64_t cpu_read64bits_dram(uint64_t paddr) { if (DEBUG_ENABLE_SRAM_CACHE == 1) { // try to load uint64_t from SRAM cache // little-endian + uint64_t val = 0x0; + for (int i = 0; i < 8; ++ i) + { + val += (sram_cache_read(paddr + i) << (i * 8)); + } + return val; } else { @@ -48,12 +58,17 @@ uint64_t read64bits_dram(uint64_t paddr) } } -void write64bits_dram(uint64_t paddr, uint64_t data) +void cpu_write64bits_dram(uint64_t paddr, uint64_t data) { if (DEBUG_ENABLE_SRAM_CACHE == 1) { // try to write uint64_t to SRAM cache // little-endian + for (int i = 0; i < 8; ++ i) + { + sram_cache_write(paddr + i, (data >> (i * 8)) & 0xff); + } + return; } else { @@ -70,7 +85,7 @@ void write64bits_dram(uint64_t paddr, uint64_t data) } } -void readinst_dram(uint64_t paddr, char *buf) +void cpu_readinst_dram(uint64_t paddr, char *buf) { for (int i = 0; i < MAX_INSTRUCTION_CHAR; ++ i) { @@ -78,7 +93,7 @@ void readinst_dram(uint64_t paddr, char *buf) } } -void writeinst_dram(uint64_t paddr, const char *str) +void cpu_writeinst_dram(uint64_t paddr, const char *str) { int len = strlen(str); assert(len < MAX_INSTRUCTION_CHAR); @@ -94,4 +109,28 @@ void writeinst_dram(uint64_t paddr, const char *str) pm[paddr + i] = 0; } } +} + + +/* interface of I/O Bus: read and write between the SRAM cache and DRAM memory + */ + +void bus_read_cacheline(uint64_t paddr, uint8_t *block) +{ + uint64_t dram_base = ((paddr >> SRAM_CACHE_OFFSET_LENGTH) << SRAM_CACHE_OFFSET_LENGTH); + + for (int i = 0; i < (1 << SRAM_CACHE_OFFSET_LENGTH); ++ i) + { + block[i] = pm[dram_base + i]; + } +} + +void bus_write_cacheline(uint64_t paddr, uint8_t *block) +{ + uint64_t dram_base = ((paddr >> SRAM_CACHE_OFFSET_LENGTH) << SRAM_CACHE_OFFSET_LENGTH); + + for (int i = 0; i < (1 << SRAM_CACHE_OFFSET_LENGTH); ++ i) + { + pm[dram_base + i] = block[i]; + } } \ No newline at end of file diff --git a/src/headers/memory.h b/src/headers/memory.h index 8885aee..71b9309 100644 --- a/src/headers/memory.h +++ b/src/headers/memory.h @@ -36,9 +36,13 @@ uint8_t pm[PHYSICAL_MEMORY_SPACE]; /*======================================*/ // used by instructions: read or write uint64_t to DRAM -uint64_t read64bits_dram(uint64_t paddr); -void write64bits_dram(uint64_t paddr, uint64_t data); -void readinst_dram(uint64_t paddr, char *buf); -void writeinst_dram(uint64_t paddr, const char *str); +uint64_t cpu_read64bits_dram(uint64_t paddr); +void cpu_write64bits_dram(uint64_t paddr, uint64_t data); +void cpu_readinst_dram(uint64_t paddr, char *buf); +void cpu_writeinst_dram(uint64_t paddr, const char *str); + + +void bus_read_cacheline(uint64_t paddr, uint8_t *block); +void bus_write_cacheline(uint64_t paddr, uint8_t *block); #endif \ No newline at end of file diff --git a/src/tests/test_machine.c b/src/tests/test_machine.c index 365f1e8..0969611 100644 --- a/src/tests/test_machine.c +++ b/src/tests/test_machine.c @@ -54,11 +54,11 @@ static void TestAddFunctionCallAndComputation() cpu_reg.rbp = 0x7ffffffee110; cpu_reg.rsp = 0x7ffffffee0f0; - write64bits_dram(va2pa(0x7ffffffee110), 0x0000000000000000); // rbp - write64bits_dram(va2pa(0x7ffffffee108), 0x0000000000000000); - write64bits_dram(va2pa(0x7ffffffee100), 0x0000000012340000); - write64bits_dram(va2pa(0x7ffffffee0f8), 0x000000000000abcd); - write64bits_dram(va2pa(0x7ffffffee0f0), 0x0000000000000000); // rsp + cpu_write64bits_dram(va2pa(0x7ffffffee110), 0x0000000000000000); // rbp + cpu_write64bits_dram(va2pa(0x7ffffffee108), 0x0000000000000000); + cpu_write64bits_dram(va2pa(0x7ffffffee100), 0x0000000012340000); + cpu_write64bits_dram(va2pa(0x7ffffffee0f8), 0x000000000000abcd); + cpu_write64bits_dram(va2pa(0x7ffffffee0f0), 0x0000000000000000); // rsp // 2 before call // 3 after call before push @@ -87,7 +87,7 @@ static void TestAddFunctionCallAndComputation() // copy to physical memory for (int i = 0; i < 15; ++ i) { - writeinst_dram(va2pa(i * 0x40 + 0x00400000), assembly[i]); + cpu_writeinst_dram(va2pa(i * 0x40 + 0x00400000), assembly[i]); } cpu_pc.rip = MAX_INSTRUCTION_CHAR * sizeof(char) * 11 + 0x00400000; @@ -121,11 +121,11 @@ static void TestAddFunctionCallAndComputation() printf("register mismatch\n"); } - match = match && (read64bits_dram(va2pa(0x7ffffffee110)) == 0x0000000000000000); // rbp - match = match && (read64bits_dram(va2pa(0x7ffffffee108)) == 0x000000001234abcd); - match = match && (read64bits_dram(va2pa(0x7ffffffee100)) == 0x0000000012340000); - match = match && (read64bits_dram(va2pa(0x7ffffffee0f8)) == 0x000000000000abcd); - match = match && (read64bits_dram(va2pa(0x7ffffffee0f0)) == 0x0000000000000000); // rsp + match = match && (cpu_read64bits_dram(va2pa(0x7ffffffee110)) == 0x0000000000000000); // rbp + match = match && (cpu_read64bits_dram(va2pa(0x7ffffffee108)) == 0x000000001234abcd); + match = match && (cpu_read64bits_dram(va2pa(0x7ffffffee100)) == 0x0000000012340000); + match = match && (cpu_read64bits_dram(va2pa(0x7ffffffee0f8)) == 0x000000000000abcd); + match = match && (cpu_read64bits_dram(va2pa(0x7ffffffee0f0)) == 0x0000000000000000); // rsp if (match) { @@ -151,9 +151,9 @@ static void TestSumRecursiveCondition() cpu_flags.__flags_value = 0; - write64bits_dram(va2pa(0x7ffffffee230), 0x0000000008000650); // rbp - write64bits_dram(va2pa(0x7ffffffee228), 0x0000000000000000); - write64bits_dram(va2pa(0x7ffffffee220), 0x00007ffffffee310); // rsp + cpu_write64bits_dram(va2pa(0x7ffffffee230), 0x0000000008000650); // rbp + cpu_write64bits_dram(va2pa(0x7ffffffee228), 0x0000000000000000); + cpu_write64bits_dram(va2pa(0x7ffffffee220), 0x00007ffffffee310); // rsp char assembly[19][MAX_INSTRUCTION_CHAR] = { "push %rbp", // 0 @@ -180,7 +180,7 @@ static void TestSumRecursiveCondition() // copy to physical memory for (int i = 0; i < 19; ++ i) { - writeinst_dram(va2pa(i * 0x40 + 0x00400000), assembly[i]); + cpu_writeinst_dram(va2pa(i * 0x40 + 0x00400000), assembly[i]); } cpu_pc.rip = MAX_INSTRUCTION_CHAR * sizeof(char) * 16 + 0x00400000; @@ -215,9 +215,9 @@ static void TestSumRecursiveCondition() printf("register mismatch\n"); } - match = match && (read64bits_dram(va2pa(0x7ffffffee230)) == 0x0000000008000650); // rbp - match = match && (read64bits_dram(va2pa(0x7ffffffee228)) == 0x0000000000000006); - match = match && (read64bits_dram(va2pa(0x7ffffffee220)) == 0x00007ffffffee310); // rsp + match = match && (cpu_read64bits_dram(va2pa(0x7ffffffee230)) == 0x0000000008000650); // rbp + match = match && (cpu_read64bits_dram(va2pa(0x7ffffffee228)) == 0x0000000000000006); + match = match && (cpu_read64bits_dram(va2pa(0x7ffffffee220)) == 0x00007ffffffee310); // rsp if (match) {