From 3e16dd4725098d1bfda3320d325dbf7c319741c3 Mon Sep 17 00:00:00 2001 From: yangminz Date: Sun, 6 Jun 2021 23:02:21 +0800 Subject: [PATCH] debug mesi & implement false sharing --- src/hardware/cpu/sram.c | 2 +- src/mains/false_sharing.c | 148 ++++++++++++++++---------------------- src/mains/mesi.c | 60 +++++++++++++--- 3 files changed, 113 insertions(+), 97 deletions(-) diff --git a/src/hardware/cpu/sram.c b/src/hardware/cpu/sram.c index 7d32108..6034aea 100644 --- a/src/hardware/cpu/sram.c +++ b/src/hardware/cpu/sram.c @@ -43,7 +43,7 @@ char *trace_ptr = (char *)&trace_buf; typedef enum { CACHE_LINE_INVALID, - CACHE_LINE_CLEAN, + CACHE_LINE_CLEAN, // in MESI: E, S CACHE_LINE_DIRTY } sram_cacheline_state_t; diff --git a/src/mains/false_sharing.c b/src/mains/false_sharing.c index 5ab2ec2..4566678 100644 --- a/src/mains/false_sharing.c +++ b/src/mains/false_sharing.c @@ -5,165 +5,137 @@ #include #include #include -#include #include -// different page size to make sure the spatial relation #define PAGE_BYTES (4096) -int64_t result_page0[PAGE_BYTES / sizeof(uint64_t)]; -int64_t result_page1[PAGE_BYTES / sizeof(uint64_t)]; -int64_t result_page2[PAGE_BYTES / sizeof(uint64_t)]; -int64_t result_page3[PAGE_BYTES / sizeof(uint64_t)]; - -int LENGTH = 200000000; +int64_t result_page0[PAGE_BYTES / sizeof(int64_t)]; +int64_t result_page1[PAGE_BYTES / sizeof(int64_t)]; +int64_t result_page2[PAGE_BYTES / sizeof(int64_t)]; +int64_t result_page3[PAGE_BYTES / sizeof(int64_t)]; typedef struct { int64_t *cache_write_ptr; int cpu_id; + int length; } param_t; void *work_thread(void *param) { param_t *p = (param_t *)param; - int64_t *w = p->cache_write_ptr; + int64_t *ptr = p->cache_write_ptr; int cpu_id = p->cpu_id; + int length = p->length; - // try to run on cpu cpu_set_t mask; CPU_ZERO(&mask); CPU_SET(cpu_id, &mask); pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask); - // sync or async printf(" * thread[%lu] running on cpu[%d] writes to %p\n", - pthread_self(), cpu_id, w); 
+ pthread_self(), sched_getcpu(), ptr); - for (int i = 0; i < LENGTH; ++ i) + for (int i = 0; i < length; ++ i) { - *w += 1; + // write - not thread safe + // just write to make cache line dirty + *ptr += 1; } return NULL; } -void sequential_run() -{ - // true sharing counting - pthread_t ts_tid_1, ts_tid_2; - - int64_t seq_result; - - param_t p = { - .cache_write_ptr = &seq_result, - .cpu_id = 0 - }; - - printf("[Sequential]\n"); - - long t0 = clock(); - - pthread_create(&ts_tid_1, NULL, work_thread, (void *)&p); - pthread_join(ts_tid_1, NULL); - - pthread_create(&ts_tid_2, NULL, work_thread, (void *)&p); - pthread_join(ts_tid_2, NULL); - - printf(" Result %ld; elapsed tick tock: %ld\n", seq_result, clock() - t0); -} +int LENGTH = 200000000; void true_sharing_run() { - // true sharing counting - pthread_t ts_tid_1, ts_tid_2; + pthread_t t1, t2; - param_t param_t1 = { + param_t p1 = { .cache_write_ptr = &result_page0[0], - .cpu_id = 0 + .cpu_id = 0, + .length = LENGTH }; - param_t param_t2 = { + + param_t p2 = { .cache_write_ptr = &result_page0[0], - .cpu_id = 1 + .cpu_id = 1, + .length = LENGTH }; - printf("[True sharing]\n"); - long t0 = clock(); - - pthread_create(&ts_tid_1, NULL, work_thread, (void *)&param_t1); - pthread_create(&ts_tid_2, NULL, work_thread, (void *)&param_t2); - pthread_join(ts_tid_1, NULL); - pthread_join(ts_tid_2, NULL); + pthread_create(&t1, NULL, work_thread, (void *)&p1); + pthread_create(&t2, NULL, work_thread, (void *)&p2); - printf(" Result %ld; elapsed tick tock: %ld\n", result_page0[0], clock() - t0); + pthread_join(t1, NULL); + pthread_join(t2, NULL); + + printf("[True Sharing]\n\tresult: %ld; elapsed tick tock: %ld\n", + result_page0[0], + clock() - t0); } void false_sharing_run() { - // true sharing counting - pthread_t ts_tid_1, ts_tid_2; + pthread_t t1, t2; - param_t param_t1 = { + param_t p1 = { .cache_write_ptr = &result_page1[0], - .cpu_id = 0 + .cpu_id = 0, + .length = LENGTH }; - param_t param_t2 = { + + param_t p2 = { 
.cache_write_ptr = &result_page1[1], - .cpu_id = 1 + .cpu_id = 1, + .length = LENGTH }; - printf("[False sharing]\n"); - long t0 = clock(); - - pthread_create(&ts_tid_1, NULL, work_thread, (void *)&param_t1); - pthread_create(&ts_tid_2, NULL, work_thread, (void *)&param_t2); - pthread_join(ts_tid_1, NULL); - pthread_join(ts_tid_2, NULL); + pthread_create(&t1, NULL, work_thread, (void *)&p1); + pthread_create(&t2, NULL, work_thread, (void *)&p2); - printf(" Result %ld; elapsed tick tock: %ld\n", result_page1[0] + result_page1[1], clock() - t0); + pthread_join(t1, NULL); + pthread_join(t2, NULL); + + printf("[False Sharing]\n\tresult: %ld; elapsed tick tock: %ld\n", + result_page1[0] + result_page1[1], clock() - t0); } -void exclusive_run() +void no_sharing_run() { - // true sharing counting - pthread_t ts_tid_1, ts_tid_2; + pthread_t t1, t2; - param_t param_t1 = { + param_t p1 = { .cache_write_ptr = &result_page2[0], - .cpu_id = 0 + .cpu_id = 0, + .length = LENGTH }; - param_t param_t2 = { + + param_t p2 = { .cache_write_ptr = &result_page3[0], - .cpu_id = 1 + .cpu_id = 1, + .length = LENGTH }; - printf("[Exclusive]\n"); - long t0 = clock(); - - pthread_create(&ts_tid_1, NULL, work_thread, (void *)&param_t1); - pthread_create(&ts_tid_2, NULL, work_thread, (void *)&param_t2); - pthread_join(ts_tid_1, NULL); - pthread_join(ts_tid_2, NULL); + pthread_create(&t1, NULL, work_thread, (void *)&p1); + pthread_create(&t2, NULL, work_thread, (void *)&p2); - printf(" Result %ld; elapsed tick tock: %ld\n\n", + pthread_join(t1, NULL); + pthread_join(t2, NULL); + + printf("[No Sharing]\n\tresult: %ld; elapsed tick tock: %ld\n", result_page2[0] + result_page3[0], clock() - t0); } int main() { - assert((LENGTH % 0x1) == 0); - srand(12306); - - sequential_run(); true_sharing_run(); false_sharing_run(); - exclusive_run(); - - return 0; + no_sharing_run(); } \ No newline at end of file diff --git a/src/mains/mesi.c b/src/mains/mesi.c index e6b5781..521316c 100644 --- a/src/mains/mesi.c +++ 
b/src/mains/mesi.c @@ -17,7 +17,7 @@ typedef struct int value; } line_t; -#define NUM_PROCESSOR (1000) +#define NUM_PROCESSOR (2048) line_t cache[NUM_PROCESSOR]; @@ -64,7 +64,8 @@ int check_state() if ((m_count == 1 && i_count == (NUM_PROCESSOR - 1)) || (e_count == 1 && i_count == (NUM_PROCESSOR - 1)) || - (s_count >= 2 && i_count == (NUM_PROCESSOR - s_count))) + (s_count >= 2 && i_count == (NUM_PROCESSOR - s_count)) || + (i_count == NUM_PROCESSOR)) { return 1; } @@ -73,6 +74,8 @@ int check_state() } // i - the index of processor +// read_value - the address of read value +// int return - if this event is related with target physical address int read_cacheline(int i, int *read_value) { if (cache[i].state == MODIFIED) @@ -136,7 +139,7 @@ int read_cacheline(int i, int *read_value) cache[i].value = cache[j].value; // there are eaxctly 2 copies in processors - cache[i].state = SHARED; + cache[j].state = SHARED; *read_value = cache[i].value; @@ -179,6 +182,9 @@ int read_cacheline(int i, int *read_value) return 0; } +// i - the index of processor +// write_value - the value to be written to the physical address +// int return - if this event is related with target physical address int write_cacheline(int i, int write_value) { if (cache[i].state == MODIFIED) @@ -232,10 +238,17 @@ int write_cacheline(int i, int write_value) { if (cache[j].state == MODIFIED) { + // write back mem_value = cache[j].value; + + // invalid old cache line cache[j].state = INVALID; cache[j].value = 0; + // write allocate + cache[i].value = mem_value; + + // update to modified cache[i].state = MODIFIED; cache[i].value = write_value; @@ -295,6 +308,8 @@ int write_cacheline(int i, int write_value) return 0; } +// i - the index of processor +// int return - if this event is related with target physical address int evict_cacheline(int i) { if (cache[i].state == MODIFIED) @@ -310,7 +325,7 @@ int evict_cacheline(int i) return 1; } - else if (cache[i].state == EXCLUSIVE || cache[i].state == SHARED) + else 
if (cache[i].state == EXCLUSIVE) { cache[i].state = INVALID; cache[i].value = 0; @@ -321,11 +336,41 @@ int evict_cacheline(int i) return 1; } + else if (cache[i].state == SHARED) + { + cache[i].state = INVALID; + cache[i].value = 0; + + // may left only one shared to be exclusive + int s_count = 0; + int last_s = -1; + for (int j = 0; j < NUM_PROCESSOR; ++ j) + { + if (cache[j].state == SHARED) + { + last_s = j; + s_count ++; + } + } + + if (s_count == 1) + { + cache[last_s].state = EXCLUSIVE; + } + + #ifdef DEBUG + printf("[%d] evict\n", i); + #endif + + return 1; + } + + // evict when cache line is Invalid + // not related with target physical address return 0; } - void print_cacheline() { for (int i = 0; i < NUM_PROCESSOR; ++ i) @@ -359,7 +404,6 @@ int main() { srand(123456); - int read_value; for (int i = 0; i < NUM_PROCESSOR; ++ i) @@ -372,7 +416,7 @@ int main() print_cacheline(); #endif - for (int i = 0; i < 10000; ++ i) + for (int i = 0; i < 100000; ++ i) { int core_index = rand() % NUM_PROCESSOR; int op = rand() % 3; @@ -394,7 +438,7 @@ int main() // printf("evict [%d]\n", core_index); do_print = evict_cacheline(core_index); } - + #ifdef DEBUG if (do_print) {