Skip to content

Commit

Permalink
debug mesi & implement false sharing
Browse files Browse the repository at this point in the history
  • Loading branch information
yangminz committed Jun 6, 2021
1 parent 4475e50 commit 3e16dd4
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 97 deletions.
2 changes: 1 addition & 1 deletion src/hardware/cpu/sram.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ char *trace_ptr = (char *)&trace_buf;
// State of one SRAM cache line as tracked by the cache model.
typedef enum
{
    // the line holds no usable data
    CACHE_LINE_INVALID,
    // in MESI: E, S
    CACHE_LINE_CLEAN,
    // NOTE(review): presumably the counterpart of MESI "M" - confirm
    CACHE_LINE_DIRTY
} sram_cacheline_state_t;

Expand Down
148 changes: 60 additions & 88 deletions src/mains/false_sharing.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,165 +5,137 @@
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <assert.h>
#include <sched.h>

// Each result array is exactly one page (PAGE_BYTES) long. Element [0] of two
// different arrays is therefore at least one page apart, while neighbouring
// elements of the same array are only sizeof(int64_t) bytes apart.
#define PAGE_BYTES (4096)

int64_t result_page0[PAGE_BYTES / sizeof(int64_t)];
int64_t result_page1[PAGE_BYTES / sizeof(int64_t)];
int64_t result_page2[PAGE_BYTES / sizeof(int64_t)];
int64_t result_page3[PAGE_BYTES / sizeof(int64_t)];

// Number of increments each worker performs. Only declared here so that code
// placed before the initialized definition (further down in this file) can
// refer to it without defining the object twice.
extern int LENGTH;

// Arguments handed to one worker thread (see work_thread).
typedef struct
{
    // address of the counter the worker repeatedly increments
    int64_t *cache_write_ptr;
    // index of the CPU the worker asks to be pinned to
    int cpu_id;
    // number of increments the worker performs
    int length;
} param_t;

// Thread entry point: ask to be pinned to the requested CPU, report where the
// thread actually runs, then increment the target counter `length` times.
//
// param  - pointer to a param_t describing target address, cpu id and
//          iteration count
// return - always NULL
void *work_thread(void *param)
{
    param_t *p = (param_t *)param;
    // volatile forces one real load and store per iteration; without it an
    // optimizing compiler may fold the whole loop into a single addition and
    // no cache line traffic would be generated
    volatile int64_t *ptr = p->cache_write_ptr;
    int cpu_id = p->cpu_id;
    int length = p->length;

    // try to run on cpu
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu_id, &mask);
    int rc = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
    if (rc != 0)
    {
        // pinning is best effort: keep going, but make the failure visible
        // because the measurement is only meaningful on distinct CPUs
        fprintf(stderr, " * cannot pin thread to cpu[%d]: error %d\n",
            cpu_id, rc);
    }

    // %lu and %p require unsigned long and void * arguments
    printf(" * thread[%lu] running on cpu[%d] writes to %p\n",
        (unsigned long)pthread_self(), sched_getcpu(),
        (void *)p->cache_write_ptr);

    for (int i = 0; i < length; ++ i)
    {
        // write - not thread safe
        // just write to make cache line dirty
        *ptr += 1;
    }

    return NULL;
}

void sequential_run()
{
// true sharing counting
pthread_t ts_tid_1, ts_tid_2;

int64_t seq_result;

param_t p = {
.cache_write_ptr = &seq_result,
.cpu_id = 0
};

printf("[Sequential]\n");

long t0 = clock();

pthread_create(&ts_tid_1, NULL, work_thread, (void *)&p);
pthread_join(ts_tid_1, NULL);

pthread_create(&ts_tid_2, NULL, work_thread, (void *)&p);
pthread_join(ts_tid_2, NULL);

printf(" Result %ld; elapsed tick tock: %ld\n", seq_result, clock() - t0);
}
int LENGTH = 200000000;

void true_sharing_run()
{
// true sharing counting
pthread_t ts_tid_1, ts_tid_2;
pthread_t t1, t2;

param_t param_t1 = {
param_t p1 = {
.cache_write_ptr = &result_page0[0],
.cpu_id = 0
.cpu_id = 0,
.length = LENGTH
};
param_t param_t2 = {

param_t p2 = {
.cache_write_ptr = &result_page0[0],
.cpu_id = 1
.cpu_id = 1,
.length = LENGTH
};

printf("[True sharing]\n");

long t0 = clock();

pthread_create(&ts_tid_1, NULL, work_thread, (void *)&param_t1);
pthread_create(&ts_tid_2, NULL, work_thread, (void *)&param_t2);

pthread_join(ts_tid_1, NULL);
pthread_join(ts_tid_2, NULL);
pthread_create(&t1, NULL, work_thread, (void *)&p1);
pthread_create(&t2, NULL, work_thread, (void *)&p2);

printf(" Result %ld; elapsed tick tock: %ld\n", result_page0[0], clock() - t0);
pthread_join(t1, NULL);
pthread_join(t2, NULL);

printf("[True Sharing]\n\tresult: %ld; elapsed tick tock: %ld\n",
result_page0[0],
clock() - t0);
}

void false_sharing_run()
{
// true sharing counting
pthread_t ts_tid_1, ts_tid_2;
pthread_t t1, t2;

param_t param_t1 = {
param_t p1 = {
.cache_write_ptr = &result_page1[0],
.cpu_id = 0
.cpu_id = 0,
.length = LENGTH
};
param_t param_t2 = {

param_t p2 = {
.cache_write_ptr = &result_page1[1],
.cpu_id = 1
.cpu_id = 1,
.length = LENGTH
};

printf("[False sharing]\n");

long t0 = clock();

pthread_create(&ts_tid_1, NULL, work_thread, (void *)&param_t1);
pthread_create(&ts_tid_2, NULL, work_thread, (void *)&param_t2);

pthread_join(ts_tid_1, NULL);
pthread_join(ts_tid_2, NULL);
pthread_create(&t1, NULL, work_thread, (void *)&p1);
pthread_create(&t2, NULL, work_thread, (void *)&p2);

printf(" Result %ld; elapsed tick tock: %ld\n", result_page1[0] + result_page1[1], clock() - t0);
pthread_join(t1, NULL);
pthread_join(t2, NULL);

printf("[False Sharing]\n\tresult: %ld; elapsed tick tock: %ld\n",
result_page1[0] + result_page1[1], clock() - t0);
}

void exclusive_run()
void no_sharing_run()
{
// true sharing counting
pthread_t ts_tid_1, ts_tid_2;
pthread_t t1, t2;

param_t param_t1 = {
param_t p1 = {
.cache_write_ptr = &result_page2[0],
.cpu_id = 0
.cpu_id = 0,
.length = LENGTH
};
param_t param_t2 = {

param_t p2 = {
.cache_write_ptr = &result_page3[0],
.cpu_id = 1
.cpu_id = 1,
.length = LENGTH
};

printf("[Exclusive]\n");

long t0 = clock();

pthread_create(&ts_tid_1, NULL, work_thread, (void *)&param_t1);
pthread_create(&ts_tid_2, NULL, work_thread, (void *)&param_t2);

pthread_join(ts_tid_1, NULL);
pthread_join(ts_tid_2, NULL);
pthread_create(&t1, NULL, work_thread, (void *)&p1);
pthread_create(&t2, NULL, work_thread, (void *)&p2);

printf(" Result %ld; elapsed tick tock: %ld\n\n",
pthread_join(t1, NULL);
pthread_join(t2, NULL);

printf("[No Sharing]\n\tresult: %ld; elapsed tick tock: %ld\n",
result_page2[0] + result_page3[0], clock() - t0);
}

// Run every scenario once, in order: sequential baseline, true sharing,
// false sharing, no sharing. Each scenario prints its own result and timing.
int main()
{
    sequential_run();
    true_sharing_run();
    false_sharing_run();
    // must come before the return, otherwise it is never executed
    no_sharing_run();

    return 0;
}
60 changes: 52 additions & 8 deletions src/mains/mesi.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ typedef struct
int value;
} line_t;

#define NUM_PROCESSOR (1000)
#define NUM_PROCESSOR (2048)

line_t cache[NUM_PROCESSOR];

Expand Down Expand Up @@ -64,7 +64,8 @@ int check_state()

if ((m_count == 1 && i_count == (NUM_PROCESSOR - 1)) ||
(e_count == 1 && i_count == (NUM_PROCESSOR - 1)) ||
(s_count >= 2 && i_count == (NUM_PROCESSOR - s_count)))
(s_count >= 2 && i_count == (NUM_PROCESSOR - s_count)) ||
(i_count == NUM_PROCESSOR))
{
return 1;
}
Expand All @@ -73,6 +74,8 @@ int check_state()
}

// i - the index of processor
// read_value - the address of read value
// int return - nonzero if this event is related to the target physical address, 0 otherwise
int read_cacheline(int i, int *read_value)
{
if (cache[i].state == MODIFIED)
Expand Down Expand Up @@ -136,7 +139,7 @@ int read_cacheline(int i, int *read_value)
cache[i].value = cache[j].value;

// there are exactly 2 copies among the processors
cache[i].state = SHARED;
cache[j].state = SHARED;

*read_value = cache[i].value;

Expand Down Expand Up @@ -179,6 +182,9 @@ int read_cacheline(int i, int *read_value)
return 0;
}

// i - the index of processor
// write_value - the value to be written to the physical address
// int return - nonzero if this event is related to the target physical address, 0 otherwise
int write_cacheline(int i, int write_value)
{
if (cache[i].state == MODIFIED)
Expand Down Expand Up @@ -232,10 +238,17 @@ int write_cacheline(int i, int write_value)
{
if (cache[j].state == MODIFIED)
{
// write back
mem_value = cache[j].value;

// invalidate the old cache line
cache[j].state = INVALID;
cache[j].value = 0;

// write allocate
cache[i].value = mem_value;

// update to modified
cache[i].state = MODIFIED;
cache[i].value = write_value;

Expand Down Expand Up @@ -295,6 +308,8 @@ int write_cacheline(int i, int write_value)
return 0;
}

// i - the index of processor
// int return - 1 if this event is related to the target physical address, 0 otherwise
int evict_cacheline(int i)
{
if (cache[i].state == MODIFIED)
Expand All @@ -310,7 +325,7 @@ int evict_cacheline(int i)

return 1;
}
else if (cache[i].state == EXCLUSIVE || cache[i].state == SHARED)
else if (cache[i].state == EXCLUSIVE)
{
cache[i].state = INVALID;
cache[i].value = 0;
Expand All @@ -321,11 +336,41 @@ int evict_cacheline(int i)

return 1;
}
else if (cache[i].state == SHARED)
{
cache[i].state = INVALID;
cache[i].value = 0;

// if exactly one SHARED copy is left, it becomes EXCLUSIVE
int s_count = 0;
int last_s = -1;

for (int j = 0; j < NUM_PROCESSOR; ++ j)
{
if (cache[j].state == SHARED)
{
last_s = j;
s_count ++;
}
}

if (s_count == 1)
{
cache[last_s].state = EXCLUSIVE;
}

#ifdef DEBUG
printf("[%d] evict\n", i);
#endif

return 1;
}

// evict when cache line is Invalid
// not related with target physical address
return 0;
}


void print_cacheline()
{
for (int i = 0; i < NUM_PROCESSOR; ++ i)
Expand Down Expand Up @@ -359,7 +404,6 @@ int main()
{
srand(123456);


int read_value;

for (int i = 0; i < NUM_PROCESSOR; ++ i)
Expand All @@ -372,7 +416,7 @@ int main()
print_cacheline();
#endif

for (int i = 0; i < 10000; ++ i)
for (int i = 0; i < 100000; ++ i)
{
int core_index = rand() % NUM_PROCESSOR;
int op = rand() % 3;
Expand All @@ -394,7 +438,7 @@ int main()
// printf("evict [%d]\n", core_index);
do_print = evict_cacheline(core_index);
}

#ifdef DEBUG
if (do_print)
{
Expand Down

0 comments on commit 3e16dd4

Please sign in to comment.