diff --git a/cmd.py b/cmd.py index 4d8700c..1f8c9a7 100644 --- a/cmd.py +++ b/cmd.py @@ -143,11 +143,10 @@ def build(key): "./src/common/print.c", "./src/common/convert.c", "./src/common/cleanup.c", - "./src/datastruct/trie.c", - "./src/datastruct/array.c", + "./src/algorithm/trie.c", + "./src/algorithm/array.c", "./src/hardware/cpu/isa.c", "./src/hardware/cpu/mmu.c", - "./src/hardware/memory/dram.c", "-o", EXE_BIN_MACHINE ] ], @@ -161,9 +160,9 @@ def build(key): "./src/common/convert.c", "./src/common/tagmalloc.c", "./src/common/cleanup.c", - "./src/datastruct/array.c", - "./src/datastruct/hashtable.c", - "./src/datastruct/linkedlist.c", + "./src/algorithm/array.c", + "./src/algorithm/hashtable.c", + "./src/algorithm/linkedlist.c", "./src/linker/parseElf.c", "./src/linker/staticlink.c", "-o", EXE_BIN_LINKER @@ -177,12 +176,12 @@ def build(key): "./src/common/convert.c", "./src/common/tagmalloc.c", "./src/common/cleanup.c", - "./src/datastruct/array.c", - "./src/datastruct/hashtable.c", - "./src/datastruct/linkedlist.c", + "./src/algorithm/array.c", + "./src/algorithm/hashtable.c", + "./src/algorithm/linkedlist.c", "./src/linker/parseElf.c", "./src/linker/staticlink.c", - "-o", "./bin/staticlink.so" + "-o", "./bin/staticLinker.so" ], [ "/usr/bin/gcc-7", @@ -192,9 +191,9 @@ def build(key): "./src/common/convert.c", "./src/common/tagmalloc.c", "./src/common/cleanup.c", - "./src/datastruct/array.c", - "./src/datastruct/hashtable.c", - "./src/datastruct/linkedlist.c", + "./src/algorithm/array.c", + "./src/algorithm/hashtable.c", + "./src/algorithm/linkedlist.c", "./src/linker/linker.c", "-ldl", "-o", "./bin/link" ], @@ -210,13 +209,14 @@ def build(key): def run(key): assert(os.path.isdir("./bin/")) bin_map = { - KEY_MACHINE : EXE_BIN_MACHINE, - KEY_LINKER : EXE_BIN_LINKER + KEY_MACHINE : [EXE_BIN_MACHINE], + KEY_LINKER : [EXE_BIN_LINKER], + "dll" : ["./bin/link", "main", "sum", "-o", "output"], } if not key in bin_map: print("input the correct binary key:", bin_map.keys()) exit() - subprocess.run([bin_map[key]]) + subprocess.run(bin_map[key]) def debug(key): assert(os.path.isdir("./bin/")) diff --git a/files/exe/output.eof.txt b/files/exe/output.eof.txt new file mode 100644 index 0000000..b46605a --- /dev/null +++ b/files/exe/output.eof.txt @@ -0,0 +1,44 @@ +44 +3 +.text,0x400000,5,32 +.data,0x400800,37,3 +.symtab,0x0,40,4 +push %rbp +mov %rsp,%rbp +sub $0x10,%rsp +mov $0x2,%esi +lea 0x0000000000000948(%rip),%rdi +callq 0x0000000000000160 +mov %rax,-0x8(%rbp) +mov -0x8(%rbp),%rax +leaveq +retq +push %rbp +mov %rsp,%rbp +mov %rdi,-0x18(%rbp) +mov %rsi,-0x20(%rbp) +movq $0x0,-0x8(%rbp) +movq $0x0,-0x10(%rbp) +jmp 3d +mov -0x10(%rbp),%rax +lea 0x0(,%rax,8),%rdx +mov -0x18(%rbp),%rax +add %rdx,%rax +mov (%rax),%rax +add %rax,-0x8(%rbp) +addq $0x1,-0x10(%rbp) +mov -0x10(%rbp),%rax +cmp -0x20(%rbp),%rax +jb 1e +mov 0x0000000000000170(%rip),%rdx +mov -0x8(%rbp),%rax +add %rdx,%rax +pop %rbp +retq +0x0000000012340000 +0x000000000000abcd +0x0000000f00000000 +main,STB_GLOBAL,STT_FUNC,.text,0,10 +sum,STB_GLOBAL,STT_FUNC,.text,10,22 +array,STB_GLOBAL,STT_OBJECT,.data,0,2 +bias,STB_GLOBAL,STT_OBJECT,.data,2,1 diff --git a/src/datastruct/array.c b/src/algorithm/array.c similarity index 98% rename from src/datastruct/array.c rename to src/algorithm/array.c index 18966dc..9919035 100644 --- a/src/datastruct/array.c +++ b/src/algorithm/array.c @@ -12,7 +12,7 @@ #include #include #include "headers/common.h" -#include "headers/datastruct.h" +#include "headers/algorithm.h" array_t *array_construct(int size) { diff --git a/src/datastruct/hashtable.c b/src/algorithm/hashtable.c similarity index 97% rename from src/datastruct/hashtable.c rename to src/algorithm/hashtable.c index cb158c0..bece8d5 100644 --- a/src/datastruct/hashtable.c +++ b/src/algorithm/hashtable.c @@ -14,7 +14,7 @@ #include #include #include "headers/common.h" -#include "headers/datastruct.h" +#include "headers/algorithm.h" static uint64_t hash_function(char *str) { @@ -71,6 +71,9 @@ void hashtable_free(hashtable_t *tab) return; } + debug_printf(DEBUG_DATASTRUCTURE, "free hashtable:\n"); + print_hashtable(tab); + for (int i = 0; i < tab->num; ++ i) { hashtable_bucket_t *b = tab->directory[i]; @@ -79,7 +82,7 @@ void hashtable_free(hashtable_t *tab) continue; } - for (int j = 0; j < tab->size; ++ j) + for (int j = 0; j < b->counter; ++ j) { if (b->karray != NULL && b->karray[j] != NULL) { diff --git a/src/datastruct/linkedlist.c b/src/algorithm/linkedlist.c similarity index 99% rename from src/datastruct/linkedlist.c rename to src/algorithm/linkedlist.c index 6f90043..9564e91 100644 --- a/src/datastruct/linkedlist.c +++ b/src/algorithm/linkedlist.c @@ -11,7 +11,7 @@ #include #include #include -#include "headers/datastruct.h" +#include "headers/algorithm.h" // constructor and destructor linkedlist_t *linkedlist_construct() diff --git a/src/datastruct/trie.c b/src/algorithm/trie.c similarity index 98% rename from src/datastruct/trie.c rename to src/algorithm/trie.c index 2574987..b7bf8cc 100644 --- a/src/datastruct/trie.c +++ b/src/algorithm/trie.c @@ -15,7 +15,7 @@ #include "headers/cpu.h" #include "headers/memory.h" #include "headers/common.h" -#include "headers/datastruct.h" +#include "headers/algorithm.h" static int get_index(char c) { diff --git a/src/common/cleanup.c b/src/common/cleanup.c index 84d6651..bdbf0ed 100644 --- a/src/common/cleanup.c +++ b/src/common/cleanup.c @@ -12,7 +12,7 @@ #include #include #include "headers/common.h" -#include "headers/datastruct.h" +#include "headers/algorithm.h" typedef void (*cleanup_t)(); diff --git a/src/common/tagmalloc.c b/src/common/tagmalloc.c index bcbf13d..e09a873 100644 --- a/src/common/tagmalloc.c +++ b/src/common/tagmalloc.c @@ -13,7 +13,7 @@ #include #include #include "headers/common.h" -#include "headers/datastruct.h" +#include "headers/algorithm.h" static uint64_t compute_tag(char *str); static void tag_destroy(); diff --git a/src/hardware/cpu/isa.c b/src/hardware/cpu/isa.c index 90c833c..4599a4a 100644 --- a/src/hardware/cpu/isa.c +++ b/src/hardware/cpu/isa.c @@ -14,63 +14,8 @@ #include "headers/cpu.h" #include "headers/memory.h" #include "headers/common.h" -#include "headers/datastruct.h" - -/*======================================*/ -/* instruction set architecture */ -/*======================================*/ - -// data structures -typedef enum INST_OPERATOR -{ - INST_MOV, // 0 - INST_PUSH, // 1 - INST_POP, // 2 - INST_LEAVE, // 3 - INST_CALL, // 4 - INST_RET, // 5 - INST_ADD, // 6 - INST_SUB, // 7 - INST_CMP, // 8 - INST_JNE, // 9 - INST_JMP, // 10 -} op_t; - -typedef enum OPERAND_TYPE -{ - EMPTY, // 0 - IMM, // 1 - REG, // 2 - MEM_IMM, // 3 - MEM_REG1, // 4 - MEM_IMM_REG1, // 5 - MEM_REG1_REG2, // 6 - MEM_IMM_REG1_REG2, // 7 - MEM_REG2_SCAL, // 8 - MEM_IMM_REG2_SCAL, // 9 - MEM_REG1_REG2_SCAL, // 10 - MEM_IMM_REG1_REG2_SCAL // 11 -} od_type_t; - -typedef struct OPERAND_STRUCT -{ - od_type_t type; // IMM, REG, MEM - uint64_t imm; // immediate number - uint64_t scal; // scale number to register 2 - uint64_t reg1; // main register - uint64_t reg2; // register 2 -} od_t; - -// local variables are allocated in stack in run-time -// we don't consider local STATIC variables -// ref: Computer Systems: A Programmer's Perspective 3rd -// Chapter 7 Linking: 7.5 Symbols and Symbol Tables -typedef struct INST_STRUCT -{ - op_t op; // enum of operators. e.g. mov, call, etc. - od_t src; // operand src of instruction - od_t dst; // operand dst of instruction -} inst_t; +#include "headers/algorithm.h" +#include "headers/instruction.h" /*======================================*/ /* parse assembly instruction */ @@ -668,6 +613,7 @@ static void call_handler(od_t *src_od, od_t *dst_od) va2pa(cpu_reg.rsp), cpu_pc.rip + sizeof(char) * MAX_INSTRUCTION_CHAR); // jump to target function address + // TODO: support PC relative addressing cpu_pc.rip = src; cpu_flags.__flags_value = 0; } diff --git a/src/headers/datastruct.h b/src/headers/algorithm.h similarity index 99% rename from src/headers/datastruct.h rename to src/headers/algorithm.h index a024792..1a07244 100644 --- a/src/headers/datastruct.h +++ b/src/headers/algorithm.h @@ -15,7 +15,6 @@ #include - /*======================================*/ /* Circular Doubly Linked List */ /*======================================*/ diff --git a/src/headers/instruction.h b/src/headers/instruction.h new file mode 100644 index 0000000..c668daf --- /dev/null +++ b/src/headers/instruction.h @@ -0,0 +1,74 @@ +/* BCST - Introduction to Computer Systems + * Author: yangminz@outlook.com + * Github: https://github.com/yangminz/bcst_csapp + * Bilibili: https://space.bilibili.com/4564101 + * Zhihu: https://www.zhihu.com/people/zhao-yang-min + * This project (code repository and videos) is exclusively owned by yangminz + * and shall not be used for commercial and profitting purpose + * without yangminz's permission. + */ + +// include guards to prevent double declaration of any identifiers +// such as types, enums and static variables +#ifndef INSTRUCTION_GUARD +#define INSTRUCTION_GUARD + +#include + +/*======================================*/ +/* instruction set architecture */ +/*======================================*/ + +// data structures +typedef enum INST_OPERATOR +{ + INST_MOV, // 0 + INST_PUSH, // 1 + INST_POP, // 2 + INST_LEAVE, // 3 + INST_CALL, // 4 + INST_RET, // 5 + INST_ADD, // 6 + INST_SUB, // 7 + INST_CMP, // 8 + INST_JNE, // 9 + INST_JMP, // 10 +} op_t; + +typedef enum OPERAND_TYPE +{ + EMPTY, // 0 + IMM, // 1 + REG, // 2 + MEM_IMM, // 3 + MEM_REG1, // 4 + MEM_IMM_REG1, // 5 + MEM_REG1_REG2, // 6 + MEM_IMM_REG1_REG2, // 7 + MEM_REG2_SCAL, // 8 + MEM_IMM_REG2_SCAL, // 9 + MEM_REG1_REG2_SCAL, // 10 + MEM_IMM_REG1_REG2_SCAL // 11 +} od_type_t; + +typedef struct OPERAND_STRUCT +{ + od_type_t type; // IMM, REG, MEM + uint64_t imm; // immediate number + uint64_t scal; // scale number to register 2 + uint64_t reg1; // main register + uint64_t reg2; // register 2 +} od_t; + +// local variables are allocated in stack in run-time +// we don't consider local STATIC variables +// ref: Computer Systems: A Programmer's Perspective 3rd +// Chapter 7 Linking: 7.5 Symbols and Symbol Tables +typedef struct INST_STRUCT +{ + op_t op; // enum of operators. e.g. mov, call, etc. + od_t src; // operand src of instruction + od_t dst; // operand dst of instruction +} inst_t; + +#endif \ No newline at end of file diff --git a/src/headers/linker.h b/src/headers/linker.h index eebb727..0e4809c 100644 --- a/src/headers/linker.h +++ b/src/headers/linker.h @@ -15,7 +15,7 @@ #include #include -#include "headers/datastruct.h" +#include "headers/algorithm.h" #define MAX_CHAR_SECTION_NAME (32) @@ -104,5 +104,6 @@ typedef struct void parse_elf(char *filename, elf_t *elf); void free_elf(elf_t *elf); void link_elf(elf_t **srcs, int num_srcs, elf_t *dst); +void write_eof(const char *filename, elf_t *eof); #endif \ No newline at end of file diff --git a/src/linker/linker.c b/src/linker/linker.c index 6c9df92..8435189 100644 --- a/src/linker/linker.c +++ b/src/linker/linker.c @@ -68,21 +68,24 @@ int main(int argc, char **argv) void (*link_elf)(elf_t **, int, elf_t *); void (*write_eof)(const char *, elf_t *); void (*parse_elf)(const char *, elf_t *); + void (*free_elf)(elf_t *elf); + link_elf = dlsym(linklib, "link_elf"); write_eof = dlsym(linklib, "write_eof"); parse_elf = dlsym(linklib, "parse_elf"); + free_elf = dlsym(linklib, "free_elf"); // do front end logic printf("we are DYNAMICALLY LINKING ./bin/linker.so to do STATIC linking:\nlinking "); - elf_t **srcs = malloc(elf_num * sizeof(elf_t *)); + elf_t **srcs = tag_malloc(elf_num * sizeof(elf_t *), "link"); for (int i = 0; i < elf_num; ++ i) { char elf_fullpath[100]; sprintf(elf_fullpath, "%s/%s.elf.txt", EXECUTABLE_DIRECTORY, elf_fn[i]); - printf("%s ", elf_fullpath); + printf("%s\n", elf_fullpath); - srcs[i] = malloc(sizeof(elf_t)); + srcs[i] = tag_malloc(sizeof(elf_t), "link"); parse_elf(elf_fullpath, srcs[i]); } @@ -98,9 +101,10 @@ int main(int argc, char **argv) // releaes elf heap for (int i = 0; i < elf_num; ++ i) { - free(srcs[i]); + free_elf(srcs[i]); } - free(srcs); + + tag_free(srcs); return 0; } diff --git a/src/linker/staticlink.c b/src/linker/staticlink.c index 3c8adb4..83d1158 100644 --- a/src/linker/staticlink.c +++ b/src/linker/staticlink.c @@ -14,6 +14,7 @@ #include #include "headers/linker.h" #include "headers/common.h" +#include "headers/instruction.h" #define MAX_SYMBOL_MAP_LENGTH 64 #define MAX_SECTION_BUFFER_LENGTH 64 @@ -53,9 +54,6 @@ static void R_X86_64_32_handler(elf_t *dst, sh_entry_t *sh, static void R_X86_64_PC32_handler(elf_t *dst, sh_entry_t *sh, int row_referencing, int col_referencing, int addend, st_entry_t *sym_referenced); -static void R_X86_64_PLT32_handler(elf_t *dst, sh_entry_t *sh, - int row_referencing, int col_referencing, int addend, - st_entry_t *sym_referenced); typedef void (*rela_handler_t)(elf_t *dst, sh_entry_t *sh, int row_referencing, int col_referencing, int addend, @@ -65,6 +63,7 @@ static rela_handler_t handler_table[3] = { &R_X86_64_32_handler, // 0 &R_X86_64_PC32_handler, // 1 // linux commit b21ebf2: x86: Treat R_X86_64_PLT32 as R_X86_64_PC32 + // https://github.com/torvalds/linux/commit/b21ebf2fb4cde1618915a97cc773e287ff49173e &R_X86_64_PC32_handler, // 2 }; @@ -755,35 +754,77 @@ static void relocation_processing(elf_t **srcs, int num_srcs, elf_t *dst, static uint64_t get_symbol_runtime_address(elf_t *dst, st_entry_t *sym) { - // TODO: get the run-time address of symbol - return 0; + // get the run-time address of symbol + uint64_t base = 0x00400000; + + uint64_t text_base = base; + uint64_t rodata_base = base; + uint64_t data_base = base; + + int inst_size = sizeof(inst_t); + int data_size = sizeof(uint64_t); + + // must visit in .text, .rodata, .data order + sh_entry_t *sht = dst->sht; + for (int i = 0; i < dst->sht_count; ++ i) + { + if (strcmp(sht[i].sh_name, ".text") == 0) + { + rodata_base = text_base + sht[i].sh_size * inst_size; + data_base = rodata_base; + } + else if (strcmp(sht[i].sh_name, ".rodata") == 0) + { + data_base = rodata_base + sht[i].sh_size * data_size; + } + } + + // check this symbol's section + if (strcmp(sym->st_shndx, ".text") == 0) + { + return text_base + inst_size * sym->st_value; + } + else if (strcmp(sym->st_shndx, ".rodata") == 0) + { + return rodata_base + data_size * sym->st_value; + } + else if (strcmp(sym->st_shndx, ".data") == 0) + { + return data_base + data_size * sym->st_value; + } + + return 0xFFFFFFFFFFFFFFFF; +} + +static void write_relocation(char *dst, uint64_t val) +{ + char temp[20]; + sprintf(temp, "0x%016lx", val); + for (int i = 0; i < 18; ++ i) + { + dst[i] = temp[i]; + } } static void R_X86_64_32_handler(elf_t *dst, sh_entry_t *sh, int row_referencing, int col_referencing, int addend, st_entry_t *sym_referenced) { - printf("row = %d, col = %d, symbol referenced = %s\n", - row_referencing, col_referencing, sym_referenced->st_name - ); + uint64_t sym_address = get_symbol_runtime_address(dst, sym_referenced); + char *s = &dst->buffer[sh->sh_offset + row_referencing][col_referencing]; + write_relocation(s, sym_address); } static void R_X86_64_PC32_handler(elf_t *dst, sh_entry_t *sh, int row_referencing, int col_referencing, int addend, st_entry_t *sym_referenced) { - printf("row = %d, col = %d, symbol referenced = %s\n", - row_referencing, col_referencing, sym_referenced->st_name - ); -} + assert(strcmp(sh->sh_name, ".text") == 0); -static void R_X86_64_PLT32_handler(elf_t *dst, sh_entry_t *sh, - int row_referencing, int col_referencing, int addend, - st_entry_t *sym_referenced) -{ - printf("row = %d, col = %d, symbol referenced = %s\n", - row_referencing, col_referencing, sym_referenced->st_name - ); + uint64_t sym_address = get_symbol_runtime_address(dst, sym_referenced); + uint64_t rip_value = 0x00400000 + (row_referencing + 1) * sizeof(inst_t); + char *s = &dst->buffer[sh->sh_offset + row_referencing][col_referencing]; + write_relocation(s, sym_address - rip_value); } static const char *get_stb_string(st_bind_t bind) diff --git a/src/tests/test_elf.c b/src/tests/test_elf.c index 1affefd..9af624e 100644 --- a/src/tests/test_elf.c +++ b/src/tests/test_elf.c @@ -28,6 +28,8 @@ int main() srcp[1] = &src[1]; link_elf((elf_t **)&srcp, 2, &dst); + write_eof("./files/exe/output.eof.txt", &dst); + free_elf(&src[0]); free_elf(&src[1]); free_elf(&dst);