-
Notifications
You must be signed in to change notification settings - Fork 139
Support dynamic linking #244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,47 @@ | ||||||
/* | ||||||
* shecc - Self-Hosting and Educational C Compiler. | ||||||
* | ||||||
* shecc is freely redistributable under the BSD 2 clause license. See the | ||||||
* file "LICENSE" for information on usage and redistribution of this file. | ||||||
*/ | ||||||
|
||||||
#pragma once | ||||||
/* Declarations of C standard library functions */ | ||||||
|
||||||
#define NULL 0 | ||||||
DrXiao marked this conversation as resolved.
Show resolved
Hide resolved
DrXiao marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
#define bool _Bool | ||||||
#define true 1 | ||||||
#define false 0 | ||||||
|
||||||
/* File I/O */ | ||||||
typedef int FILE; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Defines FILE as int, which is ABI-incompatible with libc's opaque FILE type and will break dynamic linking with real stdio functions. (Based on your team's feedback about fixing potential issues and refining c.c/c.h for dynamic linking.) Prompt for AI agents
DrXiao marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
FILE *fopen(char *filename, char *mode); | ||||||
int fclose(FILE *stream); | ||||||
int fgetc(FILE *stream); | ||||||
char *fgets(char *str, int n, FILE *stream); | ||||||
int fputc(int c, FILE *stream); | ||||||
|
||||||
/* string-related functions */ | ||||||
int strlen(char *str); | ||||||
int strcmp(char *s1, char *s2); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add const to strcmp parameters for correctness and compatibility with libc. Prompt for AI agents
Suggested change
|
||||||
int strncmp(char *s1, char *s2, int len); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nonstandard strncmp signature (missing const, len should be size_t) is incompatible with libc and may mis-handle large sizes. (Based on your team's feedback about fixing potential issues and refining c.c/c.h for dynamic linking.) Prompt for AI agents
|
||||||
char *strcpy(char *dest, char *src); | ||||||
char *strncpy(char *dest, char *src, int len); | ||||||
char *memcpy(char *dest, char *src, int count); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nonstandard memcpy signature (return type and parameter types incorrect) is incompatible with libc and risks ABI mismatch. (Based on your team's feedback about fixing potential issues and refining c.c/c.h for dynamic linking.) Prompt for AI agents
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. memcpy should use void* for pointers, const on src, and size_t for count to match libc and avoid size truncation. Prompt for AI agents
|
||||||
int memcmp(void *s1, void *s2, int n); | ||||||
void *memset(void *s, int c, int n); | ||||||
DrXiao marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
/* formatted output string */ | ||||||
int printf(char *str, ...); | ||||||
int sprintf(char *buffer, char *str, ...); | ||||||
int snprintf(char *buffer, int n, char *str, ...); | ||||||
|
||||||
/* Terminating program */ | ||||||
void exit(int exit_code); | ||||||
void abort(void); | ||||||
|
||||||
/* Dynamic memory allocation/deallocation functions */ | ||||||
void *malloc(int size); | ||||||
void *calloc(int n, int size); | ||||||
DrXiao marked this conversation as resolved.
Show resolved
Hide resolved
DrXiao marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
void free(void *ptr); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,4 +7,12 @@ ARCH_DEFS = \ | |
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\ | ||
\#define ELF_MACHINE 0xf3\n$\ | ||
\#define ELF_FLAGS 0\n$\ | ||
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. DYN_LINKER points to the ARM loader path (ld-linux.so.3), which is incorrect for RISC-V and will break dynlinked execution. Use the proper RISC-V ELF interpreter path instead. (Based on your team's feedback about implementing RISC-V dynamic linking, this misconfiguration blocks the RISC-V dynlink workflow.) Prompt for AI agents
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. DYN_LINKER points to ARM’s /lib/ld-linux.so.3; set this to the correct RISC-V dynamic loader path to avoid invalid interpreter in produced executables. Prompt for AI agents
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Dynamic linker path is for ARM; RISC-V uses an arch-specific ld-linux-riscv32-*.so.1. Use the correct RISC-V interpreter path or make it configurable. Prompt for AI agents
|
||
\#define LIBC_SO \"libc.so.6\"\n$\ | ||
\#define PLT_FIXUP_SIZE 20\n$\ | ||
\#define PLT_ENT_SIZE 12\n$\ | ||
\#define R_ARCH_JUMP_SLOT 0x5\n$\ | ||
" | ||
|
||
# TODO: Set this variable for RISC-V architecture | ||
RUNNER_LD_PREFIX= | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. RUNNER_LD_PREFIX is empty, so qemu-riscv32 won’t be given a sysroot (-L ...) for dynlinked runs, likely causing loader/lib resolution failures. (Based on your team's feedback about making the dynamically linked shecc run, this empty setting prevents executing dynlinked RISC-V binaries.) Prompt for AI agents
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,6 +69,8 @@ void update_elf_offset(ph2_ir_t *ph2_ir) | |
return; | ||
case OP_read: | ||
case OP_write: | ||
case OP_push: | ||
case OP_pop: | ||
case OP_jump: | ||
case OP_call: | ||
case OP_load_func: | ||
|
@@ -136,10 +138,17 @@ void update_elf_offset(ph2_ir_t *ph2_ir) | |
|
||
void cfg_flatten(void) | ||
{ | ||
func_t *func = find_func("__syscall"); | ||
func->bbs->elf_offset = 48; /* offset of start + branch + exit in codegen */ | ||
func_t *func; | ||
|
||
if (dynlink) | ||
elf_offset = 80; /* offset of __libc_start_main + start in codegen */ | ||
else { | ||
func = find_func("__syscall"); | ||
func->bbs->elf_offset = 48; /* offset of start + exit in codegen */ | ||
elf_offset = | ||
84; /* offset of start + branch + exit + syscall in codegen */ | ||
} | ||
|
||
elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */ | ||
GLOBAL_FUNC->bbs->elf_offset = elf_offset; | ||
|
||
for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir; | ||
|
@@ -148,7 +157,10 @@ void cfg_flatten(void) | |
} | ||
|
||
/* prepare 'argc' and 'argv', then proceed to 'main' function */ | ||
elf_offset += 32; /* 6 insns for main call + 2 for exit */ | ||
if (dynlink) | ||
elf_offset += 20; | ||
else | ||
elf_offset += 32; /* 6 insns for main call + 2 for exit */ | ||
|
||
for (func = FUNC_LIST.head; func; func = func->next) { | ||
/* Skip function declarations without bodies */ | ||
|
@@ -274,6 +286,12 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir) | |
else | ||
abort(); | ||
return; | ||
case OP_push: | ||
emit(__stmdb(__AL, 1, __sp, rn)); | ||
return; | ||
case OP_pop: | ||
emit(__add_i(__AL, __sp, __sp, rn * 4)); | ||
return; | ||
case OP_branch: | ||
emit(__teq(rn)); | ||
if (ph2_ir->is_branch_detached) { | ||
|
@@ -287,7 +305,12 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir) | |
return; | ||
case OP_call: | ||
func = find_func(ph2_ir->func_name); | ||
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size)); | ||
if (func->bbs) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Potential null dereference of func; check func for NULL before accessing bbs to handle unresolved/external symbols safely. Prompt for AI agents
|
||
ofs = func->bbs->elf_offset - elf_code->size; | ||
else | ||
ofs = (elf_plt_start + func->plt_offset) - | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Calls to external functions unconditionally use PLT when func body is missing; this should be gated by dynlink to avoid referencing non-existent PLT in static builds. Prompt for AI agents
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. TODO: if (func->bbs)
ofs = func->bbs->elf_offset - elf_code->size;
else if (dynlink)
ofs = (elf_plt_start + func->plt_offset) -
(elf_code_start + elf_code->size);
else
fatal("The function is not implemented"); |
||
(elf_code_start + elf_code->size); | ||
emit(__bl(__AL, ofs)); | ||
return; | ||
case OP_load_data_address: | ||
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start)); | ||
|
@@ -299,7 +322,10 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir) | |
return; | ||
case OP_address_of_func: | ||
func = find_func(ph2_ir->func_name); | ||
ofs = elf_code_start + func->bbs->elf_offset; | ||
if (func->bbs) | ||
ofs = elf_code_start + func->bbs->elf_offset; | ||
else | ||
ofs = elf_plt_start + func->plt_offset; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Taking address of external function unconditionally uses PLT when func body is missing; guard with dynlink to prevent invalid addresses in static builds. Prompt for AI agents
|
||
emit(__movw(__AL, __r8, ofs)); | ||
emit(__movt(__AL, __r8, ofs)); | ||
emit(__sw(__AL, __r8, rn, 0)); | ||
|
@@ -456,39 +482,72 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir) | |
} | ||
} | ||
|
||
void plt_generate(void); | ||
void code_generate(void) | ||
{ | ||
elf_data_start = elf_code_start + elf_offset; | ||
elf_rodata_start = elf_data_start + elf_data->size; | ||
elf_bss_start = elf_rodata_start + elf_rodata->size; | ||
if (dynlink) { | ||
plt_generate(); | ||
/* Call __libc_start_main() */ | ||
emit(__mov_i(__AL, __r11, 0)); | ||
emit(__mov_i(__AL, __lr, 0)); | ||
emit(__pop_word(__AL, __r1)); | ||
emit(__mov_r(__AL, __r2, __sp)); | ||
emit(__push_reg(__AL, __r2)); | ||
emit(__push_reg(__AL, __r0)); | ||
emit(__mov_i(__AL, __r12, 0)); | ||
emit(__push_reg(__AL, __r12)); | ||
|
||
int main_wrapper_offset = elf_code->size + 24; | ||
emit(__movw(__AL, __r0, elf_code_start + main_wrapper_offset)); | ||
emit(__movt(__AL, __r0, elf_code_start + main_wrapper_offset)); | ||
emit(__mov_i(__AL, __r3, 0)); | ||
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) - | ||
(elf_code_start + elf_code->size))); | ||
/* Goto the 'exit' code snippet if __libc_start_main returns */ | ||
emit(__mov_i(__AL, __r0, 127)); | ||
emit(__bl(__AL, 28)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hard-coded branch to an 'exit' snippet is emitted in dynlink path, but the exit snippet is only generated for static linking, leading to an invalid/incorrect branch target. Prompt for AI agents
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These instructions are wrong. I will fix them. |
||
|
||
/* start */ | ||
/* If the compiled program is dynamic linking, the starting | ||
* point of 'start' is located here. | ||
* | ||
* Preserve 'argc' and 'argv' for the 'main' function. | ||
* */ | ||
emit(__mov_r(__AL, __r9, __r0)); | ||
DrXiao marked this conversation as resolved.
Show resolved
Hide resolved
DrXiao marked this conversation as resolved.
Show resolved
Hide resolved
|
||
emit(__mov_r(__AL, __r10, __r1)); | ||
} | ||
/* For both static and dynamic linking, we need to set up the stack | ||
* and call the main function. | ||
* */ | ||
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__sub_r(__AL, __sp, __sp, __r8)); | ||
emit(__mov_r(__AL, __r12, __sp)); | ||
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size)); | ||
/* After global init, jump to main preparation */ | ||
emit(__b(__AL, 56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */ | ||
|
||
/* exit */ | ||
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__add_r(__AL, __sp, __sp, __r8)); | ||
emit(__mov_r(__AL, __r0, __r0)); | ||
emit(__mov_i(__AL, __r7, 1)); | ||
emit(__svc()); | ||
|
||
/* syscall */ | ||
emit(__mov_r(__AL, __r7, __r0)); | ||
emit(__mov_r(__AL, __r0, __r1)); | ||
emit(__mov_r(__AL, __r1, __r2)); | ||
emit(__mov_r(__AL, __r2, __r3)); | ||
emit(__mov_r(__AL, __r3, __r4)); | ||
emit(__mov_r(__AL, __r4, __r5)); | ||
emit(__mov_r(__AL, __r5, __r6)); | ||
emit(__svc()); | ||
emit(__bx(__AL, __lr)); | ||
if (!dynlink) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unconditional branch still skips 56 bytes even when the exit/syscall block is omitted for dynlink, likely causing control-flow to jump into the wrong place. Make the branch size conditional (or omit it) when dynlink is enabled. Prompt for AI agents
|
||
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size)); | ||
/* After global init, jump to main preparation */ | ||
emit(__b(__AL, | ||
56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */ | ||
|
||
/* exit - only for static linking */ | ||
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__add_r(__AL, __sp, __sp, __r8)); | ||
emit(__mov_r(__AL, __r0, __r0)); | ||
emit(__mov_i(__AL, __r7, 1)); | ||
emit(__svc()); | ||
|
||
/* syscall */ | ||
emit(__mov_r(__AL, __r7, __r0)); | ||
emit(__mov_r(__AL, __r0, __r1)); | ||
emit(__mov_r(__AL, __r1, __r2)); | ||
emit(__mov_r(__AL, __r2, __r3)); | ||
emit(__mov_r(__AL, __r3, __r4)); | ||
emit(__mov_r(__AL, __r4, __r5)); | ||
emit(__mov_r(__AL, __r5, __r6)); | ||
emit(__svc()); | ||
emit(__bx(__AL, __lr)); | ||
} | ||
|
||
ph2_ir_t *ph2_ir; | ||
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir; | ||
|
@@ -497,11 +556,16 @@ void code_generate(void) | |
|
||
/* prepare 'argc' and 'argv', then proceed to 'main' function */ | ||
if (MAIN_BB) { | ||
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__add_r(__AL, __r8, __r12, __r8)); | ||
emit(__lw(__AL, __r0, __r8, 0)); | ||
emit(__add_i(__AL, __r1, __r8, 4)); | ||
if (dynlink) { | ||
emit(__mov_r(__AL, __r0, __r9)); | ||
emit(__mov_r(__AL, __r1, __r10)); | ||
} else { | ||
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size)); | ||
emit(__add_r(__AL, __r8, __r12, __r8)); | ||
emit(__lw(__AL, __r0, __r8, 0)); | ||
emit(__add_i(__AL, __r1, __r8, 4)); | ||
} | ||
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size)); | ||
|
||
/* exit with main's return value - r0 already has the return value */ | ||
|
@@ -514,3 +578,20 @@ void code_generate(void) | |
emit_ph2_ir(ph2_ir); | ||
} | ||
} | ||
|
||
/* Write the PLT contents into elf_plt: a five-word header stub followed by
 * one three-word stub for every PLT_ENT_SIZE bytes that remain in plt_size
 * after PLT_FIXUP_SIZE is subtracted.
 *
 * The header stub works on the address elf_got_start + PTR_SIZE * 2; stub i
 * works on the address elf_got_start + PTR_SIZE * (i + 3). Each stub ends
 * with an __lw whose destination register is pc.
 *
 * NOTE(review): presumably the header is the lazy-binding fixup trampoline
 * and slot 2 holds the resolver address filled in by the dynamic loader --
 * confirm against the GOT layout built elsewhere.
 * NOTE(review): the header overwrites r10, while the dynlink start-up code in
 * code_generate() keeps 'argv' in r10 -- confirm the header cannot run
 * between that save and its later use.
 */
void plt_generate(void) | ||
{ | ||
/* Address operand used by the header stub (slot index 2). */
int addr_of_got = elf_got_start + PTR_SIZE * 2; | ||
/* Size of the PLT that remains once the header stub is excluded. */
int end = plt_size - PLT_FIXUP_SIZE; | ||
/* Header stub: five instruction words. */
elf_write_int(elf_plt, __push_reg(__AL, __lr)); | ||
/* movw/movt pair builds the full address value in r10. */
elf_write_int(elf_plt, __movw(__AL, __r10, addr_of_got)); | ||
elf_write_int(elf_plt, __movt(__AL, __r10, addr_of_got)); | ||
elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10)); | ||
/* Presumably loads pc from the word at [lr + 0] -- TODO confirm __lw
 * operand order. */
elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0)); | ||
/* One stub per PLT_ENT_SIZE bytes; each iteration writes three words, so
 * this assumes PLT_ENT_SIZE is 12 for this target -- TODO confirm. */
for (int i = 0; i * PLT_ENT_SIZE < end; i++) { | ||
/* Stub i uses slot index i + 3. */
addr_of_got = elf_got_start + PTR_SIZE * (i + 3); | ||
elf_write_int(elf_plt, __movw(__AL, __r12, addr_of_got)); | ||
elf_write_int(elf_plt, __movt(__AL, __r12, addr_of_got)); | ||
elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0)); | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.