Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ STAGE0 := shecc
STAGE1 := shecc-stage1.elf
STAGE2 := shecc-stage2.elf

# Library source under $(LIBDIR) that the $(OUT)/libc.inc rule normalizes and
# inlines into the compiler.
BUILTIN_LIBC ?= c.c
# Extra options passed to the stage 0 compiler when it builds stage 1.
STAGE0_FLAGS ?= --dump-ir
# Extra options passed to the stage 1 compiler when it builds stage 2.
STAGE1_FLAGS ?=
# DYNLINK=1 embeds c.h instead of c.c and adds --dynlink to both bootstrap
# stages.
ifeq ($(DYNLINK),1)
BUILTIN_LIBC := c.h
STAGE0_FLAGS += --dynlink
STAGE1_FLAGS += --dynlink
endif

OUT ?= out
ARCHS = arm riscv
ARCH ?= $(firstword $(ARCHS))
Expand Down Expand Up @@ -122,9 +131,9 @@ $(OUT)/norm-lf: tools/norm-lf.c
$(VECHO) " CC+LD\t$@\n"
$(Q)$(CC) $(CFLAGS) -o $@ $^

$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/c.c
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC)
$(VECHO) " GEN\t$@\n"
$(Q)$(OUT)/norm-lf $(LIBDIR)/c.c $(OUT)/c.normalized.c
$(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC) $(OUT)/c.normalized.c
$(Q)$(OUT)/inliner $(OUT)/c.normalized.c $@
$(Q)$(RM) $(OUT)/c.normalized.c

Expand All @@ -143,12 +152,12 @@ $(OUT)/$(STAGE0)-sanitizer: $(OUT)/libc.inc $(OBJS)
$(OUT)/$(STAGE1): $(OUT)/$(STAGE0)
$(Q)$(STAGE1_CHECK_CMD)
$(VECHO) " SHECC\t$@\n"
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
$(Q)chmod a+x $@

$(OUT)/$(STAGE2): $(OUT)/$(STAGE1)
$(VECHO) " SHECC\t$@\n"
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) -o $@ $(SRCDIR)/main.c
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c

bootstrap: $(OUT)/$(STAGE2)
$(Q)chmod 775 $(OUT)/$(STAGE2)
Expand Down
47 changes: 47 additions & 0 deletions lib/c.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* shecc - Self-Hosting and Educational C Compiler.
*
* shecc is freely redistributable under the BSD 2 clause license. See the
* file "LICENSE" for information on usage and redistribution of this file.
*/

#pragma once
/* Declarations of C standard library functions */

/* Null pointer constant, spelled as a plain integer zero. */
#define NULL 0

/* Boolean type alias and its two values. */
#define bool _Bool
#define true 1
#define false 0

/* File I/O */
/* FILE is a placeholder type: the prototypes in this header only ever pass it
 * by pointer.
 */
typedef int FILE;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Defines FILE as int, which is ABI-incompatible with libc's opaque FILE type and will break dynamic linking with real stdio functions.

(Based on your team's feedback about fixing potential issues and refining c.c/c.h for dynamic linking.)

Prompt for AI agents
Address the following comment on lib/c.h at line 18:

<comment>Defines FILE as int, which is ABI-incompatible with libc's opaque FILE type and will break dynamic linking with real stdio functions.

(Based on your team's feedback about fixing potential issues and refining c.c/c.h for dynamic linking.)</comment>

<file context>
@@ -0,0 +1,47 @@
+#define false 0
+
+/* File I/O */
+typedef int FILE;
+FILE *fopen(char *filename, char *mode);
+int fclose(FILE *stream);
</file context>

FILE *fopen(char *filename, char *mode);
int fclose(FILE *stream);
int fgetc(FILE *stream);
char *fgets(char *str, int n, FILE *stream);
int fputc(int c, FILE *stream);

/* string-related functions */
int strlen(char *str);
int strcmp(char *s1, char *s2);
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add const to strcmp parameters for correctness and compatibility with libc.

Prompt for AI agents
Address the following comment on lib/c.h at line 27:

<comment>Add const to strcmp parameters for correctness and compatibility with libc.</comment>

<file context>
@@ -0,0 +1,47 @@
+
+/* string-related functions */
+int strlen(char *str);
+int strcmp(char *s1, char *s2);
+int strncmp(char *s1, char *s2, int len);
+char *strcpy(char *dest, char *src);
</file context>
Suggested change
int strcmp(char *s1, char *s2);
int strcmp(const char *s1, const char *s2);
Fix with Cubic

int strncmp(char *s1, char *s2, int len);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nonstandard strncmp signature (missing const, len should be size_t) is incompatible with libc and may mis-handle large sizes.

(Based on your team's feedback about fixing potential issues and refining c.c/c.h for dynamic linking.)

Prompt for AI agents
Address the following comment on lib/c.h at line 28:

<comment>Nonstandard strncmp signature (missing const, len should be size_t) is incompatible with libc and may mis-handle large sizes.

(Based on your team's feedback about fixing potential issues and refining c.c/c.h for dynamic linking.)</comment>

<file context>
@@ -0,0 +1,47 @@
+/* string-related functions */
+int strlen(char *str);
+int strcmp(char *s1, char *s2);
+int strncmp(char *s1, char *s2, int len);
+char *strcpy(char *dest, char *src);
+char *strncpy(char *dest, char *src, int len);
</file context>

char *strcpy(char *dest, char *src);
char *strncpy(char *dest, char *src, int len);
char *memcpy(char *dest, char *src, int count);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nonstandard memcpy signature (return type and parameter types incorrect) is incompatible with libc and risks ABI mismatch.

(Based on your team's feedback about fixing potential issues and refining c.c/c.h for dynamic linking.)

Prompt for AI agents
Address the following comment on lib/c.h at line 31:

<comment>Nonstandard memcpy signature (return type and parameter types incorrect) is incompatible with libc and risks ABI mismatch.

(Based on your team's feedback about fixing potential issues and refining c.c/c.h for dynamic linking.)</comment>

<file context>
@@ -0,0 +1,47 @@
+int strncmp(char *s1, char *s2, int len);
+char *strcpy(char *dest, char *src);
+char *strncpy(char *dest, char *src, int len);
+char *memcpy(char *dest, char *src, int count);
+int memcmp(void *s1, void *s2, int n);
+void *memset(void *s, int c, int n);
</file context>

Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

memcpy should use void* for pointers, const on src, and size_t for count to match libc and avoid size truncation.

Prompt for AI agents
Address the following comment on lib/c.h at line 31:

<comment>memcpy should use void* for pointers, const on src, and size_t for count to match libc and avoid size truncation.</comment>

<file context>
@@ -0,0 +1,47 @@
+int strncmp(char *s1, char *s2, int len);
+char *strcpy(char *dest, char *src);
+char *strncpy(char *dest, char *src, int len);
+char *memcpy(char *dest, char *src, int count);
+int memcmp(void *s1, void *s2, int n);
+void *memset(void *s, int c, int n);
</file context>
Fix with Cubic

int memcmp(void *s1, void *s2, int n);
void *memset(void *s, int c, int n);

/* formatted output string */
/* The buffer limit 'n' of snprintf is declared as a plain 'int'. */
int printf(char *str, ...);
int sprintf(char *buffer, char *str, ...);
int snprintf(char *buffer, int n, char *str, ...);

/* Terminating program */
void exit(int exit_code);
void abort(void);

/* Dynamic memory allocation/deallocation functions */
/* Sizes and element counts are declared as plain 'int' in these prototypes. */
void *malloc(int size);
void *calloc(int n, int size);
void free(void *ptr);
6 changes: 6 additions & 0 deletions mk/arm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ ARCH_DEFS = \
\#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\
\#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\
\#define ELF_FLAGS 0x5000200\n$\
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
\#define LIBC_SO \"libc.so.6\"\n$\
\#define PLT_FIXUP_SIZE 20\n$\
\#define PLT_ENT_SIZE 12\n$\
\#define R_ARCH_JUMP_SLOT 0x16\n$\
"
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabi/
3 changes: 3 additions & 0 deletions mk/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ ifneq ($(HOST_ARCH),$(ARCH_NAME))

# Generate the path to the architecture-specific qemu
TARGET_EXEC = $(shell which $(ARCH_RUNNER))
ifeq ($(DYNLINK),1)
TARGET_EXEC += $(RUNNER_LD_PREFIX)
endif
endif
export TARGET_EXEC

Expand Down
8 changes: 8 additions & 0 deletions mk/riscv.mk
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,12 @@ ARCH_DEFS = \
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\
\#define ELF_MACHINE 0xf3\n$\
\#define ELF_FLAGS 0\n$\
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DYN_LINKER points to the ARM loader path (ld-linux.so.3), which is incorrect for RISC-V and will break dynlinked execution. Use the proper RISC-V ELF interpreter path instead.

(Based on your team's feedback about implementing RISC-V dynamic linking, this misconfiguration blocks the RISC-V dynlink workflow.)

Prompt for AI agents
Address the following comment on mk/riscv.mk at line 10:

<comment>DYN_LINKER points to the ARM loader path (ld-linux.so.3), which is incorrect for RISC-V and will break dynlinked execution. Use the proper RISC-V ELF interpreter path instead.

(Based on your team's feedback about implementing RISC-V dynamic linking, this misconfiguration blocks the RISC-V dynlink workflow.)</comment>

<file context>
@@ -7,4 +7,12 @@ ARCH_DEFS = \
     \#define ARCH_PREDEFINED \&quot;__riscv\&quot; /* Older versions of the GCC toolchain defined __riscv__ */\n$\
     \#define ELF_MACHINE 0xf3\n$\
     \#define ELF_FLAGS 0\n$\
+    \#define DYN_LINKER \&quot;/lib/ld-linux.so.3\&quot;\n$\
+    \#define LIBC_SO \&quot;libc.so.6\&quot;\n$\
+    \#define PLT_FIXUP_SIZE 20\n$\
</file context>

Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DYN_LINKER points to ARM’s /lib/ld-linux.so.3; set this to the correct RISC-V dynamic loader path to avoid invalid interpreter in produced executables.

Prompt for AI agents
Address the following comment on mk/riscv.mk at line 10:

<comment>DYN_LINKER points to ARM’s /lib/ld-linux.so.3; set this to the correct RISC-V dynamic loader path to avoid invalid interpreter in produced executables.</comment>

<file context>
@@ -7,4 +7,12 @@ ARCH_DEFS = \
     \#define ARCH_PREDEFINED \&quot;__riscv\&quot; /* Older versions of the GCC toolchain defined __riscv__ */\n$\
     \#define ELF_MACHINE 0xf3\n$\
     \#define ELF_FLAGS 0\n$\
+    \#define DYN_LINKER \&quot;/lib/ld-linux.so.3\&quot;\n$\
+    \#define LIBC_SO \&quot;libc.so.6\&quot;\n$\
+    \#define PLT_FIXUP_SIZE 20\n$\
</file context>
Fix with Cubic

Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dynamic linker path is for ARM; RISC-V uses an arch-specific ld-linux-riscv32-*.so.1. Use the correct RISC-V interpreter path or make it configurable.

Prompt for AI agents
Address the following comment on mk/riscv.mk at line 10:

<comment>Dynamic linker path is for ARM; RISC-V uses an arch-specific ld-linux-riscv32-*.so.1. Use the correct RISC-V interpreter path or make it configurable.</comment>

<file context>
@@ -7,4 +7,12 @@ ARCH_DEFS = \
     \#define ARCH_PREDEFINED \&quot;__riscv\&quot; /* Older versions of the GCC toolchain defined __riscv__ */\n$\
     \#define ELF_MACHINE 0xf3\n$\
     \#define ELF_FLAGS 0\n$\
+    \#define DYN_LINKER \&quot;/lib/ld-linux.so.3\&quot;\n$\
+    \#define LIBC_SO \&quot;libc.so.6\&quot;\n$\
+    \#define PLT_FIXUP_SIZE 20\n$\
</file context>
Fix with Cubic

\#define LIBC_SO \"libc.so.6\"\n$\
\#define PLT_FIXUP_SIZE 20\n$\
\#define PLT_ENT_SIZE 12\n$\
\#define R_ARCH_JUMP_SLOT 0x5\n$\
"

# TODO: Set this variable for RISC-V architecture
RUNNER_LD_PREFIX=
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RUNNER_LD_PREFIX is empty, so qemu-riscv32 won’t be given a sysroot (-L ...) for dynlinked runs, likely causing loader/lib resolution failures.

(Based on your team's feedback about making the dynamically linked shecc run, this empty setting prevents executing dynlinked RISC-V binaries.)

Prompt for AI agents
Address the following comment on mk/riscv.mk at line 18:

<comment>RUNNER_LD_PREFIX is empty, so qemu-riscv32 won’t be given a sysroot (-L ...) for dynlinked runs, likely causing loader/lib resolution failures.

(Based on your team's feedback about making the dynamically linked shecc run, this empty setting prevents executing dynlinked RISC-V binaries.)</comment>

<file context>
@@ -7,4 +7,12 @@ ARCH_DEFS = \
     &quot;
+
+# TODO: Set this variable for RISC-V architecture
+RUNNER_LD_PREFIX=
</file context>

153 changes: 117 additions & 36 deletions src/arm-codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
return;
case OP_read:
case OP_write:
case OP_push:
case OP_pop:
case OP_jump:
case OP_call:
case OP_load_func:
Expand Down Expand Up @@ -136,10 +138,17 @@ void update_elf_offset(ph2_ir_t *ph2_ir)

void cfg_flatten(void)
{
func_t *func = find_func("__syscall");
func->bbs->elf_offset = 48; /* offset of start + branch + exit in codegen */
func_t *func;

if (dynlink)
elf_offset = 80; /* offset of __libc_start_main + start in codegen */
else {
func = find_func("__syscall");
func->bbs->elf_offset = 48; /* offset of start + exit in codegen */
elf_offset =
84; /* offset of start + branch + exit + syscall in codegen */
}

elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */
GLOBAL_FUNC->bbs->elf_offset = elf_offset;

for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
Expand All @@ -148,7 +157,10 @@ void cfg_flatten(void)
}

/* prepare 'argc' and 'argv', then proceed to 'main' function */
elf_offset += 32; /* 6 insns for main call + 2 for exit */
if (dynlink)
elf_offset += 20;
else
elf_offset += 32; /* 6 insns for main call + 2 for exit */

for (func = FUNC_LIST.head; func; func = func->next) {
/* Skip function declarations without bodies */
Expand Down Expand Up @@ -274,6 +286,12 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
else
abort();
return;
case OP_push:
emit(__stmdb(__AL, 1, __sp, rn));
return;
case OP_pop:
emit(__add_i(__AL, __sp, __sp, rn * 4));
return;
case OP_branch:
emit(__teq(rn));
if (ph2_ir->is_branch_detached) {
Expand All @@ -287,7 +305,12 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
return;
case OP_call:
func = find_func(ph2_ir->func_name);
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
if (func->bbs)
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential null dereference of func; check func for NULL before accessing bbs to handle unresolved/external symbols safely.

Prompt for AI agents
Address the following comment on src/arm-codegen.c at line 299:

<comment>Potential null dereference of func; check func for NULL before accessing bbs to handle unresolved/external symbols safely.</comment>

<file context>
@@ -282,15 +296,23 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
     case OP_call:
         func = find_func(ph2_ir-&gt;func_name);
-        emit(__bl(__AL, func-&gt;bbs-&gt;elf_offset - elf_code-&gt;size));
+        if (func-&gt;bbs)
+            ofs = func-&gt;bbs-&gt;elf_offset - elf_code-&gt;size;
+        else
</file context>
Fix with Cubic

ofs = func->bbs->elf_offset - elf_code->size;
else
ofs = (elf_plt_start + func->plt_offset) -
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Calls to external functions unconditionally use PLT when func body is missing; this should be gated by dynlink to avoid referencing non-existent PLT in static builds.

Prompt for AI agents
Address the following comment on src/arm-codegen.c at line 303:

<comment>Calls to external functions unconditionally use PLT when func body is missing; this should be gated by dynlink to avoid referencing non-existent PLT in static builds.</comment>

<file context>
@@ -287,7 +297,12 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
+        if (func-&gt;bbs)
+            ofs = func-&gt;bbs-&gt;elf_offset - elf_code-&gt;size;
+        else
+            ofs = (elf_plt_start + func-&gt;plt_offset) -
+                  (elf_code_start + elf_code-&gt;size);
+        emit(__bl(__AL, ofs));
</file context>
Fix with Cubic

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO:

if (func->bbs)
    ofs = func->bbs->elf_offset - elf_code->size;
else if (dynlink)
    ofs = (elf_plt_start + func->plt_offset) -
          (elf_code_start + elf_code->size);
else
    fatal("The function is not implemented");

(elf_code_start + elf_code->size);
emit(__bl(__AL, ofs));
return;
case OP_load_data_address:
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
Expand All @@ -299,7 +322,10 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
return;
case OP_address_of_func:
func = find_func(ph2_ir->func_name);
ofs = elf_code_start + func->bbs->elf_offset;
if (func->bbs)
ofs = elf_code_start + func->bbs->elf_offset;
else
ofs = elf_plt_start + func->plt_offset;
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taking address of external function unconditionally uses PLT when func body is missing; guard with dynlink to prevent invalid addresses in static builds.

Prompt for AI agents
Address the following comment on src/arm-codegen.c at line 320:

<comment>Taking address of external function unconditionally uses PLT when func body is missing; guard with dynlink to prevent invalid addresses in static builds.</comment>

<file context>
@@ -299,7 +314,10 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
+        if (func-&gt;bbs)
+            ofs = elf_code_start + func-&gt;bbs-&gt;elf_offset;
+        else
+            ofs = elf_plt_start + func-&gt;plt_offset;
         emit(__movw(__AL, __r8, ofs));
         emit(__movt(__AL, __r8, ofs));
</file context>
Fix with Cubic

emit(__movw(__AL, __r8, ofs));
emit(__movt(__AL, __r8, ofs));
emit(__sw(__AL, __r8, rn, 0));
Expand Down Expand Up @@ -456,39 +482,72 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
}
}

void plt_generate(void);
void code_generate(void)
{
elf_data_start = elf_code_start + elf_offset;
elf_rodata_start = elf_data_start + elf_data->size;
elf_bss_start = elf_rodata_start + elf_rodata->size;
if (dynlink) {
plt_generate();
/* Call __libc_start_main() */
emit(__mov_i(__AL, __r11, 0));
emit(__mov_i(__AL, __lr, 0));
emit(__pop_word(__AL, __r1));
emit(__mov_r(__AL, __r2, __sp));
emit(__push_reg(__AL, __r2));
emit(__push_reg(__AL, __r0));
emit(__mov_i(__AL, __r12, 0));
emit(__push_reg(__AL, __r12));

int main_wrapper_offset = elf_code->size + 24;
emit(__movw(__AL, __r0, elf_code_start + main_wrapper_offset));
emit(__movt(__AL, __r0, elf_code_start + main_wrapper_offset));
emit(__mov_i(__AL, __r3, 0));
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
(elf_code_start + elf_code->size)));
/* Goto the 'exit' code snippet if __libc_start_main returns */
emit(__mov_i(__AL, __r0, 127));
emit(__bl(__AL, 28));
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hard-coded branch to an 'exit' snippet is emitted in dynlink path, but the exit snippet is only generated for static linking, leading to an invalid/incorrect branch target.

Prompt for AI agents
Address the following comment on src/arm-codegen.c at line 500:

<comment>Hard-coded branch to an 'exit' snippet is emitted in dynlink path, but the exit snippet is only generated for static linking, leading to an invalid/incorrect branch target.</comment>

<file context>
@@ -456,13 +474,42 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
+                            (elf_code_start + elf_code-&gt;size)));
+        /* Goto the &#39;exit&#39; code snippet if __libc_start_main returns */
+        emit(__mov_i(__AL, __r0, 127));
+        emit(__bl(__AL, 28));
 
-    /* start */
</file context>
Fix with Cubic

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These instructions are wrong. I will fix them.


/* start */
/* If the compiled program is dynamic linking, the starting
* point of 'start' is located here.
*
* Preserve 'argc' and 'argv' for the 'main' function.
* */
emit(__mov_r(__AL, __r9, __r0));
emit(__mov_r(__AL, __r10, __r1));
}
/* For both static and dynamic linking, we need to set up the stack
* and call the main function.
* */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__sub_r(__AL, __sp, __sp, __r8));
emit(__mov_r(__AL, __r12, __sp));
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
/* After global init, jump to main preparation */
emit(__b(__AL, 56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */

/* exit */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __sp, __sp, __r8));
emit(__mov_r(__AL, __r0, __r0));
emit(__mov_i(__AL, __r7, 1));
emit(__svc());

/* syscall */
emit(__mov_r(__AL, __r7, __r0));
emit(__mov_r(__AL, __r0, __r1));
emit(__mov_r(__AL, __r1, __r2));
emit(__mov_r(__AL, __r2, __r3));
emit(__mov_r(__AL, __r3, __r4));
emit(__mov_r(__AL, __r4, __r5));
emit(__mov_r(__AL, __r5, __r6));
emit(__svc());
emit(__bx(__AL, __lr));
if (!dynlink) {
Copy link

@cubic-dev-ai cubic-dev-ai bot Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unconditional branch still skips 56 bytes even when the exit/syscall block is omitted for dynlink, likely causing control-flow to jump into the wrong place. Make the branch size conditional (or omit it) when dynlink is enabled.

Prompt for AI agents
Address the following comment on src/arm-codegen.c at line 521:

<comment>Unconditional branch still skips 56 bytes even when the exit/syscall block is omitted for dynlink, likely causing control-flow to jump into the wrong place. Make the branch size conditional (or omit it) when dynlink is enabled.</comment>

<file context>
@@ -471,24 +518,26 @@ void code_generate(void)
-    emit(__mov_r(__AL, __r0, __r0));
-    emit(__mov_i(__AL, __r7, 1));
-    emit(__svc());
+    if (!dynlink) {
+        /* exit - only for static linking */
+        emit(__movw(__AL, __r8, GLOBAL_FUNC-&gt;stack_size));
</file context>

emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
/* After global init, jump to main preparation */
emit(__b(__AL,
56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */

/* exit - only for static linking */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __sp, __sp, __r8));
emit(__mov_r(__AL, __r0, __r0));
emit(__mov_i(__AL, __r7, 1));
emit(__svc());

/* syscall */
emit(__mov_r(__AL, __r7, __r0));
emit(__mov_r(__AL, __r0, __r1));
emit(__mov_r(__AL, __r1, __r2));
emit(__mov_r(__AL, __r2, __r3));
emit(__mov_r(__AL, __r3, __r4));
emit(__mov_r(__AL, __r4, __r5));
emit(__mov_r(__AL, __r5, __r6));
emit(__svc());
emit(__bx(__AL, __lr));
}

ph2_ir_t *ph2_ir;
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
Expand All @@ -497,11 +556,16 @@ void code_generate(void)

/* prepare 'argc' and 'argv', then proceed to 'main' function */
if (MAIN_BB) {
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __r8, __r12, __r8));
emit(__lw(__AL, __r0, __r8, 0));
emit(__add_i(__AL, __r1, __r8, 4));
if (dynlink) {
emit(__mov_r(__AL, __r0, __r9));
emit(__mov_r(__AL, __r1, __r10));
} else {
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __r8, __r12, __r8));
emit(__lw(__AL, __r0, __r8, 0));
emit(__add_i(__AL, __r1, __r8, 4));
}
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));

/* exit with main's return value - r0 already has the return value */
Expand All @@ -514,3 +578,20 @@ void code_generate(void)
emit_ph2_ir(ph2_ir);
}
}

/* Fill the PLT section buffer (elf_plt) with its stub code.
 *
 * Layout written, in order:
 *   - a five-word header that pushes lr, builds the address of GOT slot 2
 *     in r10, copies it to lr and loads pc from that address;
 *   - one three-word entry for every PLT_ENT_SIZE bytes of plt_size that
 *     remain after the PLT_FIXUP_SIZE header; entry 'idx' builds the address
 *     of GOT slot (idx + 3) in r12 and loads pc from it.
 *
 * NOTE(review): GOT slot 2 is presumably where the dynamic loader stores its
 * resolver entry point - confirm against the GOT setup code.
 */
void plt_generate(void)
{
    int got_slot = elf_got_start + PTR_SIZE * 2;
    int stubs_bytes = plt_size - PLT_FIXUP_SIZE;

    /* Header: the word order below is the emitted instruction order. */
    elf_write_int(elf_plt, __push_reg(__AL, __lr));
    elf_write_int(elf_plt, __movw(__AL, __r10, got_slot));
    elf_write_int(elf_plt, __movt(__AL, __r10, got_slot));
    elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10));
    elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0));

    /* Per-function entries: slots 0..2 are skipped, hence the '+ 3'. */
    int idx = 0;
    while (idx * PLT_ENT_SIZE < stubs_bytes) {
        got_slot = elf_got_start + PTR_SIZE * (idx + 3);
        elf_write_int(elf_plt, __movw(__AL, __r12, got_slot));
        elf_write_int(elf_plt, __movt(__AL, __r12, got_slot));
        elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0));
        idx++;
    }
}
10 changes: 10 additions & 0 deletions src/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
}

/* Build the instruction word that pushes the single register 'rt'.
 *
 * The fields handed to arm_encode are opcode 0x52, base register 13 and an
 * immediate of 4.
 * NOTE(review): this looks like the encoding of 'str rt, [sp, #-4]!'
 * (pre-indexed store with write-back) - confirm against arm_encode's field
 * layout.
 */
int __push_reg(arm_cond_t cond, arm_reg rt)
{
    int opcode = (0x5 << 4) | 0x2;
    int base_reg = 0xd;
    int imm = 0x4;

    return arm_encode(cond, opcode, base_reg, rt, imm);
}

/* Build the instruction word that pops one word into register 'rt'.
 *
 * The fields handed to arm_encode are opcode 0x49, base register 13 and an
 * immediate of 4.
 * NOTE(review): this looks like the encoding of 'ldr rt, [sp], #4'
 * (post-indexed load) - confirm against arm_encode's field layout.
 */
int __pop_word(arm_cond_t cond, arm_reg rt)
{
    int opcode = (0x4 << 4) | 0x9;
    int base_reg = 0xd;
    int imm = 0x4;

    return arm_encode(cond, opcode, base_reg, rt, imm);
}

int __b(arm_cond_t cond, int ofs)
{
int o = (ofs - 8) >> 2;
Expand Down
Loading