Skip to content

Commit 967830a

Browse files
committed
Auto merge of #144549 - folkertdev:va-arg-arm, r=saethlin
match clang's `va_arg` assembly on arm targets tracking issue: #44930 For this example ```rust #![feature(c_variadic)] #[unsafe(no_mangle)] unsafe extern "C" fn variadic(a: f64, mut args: ...) -> f64 { let b = args.arg::<f64>(); let c = args.arg::<f64>(); a + b + c } ``` We currently generate (via llvm): ```asm variadic: sub sp, sp, #12 stmib sp, {r2, r3} vmov d0, r0, r1 add r0, sp, #4 vldr d1, [sp, #4] add r0, r0, #15 bic r0, r0, #7 vadd.f64 d0, d0, d1 add r1, r0, #8 str r1, [sp] vldr d1, [r0] vadd.f64 d0, d0, d1 vmov r0, r1, d0 add sp, sp, #12 bx lr ``` LLVM is not doing a good job. In fact, it's well-known that LLVM's implementation of `va_arg` is kind of bad, and we implement it ourselves (based on clang) for many targets already. For arm, our own `emit_ptr_va_arg` saves 3 instructions. Next, it turns out it's important for LLVM to explicitly start and end the lifetime of the `va_list`. In #146059 I already end the lifetime, but when looking at this again, I noticed that it is important to also start it, see https://godbolt.org/z/EGqvKTTsK: failing to explicitly start the lifetime uses an extra register. So, the combination of `emit_ptr_va_arg` with starting/ending the lifetime makes rustc emit exactly the instructions that clang generates:: ```asm variadic: sub sp, sp, #12 stmib sp, {r2, r3} vmov d16, r0, r1 vldr d17, [sp, #4] vadd.f64 d16, d16, d17 vldr d17, [sp, #12] vadd.f64 d16, d16, d17 vmov r0, r1, d16 add sp, sp, #12 bx lr ``` The arguments to `emit_ptr_va_arg` are based on [the clang implementation](https://github.com/llvm/llvm-project/blob/03dc2a41f3d9a500e47b513de5c5008c06860d65/clang/lib/CodeGen/Targets/ARM.cpp#L798-L844). r? `@workingjubilee` (I can re-roll if your queue is too full, but you do seem like the right person here) try-job: armhf-gnu
2 parents 2a9bacf + 94fbb21 commit 967830a

File tree

5 files changed

+67
-1
lines changed

5 files changed

+67
-1
lines changed

compiler/rustc_codegen_llvm/src/va_arg.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,21 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
908908
)
909909
}
910910
"aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),
911+
"arm" => {
912+
// Types wider than 16 bytes are not currently supported. Clang has special logic for
913+
// such types, but `VaArgSafe` is not implemented for any type that is this large.
914+
assert!(bx.cx.size_of(target_ty).bytes() <= 16);
915+
916+
emit_ptr_va_arg(
917+
bx,
918+
addr,
919+
target_ty,
920+
PassMode::Direct,
921+
SlotSize::Bytes4,
922+
AllowHigherAlign::Yes,
923+
ForceRightAdjust::No,
924+
)
925+
}
911926
"s390x" => emit_s390x_va_arg(bx, addr, target_ty),
912927
"powerpc" => emit_powerpc_va_arg(bx, addr, target_ty),
913928
"powerpc64" | "powerpc64le" => emit_ptr_va_arg(

compiler/rustc_codegen_ssa/src/mir/block.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
520520
LocalRef::Place(va_list) => {
521521
bx.va_end(va_list.val.llval);
522522

523-
// Explicitly end the lifetime of the `va_list`, this matters for LLVM.
523+
// Explicitly end the lifetime of the `va_list`, improves LLVM codegen.
524524
bx.lifetime_end(va_list.val.llval, va_list.layout.size);
525525
}
526526
_ => bug!("C-variadic function must have a `VaList` place"),

compiler/rustc_codegen_ssa/src/mir/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,10 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
438438

439439
if fx.fn_abi.c_variadic && arg_index == fx.fn_abi.args.len() {
440440
let va_list = PlaceRef::alloca(bx, bx.layout_of(arg_ty));
441+
442+
// Explicitly start the lifetime of the `va_list`, improves LLVM codegen.
443+
bx.lifetime_start(va_list.val.llval, va_list.layout.size);
444+
441445
bx.va_start(va_list.val.llval);
442446

443447
return LocalRef::Place(va_list);
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
//@ assembly-output: emit-asm
2+
//@ compile-flags: -Copt-level=3
3+
//@ only-arm
4+
//@ ignore-thumb
5+
//@ ignore-android
6+
#![no_std]
7+
#![crate_type = "lib"]
8+
#![feature(c_variadic)]
9+
10+
// Check that the assembly that rustc generates matches what clang emits.
11+
12+
#[unsafe(no_mangle)]
13+
unsafe extern "C" fn variadic(a: f64, mut args: ...) -> f64 {
14+
// CHECK-LABEL: variadic
15+
// CHECK: sub sp, sp
16+
17+
// CHECK: vldr
18+
// CHECK: vadd.f64
19+
// CHECK: vldr
20+
// CHECK: vadd.f64
21+
let b = args.arg::<f64>();
22+
let c = args.arg::<f64>();
23+
a + b + c
24+
25+
// CHECK: add sp, sp
26+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//@ add-core-stubs
2+
//@ compile-flags: -Copt-level=3
3+
#![feature(c_variadic)]
4+
#![crate_type = "lib"]
5+
6+
// Check that `%args` explicitly has its lifetime start and end. Being explicit can improve
7+
// instruction and register selection, see e.g. https://github.com/rust-lang/rust/pull/144549
8+
9+
#[unsafe(no_mangle)]
10+
unsafe extern "C" fn variadic(a: f64, mut args: ...) -> f64 {
11+
// CHECK: call void @llvm.lifetime.start.p0(i64 {{[0-9]+}}, ptr nonnull %args)
12+
// CHECK: call void @llvm.va_start.p0(ptr nonnull %args)
13+
14+
let b = args.arg::<f64>();
15+
let c = args.arg::<f64>();
16+
17+
a + b + c
18+
19+
// CHECK: call void @llvm.va_end.p0(ptr nonnull %args)
20+
// CHECK: call void @llvm.lifetime.end.p0(i64 {{[0-9]+}}, ptr nonnull %args)
21+
}

0 commit comments

Comments
 (0)