diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp index ad8368e1692be..1169f26a2ae37 100644 --- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp @@ -316,6 +316,12 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType( ThunkArgTranslation::PointerIndirection}; }; + if (T->isHalfTy()) { + // Prefix with `llvm` since MSVC doesn't specify `_Float16` + Out << "__llvm_h__"; + return direct(T); + } + if (T->isFloatTy()) { Out << "f"; return direct(T); @@ -327,8 +333,8 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType( } if (T->isFloatingPointTy()) { - report_fatal_error( - "Only 32 and 64 bit floating points are supported for ARM64EC thunks"); + report_fatal_error("Only 16, 32, and 64 bit floating points are supported " + "for ARM64EC thunks"); } auto &DL = M->getDataLayout(); @@ -342,8 +348,16 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType( uint64_t ElementCnt = T->getArrayNumElements(); uint64_t ElementSizePerBytes = DL.getTypeSizeInBits(ElementTy) / 8; uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes; - if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) { - Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes; + if (ElementTy->isHalfTy() || ElementTy->isFloatTy() || + ElementTy->isDoubleTy()) { + if (ElementTy->isHalfTy()) + // Prefix with `llvm` since MSVC doesn't specify `_Float16` + Out << "__llvm_H__"; + else if (ElementTy->isFloatTy()) + Out << "F"; + else if (ElementTy->isDoubleTy()) + Out << "D"; + Out << TotalSizeBytes; if (Alignment.value() >= 16 && !Ret) Out << "a" << Alignment.value(); if (TotalSizeBytes <= 8) { @@ -355,8 +369,9 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType( return pointerIndirection(T); } } else if (T->isFloatingPointTy()) { - report_fatal_error("Only 32 and 64 bit floating points are supported for " - "ARM64EC thunks"); + report_fatal_error( + "Only 16, 32, and 64 bit floating points are supported " + "for ARM64EC thunks"); } } diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll index 6aeeeed94543d..709a17e32f58e 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll @@ -85,10 +85,10 @@ define i64 @simple_integers(i8, i16, i32, i64) nounwind { ret i64 0 } -; NOTE: Only float and double are supported. -define double @simple_floats(float, double) nounwind { -; CHECK-LABEL: .def $ientry_thunk$cdecl$d$fd; -; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$fd +; NOTE: Only half, float, and double are supported. +define double @simple_floats(half, float, double) nounwind { +; CHECK-LABEL: .def $ientry_thunk$cdecl$d$__llvm_h__fd; +; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$__llvm_h__fd ; CHECK: // %bb.0: ; CHECK-NEXT: stp q6, q7, [sp, #-176]! // 32-byte Folded Spill ; CHECK-NEXT: .seh_save_any_reg_px q6, 176 @@ -600,7 +600,7 @@ start: ; CHECK-NEXT: .symidx $ientry_thunk$cdecl$i8$i8i8i8i8 ; CHECK-NEXT: .word 1 ; CHECK-NEXT: .symidx "#simple_floats" -; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$fd +; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$__llvm_h__fd ; CHECK-NEXT: .word 1 ; CHECK-NEXT: .symidx "#has_varargs" ; CHECK-NEXT: .symidx $ientry_thunk$cdecl$v$varargs diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll index cba7a8100930f..f829227a47cd7 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll @@ -93,10 +93,10 @@ declare i64 @simple_integers(i8, i16, i32, i64) nounwind; ; CHECK-NEXT: .seh_endfunclet ; CHECK-NEXT: .seh_endproc -; NOTE: Only float and double are supported. -declare double @simple_floats(float, double) nounwind; -; CHECK-LABEL: .def $iexit_thunk$cdecl$d$fd; -; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$fd +; NOTE: Only half, float, and double are supported. +declare double @simple_floats(half, float, double) nounwind; +; CHECK-LABEL: .def $iexit_thunk$cdecl$d$__llvm_h__fd; +; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$__llvm_h__fd ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: .seh_stackalloc 48 @@ -129,8 +129,8 @@ declare double @simple_floats(float, double) nounwind; ; CHECK-NEXT: adrp x11, simple_floats ; CHECK-NEXT: add x11, x11, :lo12:simple_floats ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$fd -; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$fd +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$__llvm_h__fd +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$__llvm_h__fd ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -282,33 +282,36 @@ declare void @has_aligned_sret(ptr align 32 sret(%TSRet)) nounwind; ; CHECK: .seh_endfunclet ; CHECK: .seh_endproc -declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind; -; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2F8; -; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2F8 +declare [2 x i8] @small_array([2 x i8], [2 x half], [2 x float]) nounwind; +; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2__llvm_H__4F8; +; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2__llvm_H__4F8 ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: .seh_stackalloc 64 -; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .seh_save_fplr 48 -; CHECK-NEXT: add x29, sp, #48 -; CHECK-NEXT: .seh_add_fp 48 +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: .seh_stackalloc 80 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 64 +; CHECK-NEXT: add x29, sp, #64 +; CHECK-NEXT: .seh_add_fp 64 ; CHECK-NEXT: .seh_endprologue -; CHECK-NEXT: sturb w1, [x29, #-1] -; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect -; CHECK-NEXT: sturb w0, [x29, #-2] -; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect] -; CHECK-NEXT: stp s0, s1, [x29, #-12] -; CHECK-NEXT: ldurh w0, [x29, #-2] -; CHECK-NEXT: ldur x1, [x29, #-12] -; CHECK-NEXT: blr x16 -; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: sturh w8, [x29, #-14] -; CHECK-NEXT: ubfx w1, w8, #8, #8 +; CHECK-NEXT: sturb w0, [x29, #-2] +; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect +; CHECK-NEXT: sturb w1, [x29, #-1] +; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect] +; CHECK-NEXT: stur h0, [x29, #-6] +; CHECK-NEXT: ldurh w0, [x29, #-2] +; CHECK-NEXT: stur h1, [x29, #-4] +; CHECK-NEXT: stp s2, s3, [x29, #-16] +; CHECK-NEXT: ldur w1, [x29, #-6] +; CHECK-NEXT: ldur x2, [x29, #-16] +; CHECK-NEXT: blr x16 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: sturh w8, [x29, #-18] +; CHECK-NEXT: ubfx w1, w8, #8, #8 ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: .seh_save_fplr 48 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: .seh_stackalloc 64 +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 64 +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: .seh_stackalloc 80 ; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret ; CHECK-NEXT: .seh_endfunclet @@ -325,8 +328,8 @@ declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind; ; CHECK-NEXT: adrp x11, small_array ; CHECK-NEXT: add x11, x11, :lo12:small_array ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2F8 -; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2F8 +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2__llvm_H__4F8 +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2__llvm_H__4F8 ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -577,7 +580,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind; ; CHECK-NEXT: .symidx simple_integers ; CHECK-NEXT: .word 0 ; CHECK-NEXT: .symidx simple_floats -; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$fd +; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$__llvm_h__fd ; CHECK-NEXT: .word 4 ; CHECK-NEXT: .symidx "#simple_floats$exit_thunk" ; CHECK-NEXT: .symidx simple_floats @@ -601,7 +604,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind; ; CHECK-NEXT: .symidx has_aligned_sret ; CHECK-NEXT: .word 0 ; CHECK-NEXT: .symidx small_array -; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2F8 +; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2__llvm_H__4F8 ; CHECK-NEXT: .word 4 ; CHECK-NEXT: .symidx "#small_array$exit_thunk" ; CHECK-NEXT: .symidx small_array @@ -634,14 +637,14 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind; define void @func_caller() nounwind { call void @no_op() call i64 @simple_integers(i8 0, i16 0, i32 0, i64 0) - call double @simple_floats(float 0.0, double 0.0) + call double @simple_floats(half 0.0, float 0.0, double 0.0) call void (...) @has_varargs() %c = alloca i8 call void @has_sret(ptr sret([100 x i8]) %c) %aligned = alloca %TSRet, align 32 store %TSRet { i64 0, i64 0 }, ptr %aligned, align 32 call void @has_aligned_sret(ptr align 32 sret(%TSRet) %aligned) - call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x float] [float 0.0, float 0.0]) + call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x half] [half 0.0, half 0.0], [2 x float] [float 0.0, float 0.0]) call [3 x i64] @large_array([3 x i64] [i64 0, i64 0, i64 0], [2 x double] [double 0.0, double 0.0], [2 x [2 x i64]] [[2 x i64] [i64 0, i64 0], [2 x i64] [i64 0, i64 0]]) call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 }) call <4 x i8> @small_vector(<4 x i8> ) diff --git a/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll b/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll index ee326caa77c0a..c27d3c9588b9d 100644 --- a/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll +++ b/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll @@ -2,6 +2,15 @@ ; Separate from llvm-frexp.ll test because this errors on half cases +; ARM64EC-LABEL: test_frexp_f16_i32 +; ARM64EC: fcvt d0, h0 +; ARM64EC: bl "#frexp" +; ARM64EC: fcvt h0, d0 +define { half, i32 } @test_frexp_f16_i32(half %a) { + %result = call { half, i32 } @llvm.frexp.f16.i32(half %a) + ret { half, i32 } %result +} + ; ARM64EC-LABEL: test_frexp_f32_i32 ; ARM64EC: fcvt d0, s0 ; ARM64EC: bl "#frexp" diff --git a/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll b/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll index 1f8eeccf9c338..0fde7b95f5462 100644 --- a/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll +++ b/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll @@ -3,6 +3,15 @@ ; Separate from ldexp.ll test because this errors on half cases +; ARM64EC-LABEL: ldexp_f16 = +; ARM64EC: fcvt d0, h0 +; ARM64EC: bl "#ldexp" +; ARM64EC: fcvt h0, d0 +define half @ldexp_f16(half %val, i32 %a) { + %call = call half @llvm.ldexp.f16(half %val, i32 %a) + ret half %call +} + ; ARM64EC-LABEL: ldexp_f32 = ; ARM64EC: fcvt d0, s0 ; ARM64EC: bl "#ldexp" diff --git a/llvm/test/CodeGen/AArch64/powi-arm64ec.ll b/llvm/test/CodeGen/AArch64/powi-arm64ec.ll index 707159eb432ec..2e38f3c5e9a54 100644 --- a/llvm/test/CodeGen/AArch64/powi-arm64ec.ll +++ b/llvm/test/CodeGen/AArch64/powi-arm64ec.ll @@ -1,8 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=arm64ec-windows-msvc < %s | FileCheck -check-prefix=ARM64EC %s -declare double @llvm.powi.f64.i32(double, i32) +declare half @llvm.powi.f16.i32(half, i32) declare float @llvm.powi.f32.i32(float, i32) +declare double @llvm.powi.f64.i32(double, i32) + +; ARM64EC-LABEL: powi_f16 +; ARM64EC: fcvt s0, h0 +; ARM64EC: scvtf s1, w0 +; ARM64EC: bl "#powf" +define half @powi_f16(half %x, i32 %n) nounwind { + %ret = tail call half @llvm.powi.f16.i32(half %x, i32 %n) + ret half %ret +} ; ARM64EC-LABEL: powi_f32 ; ARM64EC: scvtf s1, w0 diff --git a/llvm/test/CodeGen/Generic/half.ll b/llvm/test/CodeGen/Generic/half.ll index f4ea5b5b30621..9d6c8eb2730d2 100644 --- a/llvm/test/CodeGen/Generic/half.ll +++ b/llvm/test/CodeGen/Generic/half.ll @@ -7,8 +7,7 @@ ; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-apple-darwin | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} -; FIXME(#94434) unsupported on arm64ec -; RUN: %if aarch64-registered-target %{ ! llc %s -o - -mtriple=arm64ec-pc-windows-msvc -filetype=null %} +; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=arm64ec-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if amdgpu-registered-target %{ llc %s -o - -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if arc-registered-target %{ llc %s -o - -mtriple=arc-elf | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=ALL,CHECK %} @@ -47,6 +46,8 @@ ; RUN: %if xcore-registered-target %{ llc %s -o - -mtriple=xcore-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if xtensa-registered-target %{ llc %s -o - -mtriple=xtensa-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %} +; Note that arm64ec labels are quoted, hence the `{{"?}}:`. + ; Codegen tests don't work the same for graphics targets. Add a dummy directive ; for filecheck, just make sure we don't crash. ; NOCRASH: {{.*}} @@ -58,7 +59,7 @@ ; Regression test for https://github.com/llvm/llvm-project/issues/97981. define half @from_bits(i16 %bits) nounwind { -; ALL-LABEL: from_bits: +; ALL-LABEL: from_bits{{"?}}: ; CHECK-NOT: __extend ; CHECK-NOT: __trunc ; CHECK-NOT: __gnu @@ -68,7 +69,7 @@ define half @from_bits(i16 %bits) nounwind { } define i16 @to_bits(half %f) nounwind { -; ALL-LABEL: to_bits: +; ALL-LABEL: to_bits{{"?}}: ; CHECK-NOT: __extend ; CHECK-NOT: __trunc ; CHECK-NOT: __gnu @@ -81,7 +82,7 @@ define i16 @to_bits(half %f) nounwind { ; https://github.com/llvm/llvm-project/issues/117337 and similar issues. define half @check_freeze(half %f) nounwind { -; ALL-LABEL: check_freeze: +; ALL-LABEL: check_freeze{{"?}}: %t0 = freeze half %f ret half %t0 }