From d73e4d69b98a34d84d853485d1987d2f31ec1d41 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 8 Aug 2025 05:04:09 -0500 Subject: [PATCH] [SelectionDAG] Add `f16` soft promotion for `lrint` and `lround` On platforms that soft promote `half`, using `lrint` intrinsics crashes with the following: SoftPromoteHalfOperand Op #0: t5: i32 = lrint t4 LLVM ERROR: Do not know how to soft promote this operator's operand! PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace. Stack dump: 0. Program arguments: /Users/tmgross/Documents/projects/llvm/llvm-build/bin/llc -mtriple=riscv32 1. Running pass 'Function Pass Manager' on module ''. 2. Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@test_lrint_ixx_f16' Resolve this by adding a soft promotion. `SoftPromoteHalfOp_FP_TO_XINT` is reused here since it provides the correct input and output types. It is renamed `SoftPromoteHalfOp_UnaryOp` to match `PromoteFloatOp_UnaryOp` and similar functions that are used to handle the same sets of intrinsics. 
--- .../SelectionDAG/LegalizeFloatTypes.cpp | 17 ++++-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +- llvm/test/CodeGen/ARM/lrint-conv.ll | 15 +++--- llvm/test/CodeGen/LoongArch/lrint-conv.ll | 33 ++++++++---- llvm/test/CodeGen/Mips/llrint-conv.ll | 23 ++++---- llvm/test/CodeGen/Mips/lrint-conv.ll | 27 +++++----- llvm/test/CodeGen/RISCV/lrint-conv.ll | 25 ++++++--- llvm/test/CodeGen/X86/lrint-conv-i32.ll | 52 ++++++++++++++++--- 8 files changed, 138 insertions(+), 56 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 2cad36eff9c88..f84e6c8291cce 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -20,6 +20,7 @@ #include "LegalizeTypes.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -3729,10 +3730,20 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo); break; case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::LLRINT: + case ISD::LLROUND: + case ISD::LRINT: + case ISD::LROUND: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break; + case ISD::STRICT_LLRINT: + case ISD::STRICT_LLROUND: + case ISD::STRICT_LRINT: + case ISD::STRICT_LROUND: + Res = SoftPromoteHalfOp_UnaryOp(N); + break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break; @@ -3811,7 +3822,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) { return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op); } -SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) { +SDValue 
DAGTypeLegalizer::SoftPromoteHalfOp_UnaryOp(SDNode *N) { EVT RVT = N->getValueType(0); bool IsStrict = N->isStrictFPOpcode(); SDValue Op = N->getOperand(IsStrict ? 1 : 0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 63544e63e1da1..8eb3cec8bc87a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -840,7 +840,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N); - SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N); + SDValue SoftPromoteHalfOp_UnaryOp(SDNode *N); SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N); SDValue SoftPromoteHalfOp_SETCC(SDNode *N); SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll index 9aa95112af533..848b14e48f2d1 100644 --- a/llvm/test/CodeGen/ARM/lrint-conv.ll +++ b/llvm/test/CodeGen/ARM/lrint-conv.ll @@ -1,12 +1,15 @@ ; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP ; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP -; FIXME: crash -; define i32 @testmswh_builtin(half %x) { -; entry: -; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) -; ret i32 %0 -; } +; SOFTFP-LABEL: testmswh_builtin: +; SOFTFP: bl lrintf +; HARDFP-LABEL: testmswh_builtin: +; HARDFP: bl lrintf +define i32 @testmswh_builtin(half %x) { +entry: + %0 = tail call i32 @llvm.lrint.i32.f16(half %x) + ret i32 %0 +} ; SOFTFP-LABEL: testmsws_builtin: ; SOFTFP: bl lrintf diff --git a/llvm/test/CodeGen/LoongArch/lrint-conv.ll b/llvm/test/CodeGen/LoongArch/lrint-conv.ll index 85de820025614..262d1c16a6486 100644 --- a/llvm/test/CodeGen/LoongArch/lrint-conv.ll +++ b/llvm/test/CodeGen/LoongArch/lrint-conv.ll @@ -5,16 
+5,31 @@ ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I32 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I64 -; FIXME: crash -; define ITy @test_lrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) -; ret ITy %res -; } +define ITy @test_lrint_ixx_f16(half %x) nounwind { +; LA32-LABEL: test_lrint_ixx_f16: +; LA32: bl lrintf +; +; LA64-I32-LABEL: test_lrint_ixx_f16: +; LA64-I32: pcaddu18i $ra, %call36(lrintf) +; +; LA64-I64-LABEL: test_lrint_ixx_f16: +; LA64-I64: pcaddu18i $t8, %call36(lrintf) + %res = tail call ITy @llvm.lrint.ITy.f16(half %x) + ret ITy %res +} -; define ITy @test_llrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) -; ret ITy %res -; } +define ITy @test_llrint_ixx_f16(half %x) nounwind { +; LA32-LABEL: test_llrint_ixx_f16: +; LA32: bl llrintf +; +; LA64-I32-LABEL: test_llrint_ixx_f16: +; LA64-I32: pcaddu18i $ra, %call36(llrintf) +; +; LA64-I64-LABEL: test_llrint_ixx_f16: +; LA64-I64: pcaddu18i $t8, %call36(llrintf) + %res = tail call ITy @llvm.llrint.ITy.f16(half %x) + ret ITy %res +} define ITy @test_lrint_ixx_f32(float %x) nounwind { ; LA32-LABEL: test_lrint_ixx_f32: diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll index 592d40c0f65aa..8eaef5d4135bb 100644 --- a/llvm/test/CodeGen/Mips/llrint-conv.ll +++ b/llvm/test/CodeGen/Mips/llrint-conv.ll @@ -1,19 +1,18 @@ ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s ; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s -; FIXME: crash -; define signext i32 @testmswh(half %x) { -; entry: -; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) -; %conv = trunc i64 %0 to i32 -; ret i32 %conv -; } +define signext i32 @testmswh(half %x) { +entry: + %0 = tail call i64 @llvm.llrint.i64.f16(half %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} -; define i64 @testmsxh(half %x) { -; 
entry: -; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) -; ret i64 %0 -; } +define i64 @testmsxh(half %x) { +entry: + %0 = tail call i64 @llvm.llrint.i64.f16(half %x) + ret i64 %0 +} define signext i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll index 6d2e392675f1c..64c5cb9ac5b07 100644 --- a/llvm/test/CodeGen/Mips/lrint-conv.ll +++ b/llvm/test/CodeGen/Mips/lrint-conv.ll @@ -1,19 +1,22 @@ ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s ; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s -; FIXME: crash -; define signext i32 @testmswh(half %x) { -; entry: -; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) -; %conv = trunc i64 %0 to i32 -; ret i32 %conv -; } +define signext i32 @testmswh(half %x) { +; CHECK-LABEL: testmswh: +; CHECK: jal lrintf +entry: + %0 = tail call i64 @llvm.lrint.i64.f16(half %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} -; define i64 @testmsxh(half %x) { -; entry: -; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) -; ret i64 %0 -; } +define i64 @testmsxh(half %x) { +; CHECK-LABEL: testmsxh: +; CHECK: jal lrintf +entry: + %0 = tail call i64 @llvm.lrint.i64.f16(half %x) + ret i64 %0 +} define signext i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/RISCV/lrint-conv.ll b/llvm/test/CodeGen/RISCV/lrint-conv.ll index d3af2153588a1..ecb6bd0932ef3 100644 --- a/llvm/test/CodeGen/RISCV/lrint-conv.ll +++ b/llvm/test/CodeGen/RISCV/lrint-conv.ll @@ -5,14 +5,25 @@ ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64 -; FIXME: crash -; define ITy @test_lrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) -; } +define ITy @test_lrint_ixx_f16(half %x) nounwind { +; RV32-LABEL: test_lrint_ixx_f16: +; RV32: call lrintf +; +; RV64-LABEL: 
test_lrint_ixx_f16: +; RV64: call lrintf + %res = tail call ITy @llvm.lrint.ITy.f16(half %x) + ret ITy %res +} -; define ITy @test_llrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) -; } +define ITy @test_llrint_ixx_f16(half %x) nounwind { +; RV32-LABEL: test_llrint_ixx_f16: +; RV32: call llrintf +; +; RV64-LABEL: test_llrint_ixx_f16: +; RV64: call llrintf + %res = tail call ITy @llvm.llrint.ITy.f16(half %x) + ret ITy %res +} define ITy @test_lrint_ixx_f32(float %x) nounwind { ; RV32-LABEL: test_lrint_ixx_f32: diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll index 3c50aea1095f4..5c0a64f1477e6 100644 --- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll @@ -7,12 +7,52 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX -; FIXME: crash -; define i32 @testmswh(half %x) nounwind { -; entry: -; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) -; ret i32 %0 -; } +define i32 @testmswh(half %x) nounwind { +; X86-NOSSE-LABEL: testmswh: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: addl $4, %esp +; X86-NOSSE-NEXT: fistpl (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: testmswh: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll rintf +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll __truncsfhf2 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw 
%ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl +; +; X64-SSE-LABEL: testmswh: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: callq rintf@PLT +; X64-SSE-NEXT: callq __truncsfhf2@PLT +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: cvttss2si %xmm0, %eax +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +entry: + %0 = tail call i32 @llvm.lrint.i32.f16(half %x) + ret i32 %0 +} define i32 @testmsws(float %x) nounwind { ; X86-NOSSE-LABEL: testmsws: