- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
          [WebAssembly] Change half to use soft promotion rather than  PromoteFloat
          #152833
        
          New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
| @llvm/pr-subscribers-backend-webassembly Author: Trevor Gross (tgross35) ChangesThe default  Of note in the test updates are that  Fixes the wasm portion of #97981 Patch is 77.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152833.diff 6 Files Affected: 
 diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 72401a7a259c0..e0ce3d1dcb620 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -40,6 +40,8 @@ class WebAssemblyTargetLowering final : public TargetLowering {
   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override;
   MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override;
 
+  bool softPromoteHalfType() const override { return true; }
+
 private:
   /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
   /// right decision when generating code for different targets.
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll
similarity index 99%
rename from llvm/test/CodeGen/WebAssembly/half-precision.ll
rename to llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll
index 4e8ff5955c63b..8033ec5d310fa 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128 | FileCheck %s
 
+; Tests for `llvm.wasm.*.*f16` intrinsics
+
 declare float @llvm.wasm.loadf32.f16(ptr)
 declare void @llvm.wasm.storef16.f32(float, ptr)
 
diff --git a/llvm/test/CodeGen/WebAssembly/f16.ll b/llvm/test/CodeGen/WebAssembly/f16.ll
index b67c0c16d4651..3c31d55abfadc 100644
--- a/llvm/test/CodeGen/WebAssembly/f16.ll
+++ b/llvm/test/CodeGen/WebAssembly/f16.ll
@@ -1,69 +1,627 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 
-; Test that f16 is expanded.
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers            | FileCheck %s --check-prefixes=ALL,DEFISEL
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes=ALL,FASTISEL
+
+; Tests for various operations on half precison float. Much of the test is
+; copied from test/CodeGen/X86/half.ll.
 
 target triple = "wasm32-unknown-unknown"
 
-; CHECK-LABEL: demote.f32:
-; CHECK-NEXT: .functype demote.f32 (f32) -> (f32){{$}}
-; CHECK-NEXT: local.get	$push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: call	$push[[L1:[0-9]+]]=, __truncsfhf2, $pop[[L0]]{{$}}
-; CHECK-NEXT: call	$push[[L2:[0-9]+]]=, __extendhfsf2, $pop[[L1]]{{$}}
-; CHECK-NEXT: return  	$pop[[L2]]{{$}}
-define half @demote.f32(float %f) {
-    %t = fptrunc float %f to half
-    ret half %t
-}
-
-; CHECK-LABEL: promote.f32:
-; CHECK-NEXT: .functype promote.f32 (f32) -> (f32){{$}}
-; CHECK-NEXT: local.get	$push0=, 0{{$}}
-; CHECK-NEXT: return  	$pop0{{$}}
-define float @promote.f32(half %f) {
-    %t = fpext half %f to float
-    ret float %t
-}
-
-; CHECK-LABEL: demote.f64:
-; CHECK-NEXT: .functype demote.f64 (f64) -> (f32){{$}}
-; CHECK-NEXT: local.get	$push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: call	$push[[L1:[0-9]+]]=, __truncdfhf2, $pop[[L0]]{{$}}
-; CHECK-NEXT: call	$push[[L2:[0-9]+]]=, __extendhfsf2, $pop[[L1]]{{$}}
-; CHECK-NEXT: return  	$pop[[L2]]{{$}}
-define half @demote.f64(double %f) {
-    %t = fptrunc double %f to half
-    ret half %t
-}
-
-; CHECK-LABEL: promote.f64:
-; CHECK-NEXT: .functype promote.f64 (f32) -> (f64){{$}}
-; CHECK-NEXT: local.get	$push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f64.promote_f32 $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
-; CHECK-NEXT: return  	$pop[[L1]]{{$}}
-define double @promote.f64(half %f) {
-    %t = fpext half %f to double
-    ret double %t
-}
-
-; CHECK-LABEL: demote.f128:
-; CHECK-NEXT: .functype demote.f128 (i64, i64) -> (f32){{$}}
-; CHECK-NEXT: local.get	$push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get	$push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: call	$push[[L2:[0-9]+]]=, __trunctfhf2, $pop[[L0]], $pop[[L1]]{{$}}
-; CHECK-NEXT: call	$push[[L3:[0-9]+]]=, __extendhfsf2, $pop[[L2]]{{$}}
-; CHECK-NEXT: return  	$pop[[L3]]{{$}}
-define half @demote.f128(fp128 %f) {
-    %t = fptrunc fp128 %f to half
-    ret half %t
-}
-
-; CHECK-LABEL: promote.f128:
-; CHECK-NEXT: .functype promote.f128 (i32, f32) -> (){{$}}
-; CHECK: call __extendsftf2
-; CHECK: i64.store
-; CHECK: i64.store
-define fp128 @promote.f128(half %f) {
-    %t = fpext half %f to fp128
-    ret fp128 %t
+define void @store(half %x, ptr %p) nounwind {
+; ALL-LABEL: store:
+; ALL:         .functype store (i32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push1=, 1
+; ALL-NEXT:    local.get $push0=, 0
+; ALL-NEXT:    i32.store16 0($pop1), $pop0
+; ALL-NEXT:    return
+  store half %x, ptr %p
+  ret void
+}
+
+define half @return(ptr %p) nounwind {
+; ALL-LABEL: return:
+; ALL:         .functype return (i32) -> (i32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    i32.load16_u $push0=, 0($pop1)
+; ALL-NEXT:    return $pop0
+  %r = load half, ptr %p
+  ret half %r
+}
+
+define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: loadd:
+; DEFISEL:         .functype loadd (i32) -> (f64)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push3=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 2($pop3)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    f64.promote_f32 $push2=, $pop1
+; DEFISEL-NEXT:    return $pop2
+;
+; FASTISEL-LABEL: loadd:
+; FASTISEL:         .functype loadd (i32) -> (f64)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push3=, 0
+; FASTISEL-NEXT:    i32.load16_u $push2=, 2($pop3)
+; FASTISEL-NEXT:    call $push1=, __extendhfsf2, $pop2
+; FASTISEL-NEXT:    f64.promote_f32 $push0=, $pop1
+; FASTISEL-NEXT:    return $pop0
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+  %x = load i16, ptr %arrayidx, align 2
+  %ret = tail call double @llvm.convert.from.fp16.f64(i16 %x)
+  ret double %ret
+}
+
+define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: loadf:
+; DEFISEL:         .functype loadf (i32) -> (f32)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push2=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 2($pop2)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    return $pop1
+;
+; FASTISEL-LABEL: loadf:
+; FASTISEL:         .functype loadf (i32) -> (f32)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push2=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 2($pop2)
+; FASTISEL-NEXT:    call $push0=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    return $pop0
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+  %x = load i16, ptr %arrayidx, align 2
+  %ret = tail call float @llvm.convert.from.fp16.f32(i16 %x)
+  ret float %ret
+}
+
+define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind {
+; ALL-LABEL: stored:
+; ALL:         .functype stored (i32, f64) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    local.get $push1=, 1
+; ALL-NEXT:    call $push0=, __truncdfhf2, $pop1
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
+  %x = tail call i16 @llvm.convert.to.fp16.f64(double %b)
+  store i16 %x, ptr %a, align 2
+  ret void
+}
+
+define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind {
+; ALL-LABEL: storef:
+; ALL:         .functype storef (i32, f32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    local.get $push1=, 1
+; ALL-NEXT:    call $push0=, __truncsfhf2, $pop1
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
+  %x = tail call i16 @llvm.convert.to.fp16.f32(float %b)
+  store i16 %x, ptr %a, align 2
+  ret void
+}
+
+define void @test_load_store(ptr %in, ptr %out) nounwind {
+; ALL-LABEL: test_load_store:
+; ALL:         .functype test_load_store (i32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 1
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    i32.load16_u $push0=, 0($pop1)
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
+  %val = load half, ptr %in
+  store half %val, ptr %out
+  ret void
+}
+
+define i16 @test_bitcast_from_half(ptr %addr) nounwind {
+; ALL-LABEL: test_bitcast_from_half:
+; ALL:         .functype test_bitcast_from_half (i32) -> (i32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    i32.load16_u $push0=, 0($pop1)
+; ALL-NEXT:    return $pop0
+  %val = load half, ptr %addr
+  %val_int = bitcast half %val to i16
+  ret i16 %val_int
+}
+
+define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
+; ALL-LABEL: test_bitcast_to_half:
+; ALL:         .functype test_bitcast_to_half (i32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    local.get $push0=, 1
+; ALL-NEXT:    i32.store16 0($pop1), $pop0
+; ALL-NEXT:    return
+  %val_fp = bitcast i16 %in to half
+  store half %val_fp, ptr %addr
+  ret void
+}
+
+define half @from_bits(i16 %x) nounwind {
+; ALL-LABEL: from_bits:
+; ALL:         .functype from_bits (i32) -> (i32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push0=, 0
+; ALL-NEXT:    return $pop0
+  %res = bitcast i16 %x to half
+  ret half %res
+}
+
+define i16 @to_bits(half %x) nounwind {
+; ALL-LABEL: to_bits:
+; ALL:         .functype to_bits (i32) -> (i32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push0=, 0
+; ALL-NEXT:    return $pop0
+    %res = bitcast half %x to i16
+    ret i16 %res
+}
+
+define float @test_extend32(ptr %addr) nounwind {
+; DEFISEL-LABEL: test_extend32:
+; DEFISEL:         .functype test_extend32 (i32) -> (f32)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push2=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 0($pop2)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    return $pop1
+;
+; FASTISEL-LABEL: test_extend32:
+; FASTISEL:         .functype test_extend32 (i32) -> (f32)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push2=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 0($pop2)
+; FASTISEL-NEXT:    call $push0=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    return $pop0
+  %val16 = load half, ptr %addr
+  %val32 = fpext half %val16 to float
+  ret float %val32
+}
+
+define double @test_extend64(ptr %addr) nounwind {
+; DEFISEL-LABEL: test_extend64:
+; DEFISEL:         .functype test_extend64 (i32) -> (f64)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push3=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 0($pop3)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    f64.promote_f32 $push2=, $pop1
+; DEFISEL-NEXT:    return $pop2
+;
+; FASTISEL-LABEL: test_extend64:
+; FASTISEL:         .functype test_extend64 (i32) -> (f64)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push3=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 0($pop3)
+; FASTISEL-NEXT:    call $push2=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    f64.promote_f32 $push0=, $pop2
+; FASTISEL-NEXT:    return $pop0
+  %val16 = load half, ptr %addr
+  %val32 = fpext half %val16 to double
+  ret double %val32
+}
+
+define fp128 @test_extend128(ptr %addr) nounwind {
+; ALL-LABEL: test_extend128:
+; ALL:         .functype test_extend128 (i32, i32) -> ()
+; ALL-NEXT:    .local i32
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    global.get $push4=, __stack_pointer
+; ALL-NEXT:    i32.const $push5=, 16
+; ALL-NEXT:    i32.sub $push9=, $pop4, $pop5
+; ALL-NEXT:    local.tee $push8=, 2, $pop9
+; ALL-NEXT:    global.set __stack_pointer, $pop8
+; ALL-NEXT:    local.get $push11=, 2
+; ALL-NEXT:    local.get $push10=, 1
+; ALL-NEXT:    i32.load16_u $push0=, 0($pop10)
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT:    call __extendsftf2, $pop11, $pop1
+; ALL-NEXT:    local.get $push13=, 0
+; ALL-NEXT:    local.get $push12=, 2
+; ALL-NEXT:    i64.load $push2=, 8($pop12)
+; ALL-NEXT:    i64.store 8($pop13), $pop2
+; ALL-NEXT:    local.get $push15=, 0
+; ALL-NEXT:    local.get $push14=, 2
+; ALL-NEXT:    i64.load $push3=, 0($pop14)
+; ALL-NEXT:    i64.store 0($pop15), $pop3
+; ALL-NEXT:    local.get $push16=, 2
+; ALL-NEXT:    i32.const $push6=, 16
+; ALL-NEXT:    i32.add $push7=, $pop16, $pop6
+; ALL-NEXT:    global.set __stack_pointer, $pop7
+; ALL-NEXT:    return
+  %val16 = load half, ptr %addr
+  %val32 = fpext half %val16 to fp128
+  ret fp128 %val32
+}
+
+define void @test_trunc32(float %in, ptr %addr) nounwind {
+; ALL-LABEL: test_trunc32:
+; ALL:         .functype test_trunc32 (f32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 1
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    call $push0=, __truncsfhf2, $pop1
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
+  %val16 = fptrunc float %in to half
+  store half %val16, ptr %addr
+  ret void
+}
+
+define void @test_trunc64(double %in, ptr %addr) nounwind {
+; ALL-LABEL: test_trunc64:
+; ALL:         .functype test_trunc64 (f64, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 1
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    call $push0=, __truncdfhf2, $pop1
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
+  %val16 = fptrunc double %in to half
+  store half %val16, ptr %addr
+  ret void
+}
+
+define void @test_trunc128(fp128 %in, ptr %addr) nounwind {
+; ALL-LABEL: test_trunc128:
+; ALL:         .functype test_trunc128 (i64, i64, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push3=, 2
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    local.get $push1=, 1
+; ALL-NEXT:    call $push0=, __trunctfhf2, $pop2, $pop1
+; ALL-NEXT:    i32.store16 0($pop3), $pop0
+; ALL-NEXT:    return
+  %val16 = fptrunc fp128 %in to half
+  store half %val16, ptr %addr
+  ret void
+}
+
+define i64 @test_fptosi_i64(ptr %p) nounwind {
+; DEFISEL-LABEL: test_fptosi_i64:
+; DEFISEL:         .functype test_fptosi_i64 (i32) -> (i64)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push3=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 0($pop3)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    i64.trunc_sat_f32_s $push2=, $pop1
+; DEFISEL-NEXT:    return $pop2
+;
+; FASTISEL-LABEL: test_fptosi_i64:
+; FASTISEL:         .functype test_fptosi_i64 (i32) -> (i64)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push3=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 0($pop3)
+; FASTISEL-NEXT:    call $push2=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    i64.trunc_sat_f32_s $push0=, $pop2
+; FASTISEL-NEXT:    return $pop0
+  %a = load half, ptr %p, align 2
+  %r = fptosi half %a to i64
+  ret i64 %r
+}
+
+define void @test_sitofp_i64(i64 %a, ptr %p) nounwind {
+; ALL-LABEL: test_sitofp_i64:
+; ALL:         .functype test_sitofp_i64 (i64, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push3=, 1
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    f32.convert_i64_s $push0=, $pop2
+; ALL-NEXT:    call $push1=, __truncsfhf2, $pop0
+; ALL-NEXT:    i32.store16 0($pop3), $pop1
+; ALL-NEXT:    return
+  %r = sitofp i64 %a to half
+  store half %r, ptr %p
+  ret void
+}
+
+define i64 @test_fptoui_i64(ptr %p) nounwind {
+; DEFISEL-LABEL: test_fptoui_i64:
+; DEFISEL:         .functype test_fptoui_i64 (i32) -> (i64)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push3=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 0($pop3)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    i64.trunc_sat_f32_u $push2=, $pop1
+; DEFISEL-NEXT:    return $pop2
+;
+; FASTISEL-LABEL: test_fptoui_i64:
+; FASTISEL:         .functype test_fptoui_i64 (i32) -> (i64)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push3=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 0($pop3)
+; FASTISEL-NEXT:    call $push2=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    i64.trunc_sat_f32_u $push0=, $pop2
+; FASTISEL-NEXT:    return $pop0
+  %a = load half, ptr %p, align 2
+  %r = fptoui half %a to i64
+  ret i64 %r
+}
+
+define void @test_uitofp_i64(i64 %a, ptr %p) nounwind {
+; ALL-LABEL: test_uitofp_i64:
+; ALL:         .functype test_uitofp_i64 (i64, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push3=, 1
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    f32.convert_i64_u $push0=, $pop2
+; ALL-NEXT:    call $push1=, __truncsfhf2, $pop0
+; ALL-NEXT:    i32.store16 0($pop3), $pop1
+; ALL-NEXT:    return
+  %r = uitofp i64 %a to half
+  store half %r, ptr %p
+  ret void
+}
+
+define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
+; ALL-LABEL: test_extend32_vec4:
+; ALL:         .functype test_extend32_vec4 (i32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push9=, 0
+; ALL-NEXT:    local.get $push8=, 1
+; ALL-NEXT:    i32.load16_u $push0=, 6($pop8)
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT:    f32.store 12($pop9), $pop1
+; ALL-NEXT:    local.get $push11=, 0
+; ALL-NEXT:    local.get $push10=, 1
+; ALL-NEXT:    i32.load16_u $push2=, 4($pop10)
+; ALL-NEXT:    call $push3=, __extendhfsf2, $pop2
+; ALL-NEXT:    f32.store 8($pop11), $pop3
+; ALL-NEXT:    local.get $push13=, 0
+; ALL-NEXT:    local.get $push12=, 1
+; ALL-NEXT:    i32.load16_u $push4=, 2($pop12)
+; ALL-NEXT:    call $push5=, __extendhfsf2, $pop4
+; ALL-NEXT:    f32.store 4($pop13), $pop5
+; ALL-NEXT:    local.get $push15=, 0
+; ALL-NEXT:    local.get $push14=, 1
+; ALL-NEXT:    i32.load16_u $push6=, 0($pop14)
+; ALL-NEXT:    call $push7=, __extendhfsf2, $pop6
+; ALL-NEXT:    f32.store 0($pop15), $pop7
+; ALL-NEXT:    return
+  %a = load <4 x half>, ptr %p, align 8
+  %b = fpext <4 x half> %a to <4 x float>
+  ret <4 x float> %b
+}
+
+define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
+; ALL-LABEL: test_extend64_vec4:
+; ALL:         .functype test_extend64_vec4 (i32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push13=, 0
+; ALL-NEXT:    local.get $push12=, 1
+; ALL-NEXT:    i64.load16_u $push0=, 6($pop12)
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT:    f64.promote_f32 $push2=, $pop1
+; ALL-NEXT:    f64.store 24($pop13), $pop2
+; ALL-NEXT:    local.get $push15=, 0
+; ALL-NEXT:    local.get $push14=, 1
+; ALL-NEXT:    i64.load16_u $push3=, 4($pop14)
+; ALL-NEXT:    call $push4=, __extendhfsf2, $pop3
+; ALL-NEXT:    f64.promote_f32 $push5=, $pop4
+; ALL-NEXT:    f64.store 16($pop15), $pop5
+; ALL-NEXT:    local.get $push17=, 0
+; ALL-NEXT:    local.get $push16=, 1
+; ALL-NEXT:    i64.load16_u $push6=, 2($pop16)
+; ALL-NEXT:    call $push7=, __extendhfsf2, $pop6
+; ALL-NEXT:    f64.promote_f32 $push8=, $pop7
+; ALL-NEXT:    f64.store 8($pop17), $pop8
+; ALL-NEXT:    local.get $push19=, 0
+; ALL-NEXT:    local.get $push18=, 1
+; ALL-NEXT:    i64.load16_u $push9=, 0($pop18)
+; ALL-NEXT:    call $push10=, __extendhfsf2, $pop9
+; ALL-NEXT:    f64.promote_f32 $push11=, $pop10
+; ALL-NEXT:    f64.store 0($pop19), $pop11
+; ALL-NEXT:    return
+  %a = load <4 x half>, ptr %p, align 8
+  %b = fpext <4 x half> %a to <4 x double>
+  ret <4 x double> %b
+}
+
+define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind {
+; DEFISEL-LABEL: test_trunc32_vec4:
+; DEFISEL:         .functype test_trunc32_vec4 (f32, f32, f32, f32, i32) -> ()
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push5=, 4
+; DEFISEL-NEXT:    local.get $push4=, 3
+; DEFISEL-NEXT:    call $push0=, __truncsfhf2, $pop4
+; DEFISEL-NEXT:    i32.store16 6($pop5), $pop0
+; DEFISEL-NEXT:    local.get $push7=, 4
+; DEFISEL-NEXT:    local.get $push6=, 2
+; DEFISEL-NEXT:    call $push1=, __truncsfhf2, $pop6
+; DEFISEL-NEXT:    i32.store16 4($pop7), $pop1
+; DEFISEL-NEXT:    local.get $push9=, 4
+; DEFISEL-NEXT:    local.get $push8=, 1
+; DEFISEL-NEXT:    call $push2=, __truncsfhf2, $pop8
+; DEFISEL-NEXT:    i32.store16 2($pop9), $pop2
+; DEFISEL-NEXT:    local.get $push11=, 4
+; DEFISEL-NEXT:    local.get $push10=, 0
+; DEFISEL-NEXT:    call $push3=, __truncsfhf2, $pop10
+; DEFISEL-NEXT:    i32.store16 0($pop11), $pop3
+; DEFISEL-NEXT:    return
+;
+; FASTISEL-LABEL: test_trunc32_vec4:
+; FASTISEL:         .functype test_trunc32_vec4 (f32, f32, f32, f32, i32) -> ()
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    lo...
[truncated]
 | 
| The pretest should land before this #152832 Not sure who the codeowners for wasm are, maybe @sunfishcode or @aheejin could you review? | 
57babd0    to
    14d4dee      
    Compare
  
    | 
 Indeed, thanks. Updated | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
It's worth noting that this changes the ABI for passing half floats (without +fp16), but in a way that doesn't get exposed to Clang, which currently forces these to use i16. (This change will allow us to remove that hack.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+cc @brendandahl
I think this makes sense to match other targets for the non-fp16 case. Once we pick up the fp16 proposal again we should think more about how the ABI should work in that case.
…eFloat` The default `half` legalization, which Wasm currently uses, does not respect IEEE conventions: for example, casting to bits may invoke a lossy libcall, meaning soft float operations cannot be correctly implemented. Change to the soft promotion legalization which passes `f16` as an `i16` and treats each `half` operation as an individual f16->f32->libcall->f32->f16 sequence. Of note in the test updates are that `from_bits` and `to_bits` are now libcall-free, and that chained operations now round back to `f16` after each step. Fixes the wasm portion of llvm#97981 Fixes the wasm portion of llvm#97975 Fixes: llvm#96438 Fixes: llvm#96438
14d4dee    to
    1dfe717      
    Compare
  
    | My own test got in the way here, now blocked on #152684 | 
The default
halflegalization, which Wasm currently uses, does not respect IEEE conventions: for example, casting to bits may invoke alossy libcall, meaning soft float operations cannot be correctly implemented. Change to the soft promotion legalization which passesf16as ani16and treats eachhalfoperation as an individual f16->f32->libcall->f32->f16 sequence.Of note in the test updates are that
from_bitsandto_bitsare now libcall-free, and that chained operations now round back tof16after each step.Fixes the wasm portion of #97981
Fixes the wasm portion of #97975
Fixes: #96437
Fixes: #96438