diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index aa98a4b860dda..58fdb9b724931 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5280,7 +5280,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64 : AMDGPU::V_CEIL_F16_fake16_e64; case AMDGPU::S_FLOOR_F16: - return AMDGPU::V_FLOOR_F16_fake16_e64; + return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64 + : AMDGPU::V_FLOOR_F16_fake16_e64; case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_fake16_e64; case AMDGPU::S_RNDNE_F16: diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index d604990dc88c2..b0dd92af4a027 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -881,6 +881,7 @@ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16" defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">; defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">; +defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir index 52c37ec6246c9..30975a8937db6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir @@ -1,5 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX11-FAKE16 %s --- name: ffloor_s16_ss @@ -19,6 +21,15 @@ body: | ; VI-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]] ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16) ; VI-NEXT: $sgpr0 = COPY [[COPY1]](s32) + ; + ; GCN-LABEL: name: ffloor_s16_ss + ; GCN: liveins: $sgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GCN-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16) + ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FFLOOR %1 @@ -40,8 +51,24 @@ body: | ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]] + ; + ; GFX11-LABEL: name: ffloor_s16_vv + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]] + ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]] + ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]] + ; + ; GFX11-FAKE16-LABEL: name: ffloor_s16_vv + ; GFX11-FAKE16: liveins: $vgpr0 + ; GFX11-FAKE16-NEXT: {{ $}} + ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FFLOOR %1 @@ -63,8 +90,23 @@ body: | ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]] + ; + ; GFX11-LABEL: name: ffloor_s16_vs + ; GFX11: liveins: $sgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]] + ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]] + ; + ; GFX11-FAKE16-LABEL: name: ffloor_s16_vs + ; GFX11-FAKE16: liveins: $sgpr0 + ; GFX11-FAKE16-NEXT: {{ $}} + ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FFLOOR %1 @@ -86,8 +128,24 @@ body: | ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %3 + ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_e64_]] + ; + ; GFX11-LABEL: name: ffloor_fneg_s16_vv + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]] + ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]] + ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]] + ; + ; GFX11-FAKE16-LABEL: name: ffloor_fneg_s16_vv + ; GFX11-FAKE16: liveins: $vgpr0 + ; GFX11-FAKE16-NEXT: {{ $}} + ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_FLOOR_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir index d4eab5b797e66..7767aa54c8151 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir @@ -78,3 +78,24 @@ body: | %2:sreg_32 = COPY %1:vgpr_32 %3:sreg_32 = nofpexcept S_CEIL_F16 killed %2:sreg_32, implicit $mode ... + +--- +name: floor_f16 +body: | + bb.0: + ; REAL16-LABEL: name: floor_f16 + ; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec + ; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec + ; + ; FAKE16-LABEL: name: floor_f16 + ; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; FAKE16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec + ; FAKE16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; FAKE16-NEXT: [[V_FLOOR_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_fake16_e64 0, [[V_CVT_F32_U32_e64_]], 0, 0, implicit $mode, implicit $exec + %0:vgpr_32 = IMPLICIT_DEF + %1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec + %2:sreg_32 = COPY %1:vgpr_32 + %3:sreg_32 = nofpexcept S_FLOOR_F16 killed %2:sreg_32, implicit $mode +... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll index 00bb32c768dca..e8d037c5ff53e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s declare half @llvm.floor.f16(half %a) declare <2 x half> @llvm.floor.v2f16(<2 x half> %a) @@ -59,11 +60,31 @@ define amdgpu_kernel void @floor_f16( ; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; GFX11-NEXT: s_mov_b32 s5, s1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_floor_f16_e32 v0, v0 +; GFX11-NEXT: v_floor_f16_e32 v0.l, v0.l ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm +; +; GFX11-FAKE16-LABEL: floor_f16: +; GFX11-FAKE16: ; %bb.0: ; %entry +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-FAKE16-NEXT: v_floor_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0 +; GFX11-FAKE16-NEXT: s_nop 0 +; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: @@ -143,14 +164,43 @@ define amdgpu_kernel void @floor_v2f16( ; GFX11-NEXT: s_mov_b32 s5, s1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX11-NEXT: v_floor_f16_e32 v0, v0 +; GFX11-NEXT: v_floor_f16_e32 v0.l, v0.l +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11-NEXT: v_mov_b16_e32 v1.l, v0.l ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_floor_f16_e32 v1, v1 -; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-NEXT: v_floor_f16_e32 v0.h, v0.h +; GFX11-NEXT: v_mov_b16_e32 v0.l, v0.h +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm +; +; GFX11-FAKE16-LABEL: floor_v2f16: +; GFX11-FAKE16: ; %bb.0: ; %entry +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11-FAKE16-NEXT: v_floor_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_floor_f16_e32 v1, v1 +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 +; GFX11-FAKE16-NEXT: s_nop 0 +; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s index 668085cffbf00..a155b74046bab 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s @@ -1,6 +1,54 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s +v_floor_f16 v5, v1 +// GFX11: encoding: [0x01,0xb7,0x0a,0x7e] + +v_floor_f16 v5, v127 +// GFX11: encoding: [0x7f,0xb7,0x0a,0x7e] + +v_floor_f16 v5, s1 +// GFX11: encoding: [0x01,0xb6,0x0a,0x7e] + +v_floor_f16 v5, s105 +// GFX11: encoding: [0x69,0xb6,0x0a,0x7e] + +v_floor_f16 v5, vcc_lo +// GFX11: encoding: [0x6a,0xb6,0x0a,0x7e] + +v_floor_f16 v5, vcc_hi +// GFX11: encoding: [0x6b,0xb6,0x0a,0x7e] + +v_floor_f16 v5, ttmp15 +// GFX11: encoding: [0x7b,0xb6,0x0a,0x7e] + +v_floor_f16 v5, m0 +// GFX11: encoding: [0x7d,0xb6,0x0a,0x7e] + +v_floor_f16 v5, exec_lo +// GFX11: encoding: [0x7e,0xb6,0x0a,0x7e] + +v_floor_f16 v5, exec_hi +// GFX11: encoding: [0x7f,0xb6,0x0a,0x7e] + +v_floor_f16 v5, null +// GFX11: encoding: [0x7c,0xb6,0x0a,0x7e] + +v_floor_f16 v5, -1 +// GFX11: encoding: [0xc1,0xb6,0x0a,0x7e] + +v_floor_f16 v5, 0.5 +// GFX11: encoding: [0xf0,0xb6,0x0a,0x7e] + +v_floor_f16 v5, src_scc +// GFX11: encoding: [0xfd,0xb6,0x0a,0x7e] + +v_floor_f16 v127, 0xfe0b +// GFX11: encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_floor_f32 v5, v1 +// GFX11: encoding: [0x01,0x49,0x0a,0x7e] + v_ceil_f16 v5, v1 // GFX11: encoding: [0x01,0xb9,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s index 6b19a5c94a64e..86c2375c89496 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s @@ -1906,50 +1906,56 @@ v_ffbl_b32 v5, src_scc v_ffbl_b32 v255, 0xaf123456 // GFX11: encoding: [0xff,0x74,0xfe,0x7f,0x56,0x34,0x12,0xaf] -v_floor_f16 v5, v1 +v_floor_f16 v5.l, v1.l // GFX11: encoding: [0x01,0xb7,0x0a,0x7e] -v_floor_f16 v5, v127 +v_floor_f16 v5.l, v127.l // GFX11: encoding: [0x7f,0xb7,0x0a,0x7e] -v_floor_f16 v5, s1 +v_floor_f16 v5.l, v1.h +// GFX11: encoding: [0x81,0xb7,0x0a,0x7e] + +v_floor_f16 v5.l, v127.h +// GFX11: encoding: [0xff,0xb7,0x0a,0x7e] + +v_floor_f16 v5.l, s1 // GFX11: encoding: [0x01,0xb6,0x0a,0x7e] -v_floor_f16 v5, s105 +v_floor_f16 v5.l, s105 // GFX11: encoding: [0x69,0xb6,0x0a,0x7e] -v_floor_f16 v5, vcc_lo +v_floor_f16 v5.l, vcc_lo // GFX11: encoding: [0x6a,0xb6,0x0a,0x7e] -v_floor_f16 v5, vcc_hi +v_floor_f16 v5.l, vcc_hi // GFX11: encoding: [0x6b,0xb6,0x0a,0x7e] -v_floor_f16 v5, ttmp15 +v_floor_f16 v5.l, ttmp15 // GFX11: encoding: [0x7b,0xb6,0x0a,0x7e] -v_floor_f16 v5, m0 +v_floor_f16 v5.l, m0 // GFX11: encoding: [0x7d,0xb6,0x0a,0x7e] -v_floor_f16 v5, exec_lo +v_floor_f16 v5.l, exec_lo // GFX11: encoding: [0x7e,0xb6,0x0a,0x7e] -v_floor_f16 v5, exec_hi +v_floor_f16 v5.l, exec_hi // GFX11: encoding: [0x7f,0xb6,0x0a,0x7e] -v_floor_f16 v5, null +v_floor_f16 v5.l, null // GFX11: encoding: [0x7c,0xb6,0x0a,0x7e] -v_floor_f16 v5, -1 +v_floor_f16 v5.l, -1 // GFX11: encoding: [0xc1,0xb6,0x0a,0x7e] -v_floor_f16 v5, 0.5 -// GFX11: encoding: [0xf0,0xb6,0x0a,0x7e] +v_floor_f16 v127.l, 0.5 +// GFX11: encoding: [0xf0,0xb6,0xfe,0x7e] -v_floor_f16 v5, src_scc -// GFX11: encoding: [0xfd,0xb6,0x0a,0x7e] +v_floor_f16 v5.h, src_scc +// GFX11: encoding: [0xfd,0xb6,0x0a,0x7f] -v_floor_f16 v127, 0xfe0b -// GFX11: encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00] +v_floor_f16 v127.h, 0xfe0b +// GFX11: encoding: [0xff,0xb6,0xfe,0x7f,0x0b,0xfe,0x00,0x00] v_floor_f32 v5, v1 // GFX11: encoding: [0x01,0x49,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16-fake16.s index e3679b9321f43..038a9d4c9e189 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16-fake16.s @@ -1,6 +1,48 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +v_floor_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_floor_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_floor_f16 v5, v1 row_mirror +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_floor_f16 v5, v1 row_half_mirror +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_floor_f16 v5, v1 row_shl:1 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_floor_f16 v5, v1 row_shl:15 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_floor_f16 v5, v1 row_shr:1 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_floor_f16 v5, v1 row_shr:15 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_floor_f16 v5, v1 row_ror:1 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_floor_f16 v5, v1 row_ror:15 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_floor_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_floor_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_floor_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_floor_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + v_ceil_f16 v5, v1 quad_perm:[3,2,1,0] // GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s index cd9aa9273f1d8..fa6df6affeb1e 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s @@ -1513,47 +1513,47 @@ v_ffbl_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_ffbl_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: encoding: [0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x05,0x30] -v_floor_f16 v5, v1 quad_perm:[3,2,1,0] +v_floor_f16 v5.l, v1 quad_perm:[3,2,1,0] // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff] -v_floor_f16 v5, v1 quad_perm:[0,1,2,3] +v_floor_f16 v5.l, v1 quad_perm:[0,1,2,3] // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff] -v_floor_f16 v5, v1 row_mirror +v_floor_f16 v5.l, v1 row_mirror // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff] -v_floor_f16 v5, v1 row_half_mirror +v_floor_f16 v5.l, v1 row_half_mirror // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff] -v_floor_f16 v5, v1 row_shl:1 +v_floor_f16 v5.l, v1 row_shl:1 // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff] -v_floor_f16 v5, v1 row_shl:15 +v_floor_f16 v5.l, v1 row_shl:15 // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff] -v_floor_f16 v5, v1 row_shr:1 +v_floor_f16 v5.l, v1 row_shr:1 // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff] -v_floor_f16 v5, v1 row_shr:15 +v_floor_f16 v5.l, v1 row_shr:15 // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff] -v_floor_f16 v5, v1 row_ror:1 +v_floor_f16 v5.l, v1 row_ror:1 // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff] -v_floor_f16 v5, v1 row_ror:15 +v_floor_f16 v5.l, v1 row_ror:15 // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff] -v_floor_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +v_floor_f16 v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf // GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff] -v_floor_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_floor_f16 v127.l, v127 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x5f,0x01,0x01] -v_floor_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_floor_f16 v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7f,0x01,0x60,0x09,0x13] -v_floor_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +v_floor_f16 v127.h, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xb6,0xfe,0x7f,0x7f,0x6f,0x35,0x30] v_floor_f32 v5, v1 quad_perm:[3,2,1,0] // GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8-fake16.s index b6573c9778d80..caadcb861e9f9 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8-fake16.s @@ -1,6 +1,15 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +v_floor_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_floor_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_floor_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xb6,0xfe,0x7e,0x7f,0x00,0x00,0x00] + v_ceil_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: encoding: [0xe9,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s index 1633e5115efbe..4fe3aa121b590 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s @@ -328,14 +328,17 @@ v_ffbl_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_ffbl_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: encoding: [0xe9,0x74,0xfe,0x7f,0xff,0x00,0x00,0x00] -v_floor_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +v_floor_f16 v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05] -v_floor_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_floor_f16 v127.l, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0xb6,0xfe,0x7e,0x7f,0x77,0x39,0x05] -v_floor_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xb6,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_floor_f16 v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0xb6,0x0a,0x7f,0x01,0x77,0x39,0x05] + +v_floor_f16 v127.h, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xb6,0xfe,0x7f,0x7f,0x00,0x00,0x00] v_floor_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: encoding: [0xe9,0x48,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err-fake16.s index d6f317ee0829b..d441e64dad67e 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err-fake16.s @@ -1,6 +1,27 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s +v_floor_f16_e32 v128, 0xfe0b +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v255, v1 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v5, v199 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + v_ceil_f16_e32 v128, 0xfe0b // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s index b2ebc3d26549b..47855a951d4b3 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s @@ -109,14 +109,14 @@ v_exp_f16_e32 v255, v1 v_exp_f16_e32 v5, v199 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode -v_floor_f16_e32 v128, 0xfe0b -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_floor_f16_e32 v128.l, 0xfe0b +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction -v_floor_f16_e32 v255, v1 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_floor_f16_e32 v255.l, v1.l +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction -v_floor_f16_e32 v5, v199 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_floor_f16_e32 v5.l, v199.l +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_fract_f16_e32 v128, 0xfe0b // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -298,10 +298,10 @@ v_exp_f16_e32 v255, v1 quad_perm:[3,2,1,0] v_exp_f16_e32 v5, v199 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction -v_floor_f16_e32 v255, v1 quad_perm:[3,2,1,0] +v_floor_f16_e32 v255.l, v1.l quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction -v_floor_f16_e32 v5, v199 quad_perm:[3,2,1,0] +v_floor_f16_e32 v5.l, v199.l quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_fract_f16_e32 v255, v1 quad_perm:[3,2,1,0] @@ -451,10 +451,10 @@ v_exp_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] v_exp_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction -v_floor_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +v_floor_f16_e32 v255.l, v1.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction -v_floor_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +v_floor_f16_e32 v5.l, v199.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_fract_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] @@ -525,3 +525,24 @@ v_trunc_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] v_trunc_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v128.h, 0xfe0b +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v255.h, v1.h +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v5.h, v199.h +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v255.h, v1.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v5.h, v199.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v255.h, v1.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v5.h, v199.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx11_unsupported_sdwa-fake16.s b/llvm/test/MC/AMDGPU/gfx11_unsupported_sdwa-fake16.s index 737256d6b727a..6fc67a0fb95b9 100644 --- a/llvm/test/MC/AMDGPU/gfx11_unsupported_sdwa-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx11_unsupported_sdwa-fake16.s @@ -1,5 +1,8 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s +v_floor_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported + v_ceil_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx11_unsupported_sdwa.s b/llvm/test/MC/AMDGPU/gfx11_unsupported_sdwa.s index 1121f394b3d26..894b3971bdc67 100644 --- a/llvm/test/MC/AMDGPU/gfx11_unsupported_sdwa.s +++ b/llvm/test/MC/AMDGPU/gfx11_unsupported_sdwa.s @@ -493,7 +493,7 @@ v_ffbh_u32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD v_ffbl_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported -v_floor_f16_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +v_floor_f16_sdwa v255.l, v1.l dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported v_floor_f32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt index b8efe4b1e5623..b27d9bdb58c2a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt @@ -1700,50 +1700,56 @@ # GFX11: v_exp_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x4a,0xfe,0x7f,0x56,0x34,0x12,0xaf] 0xff,0x4a,0xfe,0x7f,0x56,0x34,0x12,0xaf -# GFX11: v_floor_f16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, v1.l ; encoding: [0x01,0xb7,0x0a,0x7e] 0x01,0xb7,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, v127.l ; encoding: [0x7f,0xb7,0x0a,0x7e] 0x7f,0xb7,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, v1.h ; encoding: [0x81,0xb7,0x0a,0x7e] +0x81,0xb7,0x0a,0x7e + +# GFX11-REAL16: v_floor_f16_e32 v5.l, v127.h ; encoding: [0xff,0xb7,0x0a,0x7e] +0xff,0xb7,0x0a,0x7e + +# GFX11-REAL16: v_floor_f16_e32 v5.l, s1 ; encoding: [0x01,0xb6,0x0a,0x7e] 0x01,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, s105 ; encoding: [0x69,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, s105 ; encoding: [0x69,0xb6,0x0a,0x7e] 0x69,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e] 0x6a,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e] 0x6b,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e] 0x7b,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, m0 ; encoding: [0x7d,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, m0 ; encoding: [0x7d,0xb6,0x0a,0x7e] 0x7d,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e] 0x7e,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e] 0x7f,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, null ; encoding: [0x7c,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, null ; encoding: [0x7c,0xb6,0x0a,0x7e] 0x7c,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e] +# GFX11-REAL16: v_floor_f16_e32 v5.l, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e] 0xc1,0xb6,0x0a,0x7e -# GFX11: v_floor_f16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e] -0xf0,0xb6,0x0a,0x7e +# GFX11-REAL16: v_floor_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xb6,0xfe,0x7e] +0xf0,0xb6,0xfe,0x7e -# GFX11: v_floor_f16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e] -0xfd,0xb6,0x0a,0x7e +# GFX11-REAL16: v_floor_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7f] +0xfd,0xb6,0x0a,0x7f -# GFX11: v_floor_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00] -0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_floor_f16_e32 v127.h, 0xfe0b ; encoding: [0xff,0xb6,0xfe,0x7f,0x0b,0xfe,0x00,0x00] +0xff,0xb6,0xfe,0x7f,0x0b,0xfe,0x00,0x00 # GFX11: v_floor_f32_e32 v5, v1 ; encoding: [0x01,0x49,0x0a,0x7e] 0x01,0x49,0x0a,0x7e