@@ -767,6 +767,136 @@ define amdgpu_gfx_whole_wave void @sgpr_spill_only(i1 %active, i32 %a, i32 %b) {
767767  ret void
768768}
769769
770+ define amdgpu_gfx_whole_wave void @realign_stack(i1 %active, i32 %x) {
771+ ; DAGISEL-LABEL: realign_stack:
772+ ; DAGISEL:       ; %bb.0:
773+ ; DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
774+ ; DAGISEL-NEXT:    s_wait_expcnt 0x0
775+ ; DAGISEL-NEXT:    s_wait_samplecnt 0x0
776+ ; DAGISEL-NEXT:    s_wait_bvhcnt 0x0
777+ ; DAGISEL-NEXT:    s_wait_kmcnt 0x0
778+ ; DAGISEL-NEXT:    s_mov_b32 s1, s33
779+ ; DAGISEL-NEXT:    s_add_co_i32 s33, s32, 0x3ff
780+ ; DAGISEL-NEXT:    s_wait_alu 0xfffe
781+ ; DAGISEL-NEXT:    s_and_b32 s33, s33, 0xfffffc00
782+ ; DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
783+ ; DAGISEL-NEXT:    s_mov_b32 s2, s34
784+ ; DAGISEL-NEXT:    s_mov_b32 s34, s32
785+ ; DAGISEL-NEXT:    s_addk_co_i32 s32, 0x800
786+ ; DAGISEL-NEXT:    s_wait_storecnt 0x0
787+ ; DAGISEL-NEXT:    scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
788+ ; DAGISEL-NEXT:    s_wait_storecnt 0x0
789+ ; DAGISEL-NEXT:    s_wait_alu 0xfffe
790+ ; DAGISEL-NEXT:    s_mov_b32 s32, s34
791+ ; DAGISEL-NEXT:    s_mov_b32 s34, s2
792+ ; DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
793+ ; DAGISEL-NEXT:    s_mov_b32 s33, s1
794+ ; DAGISEL-NEXT:    s_wait_alu 0xfffe
795+ ; DAGISEL-NEXT:    s_setpc_b64 s[30:31]
796+ ;
797+ ; GISEL-LABEL: realign_stack:
798+ ; GISEL:       ; %bb.0:
799+ ; GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
800+ ; GISEL-NEXT:    s_wait_expcnt 0x0
801+ ; GISEL-NEXT:    s_wait_samplecnt 0x0
802+ ; GISEL-NEXT:    s_wait_bvhcnt 0x0
803+ ; GISEL-NEXT:    s_wait_kmcnt 0x0
804+ ; GISEL-NEXT:    s_mov_b32 s1, s33
805+ ; GISEL-NEXT:    s_add_co_i32 s33, s32, 0x3ff
806+ ; GISEL-NEXT:    s_wait_alu 0xfffe
807+ ; GISEL-NEXT:    s_and_b32 s33, s33, 0xfffffc00
808+ ; GISEL-NEXT:    s_xor_saveexec_b32 s0, -1
809+ ; GISEL-NEXT:    s_mov_b32 s2, s34
810+ ; GISEL-NEXT:    s_mov_b32 s34, s32
811+ ; GISEL-NEXT:    s_addk_co_i32 s32, 0x800
812+ ; GISEL-NEXT:    s_wait_storecnt 0x0
813+ ; GISEL-NEXT:    scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
814+ ; GISEL-NEXT:    s_wait_storecnt 0x0
815+ ; GISEL-NEXT:    s_wait_alu 0xfffe
816+ ; GISEL-NEXT:    s_mov_b32 s32, s34
817+ ; GISEL-NEXT:    s_mov_b32 s34, s2
818+ ; GISEL-NEXT:    s_mov_b32 exec_lo, s0
819+ ; GISEL-NEXT:    s_mov_b32 s33, s1
820+ ; GISEL-NEXT:    s_wait_alu 0xfffe
821+ ; GISEL-NEXT:    s_setpc_b64 s[30:31]
822+ ;
823+ ; DAGISEL64-LABEL: realign_stack:
824+ ; DAGISEL64:       ; %bb.0:
825+ ; DAGISEL64-NEXT:    s_wait_loadcnt_dscnt 0x0
826+ ; DAGISEL64-NEXT:    s_wait_expcnt 0x0
827+ ; DAGISEL64-NEXT:    s_wait_samplecnt 0x0
828+ ; DAGISEL64-NEXT:    s_wait_bvhcnt 0x0
829+ ; DAGISEL64-NEXT:    s_wait_kmcnt 0x0
830+ ; DAGISEL64-NEXT:    s_mov_b32 s2, s33
831+ ; DAGISEL64-NEXT:    s_add_co_i32 s33, s32, 0x3ff
832+ ; DAGISEL64-NEXT:    s_wait_alu 0xfffe
833+ ; DAGISEL64-NEXT:    s_and_b32 s33, s33, 0xfffffc00
834+ ; DAGISEL64-NEXT:    s_xor_saveexec_b64 s[0:1], -1
835+ ; DAGISEL64-NEXT:    s_mov_b32 s3, s34
836+ ; DAGISEL64-NEXT:    s_mov_b32 s34, s32
837+ ; DAGISEL64-NEXT:    s_addk_co_i32 s32, 0x800
838+ ; DAGISEL64-NEXT:    s_wait_storecnt 0x0
839+ ; DAGISEL64-NEXT:    scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
840+ ; DAGISEL64-NEXT:    s_wait_storecnt 0x0
841+ ; DAGISEL64-NEXT:    s_wait_alu 0xfffe
842+ ; DAGISEL64-NEXT:    s_mov_b32 s32, s34
843+ ; DAGISEL64-NEXT:    s_mov_b32 s34, s3
844+ ; DAGISEL64-NEXT:    s_mov_b64 exec, s[0:1]
845+ ; DAGISEL64-NEXT:    s_mov_b32 s33, s2
846+ ; DAGISEL64-NEXT:    s_wait_alu 0xfffe
847+ ; DAGISEL64-NEXT:    s_setpc_b64 s[30:31]
848+ ;
849+ ; GISEL64-LABEL: realign_stack:
850+ ; GISEL64:       ; %bb.0:
851+ ; GISEL64-NEXT:    s_wait_loadcnt_dscnt 0x0
852+ ; GISEL64-NEXT:    s_wait_expcnt 0x0
853+ ; GISEL64-NEXT:    s_wait_samplecnt 0x0
854+ ; GISEL64-NEXT:    s_wait_bvhcnt 0x0
855+ ; GISEL64-NEXT:    s_wait_kmcnt 0x0
856+ ; GISEL64-NEXT:    s_mov_b32 s2, s33
857+ ; GISEL64-NEXT:    s_add_co_i32 s33, s32, 0x3ff
858+ ; GISEL64-NEXT:    s_wait_alu 0xfffe
859+ ; GISEL64-NEXT:    s_and_b32 s33, s33, 0xfffffc00
860+ ; GISEL64-NEXT:    s_xor_saveexec_b64 s[0:1], -1
861+ ; GISEL64-NEXT:    s_mov_b32 s3, s34
862+ ; GISEL64-NEXT:    s_mov_b32 s34, s32
863+ ; GISEL64-NEXT:    s_addk_co_i32 s32, 0x800
864+ ; GISEL64-NEXT:    s_wait_storecnt 0x0
865+ ; GISEL64-NEXT:    scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
866+ ; GISEL64-NEXT:    s_wait_storecnt 0x0
867+ ; GISEL64-NEXT:    s_wait_alu 0xfffe
868+ ; GISEL64-NEXT:    s_mov_b32 s32, s34
869+ ; GISEL64-NEXT:    s_mov_b32 s34, s3
870+ ; GISEL64-NEXT:    s_mov_b64 exec, s[0:1]
871+ ; GISEL64-NEXT:    s_mov_b32 s33, s2
872+ ; GISEL64-NEXT:    s_wait_alu 0xfffe
873+ ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
874+ ;
875+ ; GFX1250-DAGISEL-LABEL: realign_stack:
876+ ; GFX1250-DAGISEL:       ; %bb.0:
877+ ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
878+ ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
879+ ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s1, s33
880+ ; GFX1250-DAGISEL-NEXT:    s_add_co_i32 s33, s32, 0x3ff
881+ ; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
882+ ; GFX1250-DAGISEL-NEXT:    s_and_b32 s33, s33, 0xfffffc00
883+ ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
884+ ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s2, s34
885+ ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s34, s32
886+ ; GFX1250-DAGISEL-NEXT:    s_addk_co_i32 s32, 0x800
887+ ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
888+ ; GFX1250-DAGISEL-NEXT:    s_wait_storecnt 0x0
889+ ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s32, s34
890+ ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s34, s2
891+ ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
892+ ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
893+ ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s33, s1
894+ ; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
895+   %fussy = alloca i32, align 1024, addrspace(5)
896+   store volatile i32 %x, ptr addrspace(5) %fussy, align 1024
897+   ret void
898+ }
899+ 
770900define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) {
771901; DAGISEL-LABEL: multiple_blocks:
772902; DAGISEL:       ; %bb.0:
0 commit comments