From fa141b580a688fd4264e7b26c632dcf1c8604bf5 Mon Sep 17 00:00:00 2001 From: zohassadar Date: Thu, 23 Jan 2025 12:46:07 +0000 Subject: [PATCH 1/3] replace switch_s_plus_2a with branch macro Uses a macro with destinations as args to create a high-byte table and a low-byte table holding each destination address offset by -1, then uses the stack and rts for the actual branching. Macro expands to 11-12 bytes per branch with nothing centralized, compared to the previous 5-6 bytes per branch to a centralized routine. Reduces cycle count from 54-57 cycles per branch to 23-26, with the variance coming from possible page boundary crossings and from whether the index variable (dest) is in zero page. Normal game logic involves 16 branches per frame. Supports up to 16 destinations but can be easily extended; the most in use is the playstate branch with 12 destinations. Previous: lda dest ; 3/4 jsr switch_s_plus_2a ; 6 switch_s_plus_2a: asl a ; 2 tay ; 2 iny ; 2 pla ; 4 sta switchTmp1 ; 3 pla ; 4 sta switchTmp2 ; 3 lda (switchTmp1),y ; 5/6 tax ; 2 iny ; 2 lda (switchTmp1),y ; 5/6 sta switchTmp2 ; 3 stx switchTmp1 ; 3 jmp (switchTmp1) ; 5 ; 54-57 New: ldx dest ; 3/4 lda hiBytes,x ; 4/5 pha ; 3 lda loBytes,x ; 4/5 pha ; 3 rts ; 6 ; 23-26 --- src/gamemode/branch.asm | 20 +++++++++--------- src/gamemode/gametypemenu/linecap.asm | 9 ++++---- src/gamemode/levelmenu.asm | 13 ++++++------ src/gamemodestate/branch.asm | 30 +++++++++++++++++---------- src/macros.asm | 28 +++++++++++++++++++++++++ src/main.asm | 2 ++ src/modes/garbage.asm | 14 ++++++------- src/nmi/render.asm | 21 +++++++++---------- src/playstate/branch.asm | 27 ++++++++++++------------ src/ram.asm | 2 -- src/util/core.asm | 16 -------------- 11 files changed, 99 insertions(+), 83 deletions(-) create mode 100644 src/macros.asm diff --git a/src/gamemode/branch.asm b/src/gamemode/branch.asm index 333697be..b3111a1c 100644 --- a/src/gamemode/branch.asm +++ b/src/gamemode/branch.asm @@ -1,14 +1,14 @@ +; 2nd and 3rd instances of playAndEndingHighScore_jmp used to be 
demo and startDemo respectively branchOnGameMode: - lda gameMode - jsr switch_s_plus_2a - .addr gameMode_bootScreen - .addr gameMode_waitScreen - .addr gameMode_gameTypeMenu - .addr gameMode_levelMenu - .addr gameMode_playAndEndingHighScore_jmp - .addr gameMode_playAndEndingHighScore_jmp ; use to be demo - .addr gameMode_playAndEndingHighScore_jmp ; used to be startDemo - .addr gameMode_speedTest + branchTo gameMode, \ + gameMode_bootScreen, \ + gameMode_waitScreen, \ + gameMode_gameTypeMenu, \ + gameMode_levelMenu, \ + gameMode_playAndEndingHighScore_jmp, \ + gameMode_playAndEndingHighScore_jmp, \ + gameMode_playAndEndingHighScore_jmp, \ + gameMode_speedTest .include "bootscreen.asm" .include "waitscreen.asm" diff --git a/src/gamemode/gametypemenu/linecap.asm b/src/gamemode/gametypemenu/linecap.asm index 11982391..e112f078 100644 --- a/src/gamemode/gametypemenu/linecap.asm +++ b/src/gamemode/gametypemenu/linecap.asm @@ -104,11 +104,10 @@ linecapMenuControls: rts linecapMenuControlsLR: - lda linecapCursorIndex - jsr switch_s_plus_2a - .addr linecapMenuControlsWhen - .addr linecapMenuControlsLinesLevel - .addr linecapMenuControlsHow + branchTo linecapCursorIndex, \ + linecapMenuControlsWhen, \ + linecapMenuControlsLinesLevel, \ + linecapMenuControlsHow linecapMenuControlsWhen: lda newlyPressedButtons_player1 and #BUTTON_LEFT|BUTTON_RIGHT diff --git a/src/gamemode/levelmenu.asm b/src/gamemode/levelmenu.asm index ae3fa08b..21f992d8 100644 --- a/src/gamemode/levelmenu.asm +++ b/src/gamemode/levelmenu.asm @@ -186,13 +186,12 @@ makeNotReady: rts levelControl: - lda levelControlMode - jsr switch_s_plus_2a - .addr levelControlNormal - .addr levelControlCustomLevel - .addr levelControlHearts - .addr levelControlClearHighScores - .addr levelControlClearHighScoresConfirm + branchTo levelControlMode, \ + levelControlNormal, \ + levelControlCustomLevel, \ + levelControlHearts, \ + levelControlClearHighScores, \ + levelControlClearHighScoresConfirm levelControlClearHighScores: 
lda #$20 diff --git a/src/gamemodestate/branch.asm b/src/gamemodestate/branch.asm index f8453481..8ae60ecd 100644 --- a/src/gamemodestate/branch.asm +++ b/src/gamemodestate/branch.asm @@ -1,17 +1,25 @@ ; the return value of this routine dictates if we should wait for nmi or not right after +; initGameBackground gms: 1 acc: 0 - ne +; initGameState gms: 2 acc: 4/0 - ne +; updateCountersAndNonPlayerState gms: 3 acc: 0/1 - ne +; handleGameOver gms: 4 acc: eq (set to $9) if gameOver, $1 otherwise (ne) +; updatePlayer1 gms: 5 acc: $FF - ne +; next gms: 6 acc: $1 ne +; checkForResetKeyCombo gms: 7 acc: 0 or heldButtons - eq if holding down, left and right +; handlePause gms: 8 acc: 0/3 - ne +; vblankThenRunState2 gms: 2 acc eq (set to $2) branchOnGameModeState: - lda gameModeState - jsr switch_s_plus_2a - .addr gameModeState_initGameBackground ; gms: 1 acc: 0 - ne - .addr gameModeState_initGameState ; gms: 2 acc: 4/0 - ne - .addr gameModeState_updateCountersAndNonPlayerState ; gms: 3 acc: 0/1 - ne - .addr gameModeState_handleGameOver ; gms: 4 acc: eq (set to $9) if gameOver, $1 otherwise (ne) - .addr gameModeState_updatePlayer1 ; gms: 5 acc: $FF - ne - .addr gameModeState_next ; gms: 6 acc: $1 ne - .addr gameModeState_checkForResetKeyCombo ; gms: 7 acc: 0 or heldButtons - eq if holding down, left and right - .addr gameModeState_handlePause ; gms: 8 acc: 0/3 - ne - .addr gameModeState_vblankThenRunState2 ; gms: 2 acc eq (set to $2) + branchTo gameModeState, \ + gameModeState_initGameBackground, \ + gameModeState_initGameState, \ + gameModeState_updateCountersAndNonPlayerState, \ + gameModeState_handleGameOver, \ + gameModeState_updatePlayer1, \ + gameModeState_next, \ + gameModeState_checkForResetKeyCombo, \ + gameModeState_handlePause, \ + gameModeState_vblankThenRunState2 gameModeState_next: ; used to be updatePlayer2 inc gameModeState diff --git a/src/macros.asm b/src/macros.asm new file mode 100644 index 00000000..71ea1b71 --- /dev/null +++ b/src/macros.asm @@ -0,0 
+1,28 @@ +.macro _makeRtsTable byte, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 + .if .strat (byte, 0) = '>' + .byte >(a0-1) + .elseif .strat (byte, 0) = '<' + .byte <(a0-1) + .endif + .ifnblank a1 ; recurse until end of argument list + _makeRtsTable byte, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 + .endif +.endmacro + +.macro branchTo dest, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 + ; uses each destination-1 and rts to branch + ; add additional arguments as needed, max a255 + .scope + ldx dest + lda hiBytes,x + pha + lda loBytes,x + pha + rts + + hiBytes: + _makeRtsTable ">", a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 + loBytes: + _makeRtsTable "<", a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 + .endscope +.endmacro diff --git a/src/main.asm b/src/main.asm index 1bebb3e3..443fa554 100644 --- a/src/main.asm +++ b/src/main.asm @@ -5,6 +5,7 @@ ; ; TetrisGYM - A Tetris Practise ROM +.include "macros.asm" .include "charmap.asm" .include "constants.asm" .include "io.asm" @@ -13,6 +14,7 @@ .setcpu "6502" .feature force_range +.linecont .segment "PRG_chunk1": absolute diff --git a/src/modes/garbage.asm b/src/modes/garbage.asm index 9cbaed15..285fe0ad 100644 --- a/src/modes/garbage.asm +++ b/src/modes/garbage.asm @@ -1,11 +1,11 @@ prepareNextGarbage: - lda garbageModifier - jsr switch_s_plus_2a - .addr garbageAlwaysTetrisReady - .addr garbageNormal - .addr garbageSmart - .addr garbageHard - .addr garbageTypeC ; infinite dig + branchTo garbageModifier, \ + garbageAlwaysTetrisReady, \ + garbageNormal, \ + garbageSmart, \ + garbageHard, \ + garbageTypeC + ; garbageTypeC = infinite dig garbageTypeC: jsr findTopBulky diff --git a/src/nmi/render.asm b/src/nmi/render.asm index 3b468942..1858e84d 100644 --- a/src/nmi/render.asm +++ b/src/nmi/render.asm @@ -1,14 +1,13 @@ -render: lda renderMode - jsr switch_s_plus_2a - .addr render_mode_static - .addr 
render_mode_scroll - .addr render_mode_congratulations_screen - .addr render_mode_play_and_demo - .addr render_mode_pause - .addr render_mode_rocket - .addr render_mode_speed_test - .addr render_mode_level_menu - .addr render_mode_linecap_menu +render: branchTo renderMode, \ + render_mode_static, \ + render_mode_scroll, \ + render_mode_congratulations_screen, \ + render_mode_play_and_demo, \ + render_mode_pause, \ + render_mode_rocket, \ + render_mode_speed_test, \ + render_mode_level_menu, \ + render_mode_linecap_menu .include "render_mode_level_menu.asm" ; no rts / jmp diff --git a/src/playstate/branch.asm b/src/playstate/branch.asm index d5e4b45c..035a6eb2 100644 --- a/src/playstate/branch.asm +++ b/src/playstate/branch.asm @@ -1,18 +1,17 @@ branchOnPlayStatePlayer1: - lda playState - jsr switch_s_plus_2a - .addr playState_unassignOrientationId - .addr playState_playerControlsActiveTetrimino - .addr playState_lockTetrimino - .addr playState_checkForCompletedRows - .addr playState_noop - .addr playState_updateLinesAndStatistics - .addr playState_prepareNext ; used to be bTypeGoalCheck - .addr playState_receiveGarbage - .addr playState_spawnNextTetrimino - .addr playState_noop - .addr playState_checkStartGameOver - .addr playState_incrementPlayState + branchTo playState, \ + playState_unassignOrientationId, \ + playState_playerControlsActiveTetrimino, \ + playState_lockTetrimino, \ + playState_checkForCompletedRows, \ + playState_noop, \ + playState_updateLinesAndStatistics, \ + playState_prepareNext , \ + playState_receiveGarbage, \ + playState_spawnNextTetrimino, \ + playState_noop, \ + playState_checkStartGameOver, \ + playState_incrementPlayState playState_unassignOrientationId: lda #$13 diff --git a/src/ram.asm b/src/ram.asm index f021f206..7ba9824b 100755 --- a/src/ram.asm +++ b/src/ram.asm @@ -5,8 +5,6 @@ tmp3: .res 1 tmpX: .res 1 ; $0003 tmpY: .res 1 ; $0004 tmpZ: .res 1 ; $0005 -switchTmp1 := tmpX ; for switch_s_plus_2a -switchTmp2 := tmpY 
tmpBulkCopyToPpuReturnAddr: .res 2 ; $0006 ; 2 bytes binScore: .res 4 ; $8 ; 4 bytes binary diff --git a/src/util/core.asm b/src/util/core.asm index 9204e1fd..26d921d4 100755 --- a/src/util/core.asm +++ b/src/util/core.asm @@ -285,19 +285,3 @@ memset_page: inx bne @setByte rts - -switch_s_plus_2a: - asl a - tay - iny - pla - sta switchTmp1 - pla - sta switchTmp2 - lda (switchTmp1),y - tax - iny - lda (switchTmp1),y - sta switchTmp2 - stx switchTmp1 - jmp (switchTmp1) From 530750058e46ceb76483653e65f282312f428043 Mon Sep 17 00:00:00 2001 From: zohassadar Date: Sat, 25 Jan 2025 15:40:03 +0000 Subject: [PATCH 2/3] Restore comment --- src/playstate/branch.asm | 1 + 1 file changed, 1 insertion(+) diff --git a/src/playstate/branch.asm b/src/playstate/branch.asm index 035a6eb2..aa5b982d 100644 --- a/src/playstate/branch.asm +++ b/src/playstate/branch.asm @@ -1,3 +1,4 @@ +; prepareNext used to be bTypeGoalCheck branchOnPlayStatePlayer1: branchTo playState, \ playState_unassignOrientationId, \ From 58315cd11e305f672297d8625bcdbd6b552edbfb Mon Sep 17 00:00:00 2001 From: zohassadar Date: Sun, 26 Jan 2025 21:18:03 +0000 Subject: [PATCH 3/3] branch differently for fewer destinations Throws warning when single destination is defined but optimizes to jmp instruction anyway. For 2 to 4 destinations, uses a decrementing x register, branch instructions and jmp instructions to save a minimum of 7 cycles, up to 18. Rom space is saved for 1-3 destinations. In the case of 4 destinations, it uses an additional 2 bytes compared to the rts method. 
How each option expands: 1 destination: 3 cycles jmp a0 2 destinations: maximum 11 cycles to a0 ldx dest beq addr0 jmp a1 addr0: jmp a0 3 destinations: maximum 15 cycles to a1 ldx dest beq addr0 dex beq addr1 jmp a2 addr1: jmp a1 addr0: jmp a0 4 destinations: maximum 19 cycles to a2 ldx dest beq addr0 dex beq addr1 dex beq addr2 jmp a3 addr2: jmp a2 addr1: jmp a1 addr0: jmp a0 5+ destinations: 23-26 cycles ldx dest lda hiBytes,x pha lda loBytes,x pha rts hiBytes: .byte >a0,>a1,>a2,>a3,>a4 loBytes: .byte <a0,<a1,<a2,<a3,<a4 [extraction gap: the diffstat, diff header and leading hunk lines of this patch were lost here and cannot be reconstructed] - _makeRtsTable ">", a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 - loBytes: - _makeRtsTable "<", a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 - .endscope + hiBytes: + _makeRtsTable ">", a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 + loBytes: + _makeRtsTable "<", a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 + .endif +.endscope .endmacro