From 1e6c67dc84c006cd3b4a5ed977f1522f7c22b999 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Thu, 4 Sep 2025 21:17:56 +0200 Subject: [PATCH 1/2] CM-51935 - Fix pre-commit hook for bare repositories --- cycode/cli/apps/scan/commit_range_scanner.py | 5 +- cycode/cli/consts.py | 1 + .../files_collector/commit_range_documents.py | 42 +++++- .../test_commit_range_documents.py | 120 ++++++++++++++++++ 4 files changed, 160 insertions(+), 8 deletions(-) create mode 100644 tests/cli/files_collector/test_commit_range_documents.py diff --git a/cycode/cli/apps/scan/commit_range_scanner.py b/cycode/cli/apps/scan/commit_range_scanner.py index 5a7893df..390d03a5 100644 --- a/cycode/cli/apps/scan/commit_range_scanner.py +++ b/cycode/cli/apps/scan/commit_range_scanner.py @@ -25,6 +25,7 @@ get_diff_file_content, get_diff_file_path, get_pre_commit_modified_documents, + get_safe_head_reference_for_diff, parse_commit_range_sast, parse_commit_range_sca, ) @@ -271,7 +272,9 @@ def _scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None: def _scan_secret_pre_commit(ctx: typer.Context, repo_path: str) -> None: progress_bar = ctx.obj['progress_bar'] - diff_index = git_proxy.get_repo(repo_path).index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) + repo = git_proxy.get_repo(repo_path) + head_reference = get_safe_head_reference_for_diff(repo) + diff_index = repo.index.diff(head_reference, create_patch=True, R=True) progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(diff_index)) diff --git a/cycode/cli/consts.py b/cycode/cli/consts.py index 94903552..19347b1d 100644 --- a/cycode/cli/consts.py +++ b/cycode/cli/consts.py @@ -261,6 +261,7 @@ # git consts COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE = 'D' GIT_HEAD_COMMIT_REV = 'HEAD' +GIT_EMPTY_TREE_OBJECT = '4b825dc642cb6eb9a060e54bf8d69288fbee4904' EMPTY_COMMIT_SHA = '0000000000000000000000000000000000000000' GIT_PUSH_OPTION_COUNT_ENV_VAR_NAME = 'GIT_PUSH_OPTION_COUNT' GIT_PUSH_OPTION_ENV_VAR_PREFIX = 'GIT_PUSH_OPTION_' diff --git a/cycode/cli/files_collector/commit_range_documents.py b/cycode/cli/files_collector/commit_range_documents.py index 0fdcad22..fa7af193 100644 --- a/cycode/cli/files_collector/commit_range_documents.py +++ b/cycode/cli/files_collector/commit_range_documents.py @@ -22,6 +22,31 @@ logger = get_logger('Commit Range Collector') +def get_safe_head_reference_for_diff(repo: 'Repo') -> str: + """Get a safe reference to use for diffing against the current HEAD. + In repositories with no commits, HEAD doesn't exist, so we return the empty tree hash. + + Args: + repo: Git repository object + + Returns: + Either "HEAD" string if commits exist, or empty tree hash if no commits exist + """ + try: + repo.rev_parse(consts.GIT_HEAD_COMMIT_REV) + return consts.GIT_HEAD_COMMIT_REV + except Exception as e: # actually gitdb.exc.BadObject; no import because of lazy loading + logger.debug( + 'Repository has no commits, using empty tree hash for diffs, %s', + {'repo_path': repo.working_tree_dir}, + exc_info=e, + ) + + # Repository has no commits, use the universal empty tree hash + # This is the standard Git approach for initial commits + return consts.GIT_EMPTY_TREE_OBJECT + + def _does_reach_to_max_commits_to_scan_limit(commit_ids: list[str], max_commits_count: Optional[int]) -> bool: if max_commits_count is None: return False @@ -213,7 +238,8 @@ def get_pre_commit_modified_documents( diff_documents = [] repo = git_proxy.get_repo(repo_path) - diff_index = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) + head_reference = get_safe_head_reference_for_diff(repo) + diff_index = repo.index.diff(head_reference, create_patch=True, R=True) progress_bar.set_section_length(progress_bar_section, len(diff_index)) for diff in diff_index: progress_bar.update(progress_bar_section) @@ -228,9 +254,11 @@ def get_pre_commit_modified_documents( ) ) - file_content = _get_file_content_from_commit_diff(repo, consts.GIT_HEAD_COMMIT_REV, diff) - if file_content: - git_head_documents.append(Document(file_path, file_content)) + # Only get file content from HEAD if HEAD exists (not the empty tree hash) + if head_reference == consts.GIT_HEAD_COMMIT_REV: + file_content = _get_file_content_from_commit_diff(repo, head_reference, diff) + if file_content: + git_head_documents.append(Document(file_path, file_content)) if os.path.exists(file_path): file_content = get_file_content(file_path) @@ -274,13 +302,13 @@ def parse_commit_range_sast(commit_range: str, path: str) -> tuple[Optional[str] else: # Git commands like 'git diff ' compare against HEAD. from_spec = commit_range - to_spec = 'HEAD' + to_spec = consts.GIT_HEAD_COMMIT_REV # If a spec is empty (e.g., from '..master'), default it to 'HEAD' if not from_spec: - from_spec = 'HEAD' + from_spec = consts.GIT_HEAD_COMMIT_REV if not to_spec: - to_spec = 'HEAD' + to_spec = consts.GIT_HEAD_COMMIT_REV try: # Use rev_parse to resolve each specifier to its full commit SHA diff --git a/tests/cli/files_collector/test_commit_range_documents.py b/tests/cli/files_collector/test_commit_range_documents.py new file mode 100644 index 00000000..a8905529 --- /dev/null +++ b/tests/cli/files_collector/test_commit_range_documents.py @@ -0,0 +1,120 @@ +import os +import tempfile + +from git import Repo + +from cycode.cli import consts +from cycode.cli.files_collector.commit_range_documents import get_safe_head_reference_for_diff + + +class TestGetSafeHeadReferenceForDiff: + """Test the safe HEAD reference functionality for git diff operations.""" + + def test_returns_head_when_repository_has_commits(self) -> None: + """Test that HEAD is returned when the repository has existing commits.""" + with tempfile.TemporaryDirectory() as temp_dir: + repo = Repo.init(temp_dir) + + test_file = os.path.join(temp_dir, 'test.py') + with open(test_file, 'w') as f: + f.write("print('test')") + + repo.index.add(['test.py']) + repo.index.commit('Initial commit') + + result = get_safe_head_reference_for_diff(repo) + assert result == consts.GIT_HEAD_COMMIT_REV + + def test_returns_empty_tree_hash_when_repository_has_no_commits(self) -> None: + """Test that an empty tree hash is returned when the repository has no commits.""" + with tempfile.TemporaryDirectory() as temp_dir: + repo = Repo.init(temp_dir) + + result = get_safe_head_reference_for_diff(repo) + expected_empty_tree_hash = consts.GIT_EMPTY_TREE_OBJECT + assert result == expected_empty_tree_hash + + +class TestIndexDiffWithSafeHeadReference: + """Test that index.diff works correctly with the safe head reference.""" + + def test_index_diff_works_on_bare_repository(self) -> None: + """Test that index.diff works on repositories with no commits.""" + with tempfile.TemporaryDirectory() as temp_dir: + repo = Repo.init(temp_dir) + + test_file = os.path.join(temp_dir, 'staged_file.py') + with open(test_file, 'w') as f: + f.write("print('staged content')") + + repo.index.add(['staged_file.py']) + + head_ref = get_safe_head_reference_for_diff(repo) + diff_index = repo.index.diff(head_ref, create_patch=True, R=True) + + assert len(diff_index) == 1 + diff = diff_index[0] + assert diff.b_path == 'staged_file.py' + + def test_index_diff_works_on_repository_with_commits(self) -> None: + """Test that index.diff continues to work on repositories with existing commits.""" + with tempfile.TemporaryDirectory() as temp_dir: + repo = Repo.init(temp_dir) + + initial_file = os.path.join(temp_dir, 'initial.py') + with open(initial_file, 'w') as f: + f.write("print('initial')") + + repo.index.add(['initial.py']) + repo.index.commit('Initial commit') + + new_file = os.path.join(temp_dir, 'new_file.py') + with open(new_file, 'w') as f: + f.write("print('new file')") + + with open(initial_file, 'w') as f: + f.write("print('modified initial')") + + repo.index.add(['new_file.py', 'initial.py']) + + head_ref = get_safe_head_reference_for_diff(repo) + diff_index = repo.index.diff(head_ref, create_patch=True, R=True) + + assert len(diff_index) == 2 + file_paths = {diff.b_path or diff.a_path for diff in diff_index} + assert 'new_file.py' in file_paths + assert 'initial.py' in file_paths + assert head_ref == consts.GIT_HEAD_COMMIT_REV + + def test_sequential_operations_on_same_repository(self) -> None: + """Test behavior when transitioning from bare to committed repository.""" + with tempfile.TemporaryDirectory() as temp_dir: + repo = Repo.init(temp_dir) + + test_file = os.path.join(temp_dir, 'test.py') + with open(test_file, 'w') as f: + f.write("print('test')") + + repo.index.add(['test.py']) + + head_ref_before = get_safe_head_reference_for_diff(repo) + diff_before = repo.index.diff(head_ref_before, create_patch=True, R=True) + + expected_empty_tree = consts.GIT_EMPTY_TREE_OBJECT + assert head_ref_before == expected_empty_tree + assert len(diff_before) == 1 + + repo.index.commit('First commit') + + new_file = os.path.join(temp_dir, 'new.py') + with open(new_file, 'w') as f: + f.write("print('new')") + + repo.index.add(['new.py']) + + head_ref_after = get_safe_head_reference_for_diff(repo) + diff_after = repo.index.diff(head_ref_after, create_patch=True, R=True) + + assert head_ref_after == consts.GIT_HEAD_COMMIT_REV + assert len(diff_after) == 1 + assert diff_after[0].b_path == 'new.py' From 795919504cb32fa72055fdd94690078f4752f8bc Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Thu, 4 Sep 2025 21:31:25 +0200 Subject: [PATCH 2/2] fix tests in Windows --- .../test_commit_range_documents.py | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/tests/cli/files_collector/test_commit_range_documents.py b/tests/cli/files_collector/test_commit_range_documents.py index a8905529..9bf2474f 100644 --- a/tests/cli/files_collector/test_commit_range_documents.py +++ b/tests/cli/files_collector/test_commit_range_documents.py @@ -1,5 +1,7 @@ import os import tempfile +from collections.abc import Generator +from contextlib import contextmanager from git import Repo @@ -7,14 +9,30 @@ from cycode.cli.files_collector.commit_range_documents import get_safe_head_reference_for_diff +@contextmanager +def git_repository(path: str) -> Generator[Repo, None, None]: + """Context manager for Git repositories that ensures proper cleanup on Windows.""" + repo = Repo.init(path) + try: + yield repo + finally: + # Properly close the repository to release file handles + repo.close() + + +@contextmanager +def temporary_git_repository() -> Generator[tuple[str, Repo], None, None]: + """Combined context manager for temporary directory with Git repository.""" + with tempfile.TemporaryDirectory() as temp_dir, git_repository(temp_dir) as repo: + yield temp_dir, repo + + class TestGetSafeHeadReferenceForDiff: """Test the safe HEAD reference functionality for git diff operations.""" def test_returns_head_when_repository_has_commits(self) -> None: """Test that HEAD is returned when the repository has existing commits.""" - with tempfile.TemporaryDirectory() as temp_dir: - repo = Repo.init(temp_dir) - + with temporary_git_repository() as (temp_dir, repo): test_file = os.path.join(temp_dir, 'test.py') with open(test_file, 'w') as f: f.write("print('test')") @@ -27,9 +45,7 @@ def test_returns_head_when_repository_has_commits(self) -> None: def test_returns_empty_tree_hash_when_repository_has_no_commits(self) -> None: """Test that an empty tree hash is returned when the repository has no commits.""" - with tempfile.TemporaryDirectory() as temp_dir: - repo = Repo.init(temp_dir) - + with temporary_git_repository() as (temp_dir, repo): result = get_safe_head_reference_for_diff(repo) expected_empty_tree_hash = consts.GIT_EMPTY_TREE_OBJECT assert result == expected_empty_tree_hash @@ -40,9 +56,7 @@ class TestIndexDiffWithSafeHeadReference: def test_index_diff_works_on_bare_repository(self) -> None: """Test that index.diff works on repositories with no commits.""" - with tempfile.TemporaryDirectory() as temp_dir: - repo = Repo.init(temp_dir) - + with temporary_git_repository() as (temp_dir, repo): test_file = os.path.join(temp_dir, 'staged_file.py') with open(test_file, 'w') as f: f.write("print('staged content')") @@ -58,9 +72,7 @@ def test_index_diff_works_on_bare_repository(self) -> None: def test_index_diff_works_on_repository_with_commits(self) -> None: """Test that index.diff continues to work on repositories with existing commits.""" - with tempfile.TemporaryDirectory() as temp_dir: - repo = Repo.init(temp_dir) - + with temporary_git_repository() as (temp_dir, repo): initial_file = os.path.join(temp_dir, 'initial.py') with open(initial_file, 'w') as f: f.write("print('initial')") @@ -88,9 +100,7 @@ def test_index_diff_works_on_repository_with_commits(self) -> None: def test_sequential_operations_on_same_repository(self) -> None: """Test behavior when transitioning from bare to committed repository.""" - with tempfile.TemporaryDirectory() as temp_dir: - repo = Repo.init(temp_dir) - + with temporary_git_repository() as (temp_dir, repo): test_file = os.path.join(temp_dir, 'test.py') with open(test_file, 'w') as f: f.write("print('test')")