Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cycode/cli/apps/scan/commit_range_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
get_diff_file_content,
get_diff_file_path,
get_pre_commit_modified_documents,
get_safe_head_reference_for_diff,
parse_commit_range_sast,
parse_commit_range_sca,
)
Expand Down Expand Up @@ -271,7 +272,9 @@ def _scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None:

def _scan_secret_pre_commit(ctx: typer.Context, repo_path: str) -> None:
progress_bar = ctx.obj['progress_bar']
diff_index = git_proxy.get_repo(repo_path).index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
repo = git_proxy.get_repo(repo_path)
head_reference = get_safe_head_reference_for_diff(repo)
diff_index = repo.index.diff(head_reference, create_patch=True, R=True)

progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(diff_index))

Expand Down
1 change: 1 addition & 0 deletions cycode/cli/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@
# git consts
COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE = 'D'
GIT_HEAD_COMMIT_REV = 'HEAD'
GIT_EMPTY_TREE_OBJECT = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
EMPTY_COMMIT_SHA = '0000000000000000000000000000000000000000'
GIT_PUSH_OPTION_COUNT_ENV_VAR_NAME = 'GIT_PUSH_OPTION_COUNT'
GIT_PUSH_OPTION_ENV_VAR_PREFIX = 'GIT_PUSH_OPTION_'
Expand Down
42 changes: 35 additions & 7 deletions cycode/cli/files_collector/commit_range_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,31 @@
logger = get_logger('Commit Range Collector')


def get_safe_head_reference_for_diff(repo: 'Repo') -> str:
"""Get a safe reference to use for diffing against the current HEAD.
In repositories with no commits, HEAD doesn't exist, so we return the empty tree hash.

Args:
repo: Git repository object

Returns:
Either "HEAD" string if commits exist, or empty tree hash if no commits exist
"""
try:
repo.rev_parse(consts.GIT_HEAD_COMMIT_REV)
return consts.GIT_HEAD_COMMIT_REV
except Exception as e: # actually gitdb.exc.BadObject; no import because of lazy loading
logger.debug(
'Repository has no commits, using empty tree hash for diffs, %s',
{'repo_path': repo.working_tree_dir},
exc_info=e,
)

# Repository has no commits, use the universal empty tree hash
# This is the standard Git approach for initial commits
return consts.GIT_EMPTY_TREE_OBJECT


def _does_reach_to_max_commits_to_scan_limit(commit_ids: list[str], max_commits_count: Optional[int]) -> bool:
if max_commits_count is None:
return False
Expand Down Expand Up @@ -213,7 +238,8 @@ def get_pre_commit_modified_documents(
diff_documents = []

repo = git_proxy.get_repo(repo_path)
diff_index = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
head_reference = get_safe_head_reference_for_diff(repo)
diff_index = repo.index.diff(head_reference, create_patch=True, R=True)
progress_bar.set_section_length(progress_bar_section, len(diff_index))
for diff in diff_index:
progress_bar.update(progress_bar_section)
Expand All @@ -228,9 +254,11 @@ def get_pre_commit_modified_documents(
)
)

file_content = _get_file_content_from_commit_diff(repo, consts.GIT_HEAD_COMMIT_REV, diff)
if file_content:
git_head_documents.append(Document(file_path, file_content))
# Only get file content from HEAD if HEAD exists (not the empty tree hash)
if head_reference == consts.GIT_HEAD_COMMIT_REV:
file_content = _get_file_content_from_commit_diff(repo, head_reference, diff)
if file_content:
git_head_documents.append(Document(file_path, file_content))

if os.path.exists(file_path):
file_content = get_file_content(file_path)
Expand Down Expand Up @@ -274,13 +302,13 @@ def parse_commit_range_sast(commit_range: str, path: str) -> tuple[Optional[str]
else:
# Git commands like 'git diff <commit>' compare against HEAD.
from_spec = commit_range
to_spec = 'HEAD'
to_spec = consts.GIT_HEAD_COMMIT_REV

# If a spec is empty (e.g., from '..master'), default it to 'HEAD'
if not from_spec:
from_spec = 'HEAD'
from_spec = consts.GIT_HEAD_COMMIT_REV
if not to_spec:
to_spec = 'HEAD'
to_spec = consts.GIT_HEAD_COMMIT_REV

try:
# Use rev_parse to resolve each specifier to its full commit SHA
Expand Down
130 changes: 130 additions & 0 deletions tests/cli/files_collector/test_commit_range_documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import os
import tempfile
from collections.abc import Generator
from contextlib import contextmanager

from git import Repo

from cycode.cli import consts
from cycode.cli.files_collector.commit_range_documents import get_safe_head_reference_for_diff


@contextmanager
def git_repository(path: str) -> Generator[Repo, None, None]:
"""Context manager for Git repositories that ensures proper cleanup on Windows."""
repo = Repo.init(path)
try:
yield repo
finally:
# Properly close the repository to release file handles
repo.close()


@contextmanager
def temporary_git_repository() -> Generator[tuple[str, Repo], None, None]:
"""Combined context manager for temporary directory with Git repository."""
with tempfile.TemporaryDirectory() as temp_dir, git_repository(temp_dir) as repo:
yield temp_dir, repo


class TestGetSafeHeadReferenceForDiff:
"""Test the safe HEAD reference functionality for git diff operations."""

def test_returns_head_when_repository_has_commits(self) -> None:
"""Test that HEAD is returned when the repository has existing commits."""
with temporary_git_repository() as (temp_dir, repo):
test_file = os.path.join(temp_dir, 'test.py')
with open(test_file, 'w') as f:
f.write("print('test')")

repo.index.add(['test.py'])
repo.index.commit('Initial commit')

result = get_safe_head_reference_for_diff(repo)
assert result == consts.GIT_HEAD_COMMIT_REV

def test_returns_empty_tree_hash_when_repository_has_no_commits(self) -> None:
"""Test that an empty tree hash is returned when the repository has no commits."""
with temporary_git_repository() as (temp_dir, repo):
result = get_safe_head_reference_for_diff(repo)
expected_empty_tree_hash = consts.GIT_EMPTY_TREE_OBJECT
assert result == expected_empty_tree_hash


class TestIndexDiffWithSafeHeadReference:
"""Test that index.diff works correctly with the safe head reference."""

def test_index_diff_works_on_bare_repository(self) -> None:
"""Test that index.diff works on repositories with no commits."""
with temporary_git_repository() as (temp_dir, repo):
test_file = os.path.join(temp_dir, 'staged_file.py')
with open(test_file, 'w') as f:
f.write("print('staged content')")

repo.index.add(['staged_file.py'])

head_ref = get_safe_head_reference_for_diff(repo)
diff_index = repo.index.diff(head_ref, create_patch=True, R=True)

assert len(diff_index) == 1
diff = diff_index[0]
assert diff.b_path == 'staged_file.py'

def test_index_diff_works_on_repository_with_commits(self) -> None:
"""Test that index.diff continues to work on repositories with existing commits."""
with temporary_git_repository() as (temp_dir, repo):
initial_file = os.path.join(temp_dir, 'initial.py')
with open(initial_file, 'w') as f:
f.write("print('initial')")

repo.index.add(['initial.py'])
repo.index.commit('Initial commit')

new_file = os.path.join(temp_dir, 'new_file.py')
with open(new_file, 'w') as f:
f.write("print('new file')")

with open(initial_file, 'w') as f:
f.write("print('modified initial')")

repo.index.add(['new_file.py', 'initial.py'])

head_ref = get_safe_head_reference_for_diff(repo)
diff_index = repo.index.diff(head_ref, create_patch=True, R=True)

assert len(diff_index) == 2
file_paths = {diff.b_path or diff.a_path for diff in diff_index}
assert 'new_file.py' in file_paths
assert 'initial.py' in file_paths
assert head_ref == consts.GIT_HEAD_COMMIT_REV

def test_sequential_operations_on_same_repository(self) -> None:
"""Test behavior when transitioning from bare to committed repository."""
with temporary_git_repository() as (temp_dir, repo):
test_file = os.path.join(temp_dir, 'test.py')
with open(test_file, 'w') as f:
f.write("print('test')")

repo.index.add(['test.py'])

head_ref_before = get_safe_head_reference_for_diff(repo)
diff_before = repo.index.diff(head_ref_before, create_patch=True, R=True)

expected_empty_tree = consts.GIT_EMPTY_TREE_OBJECT
assert head_ref_before == expected_empty_tree
assert len(diff_before) == 1

repo.index.commit('First commit')

new_file = os.path.join(temp_dir, 'new.py')
with open(new_file, 'w') as f:
f.write("print('new')")

repo.index.add(['new.py'])

head_ref_after = get_safe_head_reference_for_diff(repo)
diff_after = repo.index.diff(head_ref_after, create_patch=True, R=True)

assert head_ref_after == consts.GIT_HEAD_COMMIT_REV
assert len(diff_after) == 1
assert diff_after[0].b_path == 'new.py'
Loading