diff --git a/.gitignore b/.gitignore index 1c2d52b..48fd780 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .idea/* +__pycache__/ diff --git a/MAINTAINERS.md b/MAINTAINERS.md index f3ed4f2..1f854af 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -59,6 +59,160 @@ that are published to crates.io the release process includes publishing a new version. Ask one of the maintainers to give you permissions for the crate on crates.io. +## Inactive CODEOWNERS Policy + +To maintain an accurate view of repository health and ensure an efficient PR +review process, we have implemented a policy for managing inactive CODEOWNERS +(see [issue #187](https://github.com/rust-vmm/community/issues/187) for +details). + +### Inactivity Definition + +A CODEOWNER is considered inactive after **1 year** without any review activity +(comments, approvals, etc.) in a repository. + +### Removal Process + +1. Regular checks (e.g. monthly) will identify CODEOWNERS who haven't been + active in a year +2. An issue will be opened in the community repository tagging all inactive + CODEOWNERS +3. After a 1-month notification period, inactive members will be gracefully + removed from the `CODEOWNERS` file +4. The required number of approvals per PR should be adjusted to align with + the current number of active maintainers + +### Re-adding Removed CODEOWNERS + +Being removed from CODEOWNERS doesn't mean someone isn't wanted as a maintainer +anymore. It's simply to keep track of how many active maintainers we have and +monitor project health. Anyone removed can easily be re-added as a maintainer +whenever they wish to become active again by: +- Pinging the current maintainers, or +- Opening a PR against the `CODEOWNERS` file to re-add themselves + +**Note:** The gatekeeper inactivity policy has been deferred until the monorepo +migration is complete, at which point gatekeepers will be "codeowners for the +repo root" and can be handled by this policy automatically. + +### Checking for Inactive CODEOWNERS + +The `scripts/check_inactive_codeowners.py` script can help with the process of +identifying inactive CODEOWNERS by checking their activity (PR reviews, +PR/issue comments, and optionally commits) in one or more repositories. + +**Requirements:** +- Python 3 with dependencies from `scripts/requirements.txt` (install with `pip install -r scripts/requirements.txt`) +- GitHub personal access token (optional but recommended, set as `GITHUB_TOKEN` environment variable) + +**Basic usage:** +```bash +export GITHUB_TOKEN=your_token_here +python scripts/check_inactive_codeowners.py +``` + +**Common options:** +- `--repos`: Comma-separated repository name(s) to check (if not specified, checks all repos in the organization) +- `--org`: GitHub organization (default: rust-vmm) +- `--days`: Number of days to check for activity (default: 365) +- `--until`: End date for activity check in YYYY-MM-DD format (default: today) +- `--verbose` or `-v`: Enable detailed debug output including API queries and individual activity found +- `--include-commits`: Include commit count in activity check (default: only reviews and comments) + +The script will: +1. Fetch all repositories from the organization (if `--repos` is not specified) +2. Skip archived repositories +3. Fetch the CODEOWNERS file from each repository (trying common locations) +4. Skip repositories with no CODEOWNERS file or empty CODEOWNERS +5. Extract individual GitHub usernames (team references are not supported) +6. Check if users exist on GitHub +7. Check each user's activity over the specified period (PR reviews, PR/issue comments, and optionally commits) +8. Report inactive and non-existent users with a summary + +**Exit codes:** +- `0`: All CODEOWNERS are active +- `1`: There are inactive CODEOWNERS +- `2`: Errors occurred while querying GitHub + +**Notes:** +- The script uses PyGithub to efficiently batch GitHub API queries for checking activity. +- Without a GitHub token, the script will work but slower due to lower + [rate limits](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api) + (60 requests/hour vs 5000 requests/hour with authentication). +- You can generate a token at https://github.com/settings/tokens + +### Notification Issue Template + +When inactive CODEOWNERS are identified, an issue should be created in the +community repository using this template: + +```markdown +Title: Inactive CODEOWNERS Notification - [Month Year] + +# Inactive CODEOWNERS Notification + +During our regular audit, we identified CODEOWNERS who haven't had any review +activity (comments, approvals, etc.) in their respective repositories for over +1 year. + +The following users are affected: + +@username1 @username2 @username3 ... + +## Next Steps + +According to our [Inactive CODEOWNERS Policy](https://github.com/rust-vmm/community/blob/main/MAINTAINERS.md#inactive-codeowners-policy): + +1. We will wait **1 month** from the date of this issue before proceeding with + any removals +2. If you are still interested in maintaining your repository, please comment on + this issue or resume review activity +3. If you are no longer able to maintain the repository, no action is needed - + we will proceed with the removal after the notification period +4. You can be re-added as a CODEOWNER at any time in the future by pinging + current maintainers or opening a PR + +Being removed from CODEOWNERS doesn't mean you aren't wanted as a maintainer. +It's simply to help us track active maintainers and adjust approval rules +accordingly. + +If you have any questions or concerns, please comment below. + +**Removal Date:** [Date 1 month from issue creation] +``` + +### Removal PR Template + +After the notification period expires, a PR should be created to remove +inactive CODEOWNERS using this template: + +```markdown +Title: Remove inactive CODEOWNERS - [Month Year] + +# Remove Inactive CODEOWNERS + +This PR removes CODEOWNERS who have been inactive for over 1 year and were +notified in https://github.com/rust-vmm/community/issues/[issue-number]. + +## Changes + +- Removed inactive CODEOWNERS: @username1 @username2 @username3 ... +- Adjusted PR approval requirements where needed to match the new number of + active maintainers + +## Notification + +All affected CODEOWNERS were notified on [date] via https://github.com/rust-vmm/community/issues/[issue-number] +and given a 1-month period to respond. + +## Policy Reference + +This removal follows our [Inactive CODEOWNERS Policy](https://github.com/rust-vmm/community/blob/main/MAINTAINERS.md#inactive-codeowners-policy). + +Removed maintainers can be re-added at any time by opening a PR or pinging +current maintainers. +``` + ## The Gatekeeper Role The wider organization is managed by a team diff --git a/scripts/check_inactive_codeowners.py b/scripts/check_inactive_codeowners.py new file mode 100755 index 0000000..32894ed --- /dev/null +++ b/scripts/check_inactive_codeowners.py @@ -0,0 +1,546 @@ +#!/usr/bin/env python3 +""" +Check for inactive codeowners in GitHub repositories. + +This script fetches the CODEOWNERS file from one or more GitHub repositories +and checks if any codeowners have had no activity (reviews, PR comments, +issue comments) over a specified time period (default: past year). + +Dependencies: + pip install -r scripts/requirements.txt + +Usage: + # Check a single repository + python check_inactive_codeowners.py --org rust-vmm --repos vhost + + # Check multiple repositories + python check_inactive_codeowners.py --org rust-vmm --repos vhost,virtio-vsock,vm-memory + + # Check all repositories in an organization + python check_inactive_codeowners.py --org rust-vmm + + # Check with custom time period and verbose output + python check_inactive_codeowners.py --org myorg --repos myrepo --days 180 --verbose + + # Check activity in a specific date range + python check_inactive_codeowners.py --org rust-vmm --days 365 --until 2024-12-31 + +Returns: + 0 if all codeowners are active + 1 if there are inactive codeowners + 2 if there are errors while querying GitHub +""" + +import argparse +import logging +import os +import re +import sys +from datetime import datetime, timedelta, timezone + +try: + from github import Github, Auth, GithubException +except ImportError: + print("Error: PyGithub is not installed. Install it with: pip install PyGithub", file=sys.stderr) + sys.exit(2) + +logger = logging.getLogger(__name__) + + +def parse_codeowners(codeowners_content): + """Parse CODEOWNERS content and extract GitHub usernames. + + Parses the CODEOWNERS file format and extracts individual GitHub user references. + Skips comments, empty lines, and team references (@org/team-name). + + Args: + codeowners_content: String content of a CODEOWNERS file + + Returns: + set: Set of GitHub usernames (without @ prefix) + + Note: + Only individual users are extracted. Team references are currently not supported. + """ + usernames = set() + + for line in codeowners_content.split('\n'): + line = line.strip() + + if not line or line.startswith('#'): + continue + + # Extract @username patterns, skip team references (@org/team-name) + # GitHub usernames can contain alphanumeric and dashes + # Only match if followed by whitespace or end of string (not /) + matches = re.findall(r'@([a-zA-Z0-9-]+)(?=\s|$)', line) + usernames.update(matches) + + return usernames + + +def check_user_exists(gh, username): + """Check if a GitHub user exists. + + Args: + gh: PyGithub Github instance + username: GitHub username to check + + Returns: + bool: True if user exists, False otherwise + + Note: + Uses the GitHub Users API to verify user existence. + """ + try: + gh.get_user(username) + logger.debug("User %s exists", username) + return True + except GithubException as e: + if e.status == 404: + logger.debug("User %s not found (404)", username) + return False + + logger.warning("Unexpected error checking user %s: %s", username, e) + return True # Assume exists to avoid false positives + except Exception as e: + logger.warning("Error checking if user exists: %s", e) + return True # Assume exists on error to avoid false positives + + +def fetch_codeowners_from_github(gh, org, repo, codeowners_path=None): + """Fetch CODEOWNERS file from GitHub repository. + + Tries common CODEOWNERS locations if a specific path is not provided. + + Args: + gh: PyGithub Github instance + org: GitHub organization name + repo: Repository name + codeowners_path: Specific path to CODEOWNERS file (optional) + + Returns: + str or None: Content of the CODEOWNERS file, or None if not found + + Note: + If codeowners_path is not specified, tries these locations in order: + - CODEOWNERS (root) + - .github/CODEOWNERS + - docs/CODEOWNERS + """ + locations = [codeowners_path] if codeowners_path else ['CODEOWNERS', '.github/CODEOWNERS', 'docs/CODEOWNERS'] + + repository = gh.get_repo(f"{org}/{repo}") + + for location in locations: + try: + content = repository.get_contents(location) + if content and hasattr(content, 'decoded_content'): + return content.decoded_content.decode('utf-8') + + logger.debug("CODEOWNERS not found at %s", location) + except Exception as e: + logger.debug("Failed to fetch CODEOWNERS from %s: %s", location, e) + continue + + return None + + +def process_user_activity(items, item_label, usernames, author, since_date, until_date, + user_activity, activity_key, item_number): + """Process reviews or comments and track unique user activity. + + Args: + items: List of review or comment objects + item_label: 'review' or 'comment' for date handling and logging + usernames: Set of usernames to track + author: Author of PR/issue (skip self-activity) + since_date: Start of date range + until_date: End of date range + user_activity: Dictionary to update + activity_key: Key to increment in user_activity + item_number: PR/issue number for debug logging + """ + users_counted = set() + + for item in items: + if not item.user: + continue + user_login = item.user.login + + # Skip if: not a tracked codeowner, activity on own PR/issue, or already counted + if user_login not in usernames or user_login == author or user_login in users_counted: + continue + + if item_label == 'review': + item_date = item.submitted_at.replace(tzinfo=timezone.utc) if item.submitted_at else None + if not item_date: + continue + else: + item_date = item.created_at.replace(tzinfo=timezone.utc) + + if item_date < since_date or item_date > until_date: + continue + + users_counted.add(user_login) + user_activity[user_login][activity_key] += 1 + logger.debug("Found %s by %s on #%s", item_label, user_login, item_number) + + +def process_items_for_activity(gh, query, usernames, since_date, until_date, + user_activity, item_type): + """Process PRs or issues to track user activity. + + Counts the number of unique PRs/issues where each user participated + (via reviews or comments). Each user is counted at most once per + PR/issue. + + Args: + gh: PyGithub Github instance + query: GitHub search query string + usernames: Set of usernames to check for activity + since_date: Timezone-aware datetime to check activity from + until_date: Timezone-aware datetime to check activity until + user_activity: Dictionary to update with activity counts + item_type: Either 'pr' or 'issue' for logging and progress messages + """ + activity_key = f'{item_type}_commented' + item_label = item_type.upper() + + try: + items = gh.search_issues(query) + total_count = items.totalCount + + for idx, item in enumerate(items, 1): + print(f" Searching for {item_label} activity... {idx}/{total_count}", end='\r', flush=True) + + try: + author = item.user.login if item.user else None + + # Handle PR reviews + if item_type == 'pr': + reviews = list(item.as_pull_request().get_reviews()) + process_user_activity(reviews, 'review', usernames, author, since_date, until_date, + user_activity, 'pr_reviewed', item.number) + + # Handle PR/Issue comments + comments = list(item.get_comments()) + process_user_activity(comments, 'comment', usernames, author, since_date, until_date, + user_activity, activity_key, item.number) + except Exception as e: + logger.warning("Could not fetch data for %s #%s: %s", item_label, item.number, e) + + print() # Clear the progress line + except Exception as e: + logger.warning("Batch %s activity search failed: %s", item_label, e) + + +def check_repository(gh, org, repo, since_date, until_date, include_commits=False): + """Check a single repository for inactive codeowners. + + Uses batched GitHub API queries to efficiently check all codeowners + for activity (PR reviews, comments, and optionally commits). + + Args: + gh: PyGithub Github instance + org: GitHub organization name + repo: Repository name + since_date: Timezone-aware datetime to check activity from + until_date: Timezone-aware datetime to check activity until + include_commits: If True, include commits in activity check + + Returns: + dict or None: Dictionary with keys 'inactive', 'active', 'nonexistent': + - 'inactive': list of usernames with no activity + - 'active': list of tuples (username, activity_dict) where activity_dict contains: + - 'pr_commented': Number of PRs where user commented + - 'issue_commented': Number of issues where user commented + - 'pr_reviewed': Number of PRs where user left a review + - 'commits': Number of commits (only if include_commits=True) + - 'active': Boolean indicating if user has any activity + - 'nonexistent': list of usernames that don't exist on GitHub + Returns None if no CODEOWNERS file found or no codeowners defined. + """ + repository = gh.get_repo(f"{org}/{repo}") + + if repository.archived: + print(" Repository is archived, skipping") + return None + codeowners_content = fetch_codeowners_from_github(gh, org, repo) + if not codeowners_content: + print(" No CODEOWNERS file found, skipping") + return None + + usernames = parse_codeowners(codeowners_content) + if not usernames: + print(" CODEOWNERS file is empty, skipping") + return None + + print(f"Found {len(usernames)} codeowners, checking if users exist...") + valid_usernames = set() + nonexistent_users = [] + + for username in usernames: + if not check_user_exists(gh, username): + nonexistent_users.append(username) + else: + valid_usernames.add(username) + + nonexistent_users.sort() + + if nonexistent_users: + print(f" Non-existent users: {', '.join(nonexistent_users)}") + if valid_usernames: + print(f" Valid users: {', '.join(sorted(valid_usernames))}") + + if not valid_usernames: + return { + 'inactive': [], + 'active': [], + 'nonexistent': nonexistent_users + } + + user_activity = {username: { + 'pr_commented': 0, + 'issue_commented': 0, + 'pr_reviewed': 0, + } for username in valid_usernames} + + if include_commits: + print("Checking commits...") + for username in valid_usernames: + logger.debug("Checking commits for %s", username) + commits = repository.get_commits(author=username, since=since_date, until=until_date) + count = commits.totalCount + user_activity[username]['commits'] = count + logger.debug("Commits for %s: %s", username, count) + + reviewed_str = ' '.join([f'reviewed-by:{u}' for u in valid_usernames]) + commenter_str = ' '.join([f'commenter:{u}' for u in valid_usernames]) + date_range = f'{since_date.date().isoformat()}..{until_date.date().isoformat()}' + + # Batch search for all PR activity (reviews and comments) + print("Checking PR activity...") + pr_query = f'repo:{org}/{repo} is:pr {reviewed_str} {commenter_str} updated:{date_range}' + logger.debug("PR query: %s", pr_query) + process_items_for_activity(gh, pr_query, valid_usernames, since_date, until_date, + user_activity, 'pr') + + # Batch search for issue comments + print("Checking issue activity...") + issue_query = f'repo:{org}/{repo} is:issue {commenter_str} updated:{date_range}' + logger.debug("Issue query: %s", issue_query) + process_items_for_activity(gh, issue_query, valid_usernames, since_date, until_date, + user_activity, 'issue') + + inactive_users = [] + active_users = [] + + for username in sorted(valid_usernames): + activity = user_activity[username] + activity['active'] = (activity.get('commits', 0) > 0 or activity['pr_commented'] > 0 or + activity['issue_commented'] > 0 or activity['pr_reviewed'] > 0) + + if not activity['active']: + inactive_users.append(username) + else: + active_users.append((username, activity)) + + print("Results:") + if active_users: + print(" Active users:") + for username, activity in active_users: + print(f" {username}: {activity}") + + if inactive_users: + print(" Inactive users:") + for username in inactive_users: + print(f" {username}") + + if nonexistent_users: + print(" Non-existent users:") + for username in nonexistent_users: + print(f" {username}") + + return { + 'inactive': inactive_users, + 'active': active_users, + 'nonexistent': nonexistent_users + } + + +def main(): + """Main entry point for the script. + + Parses command-line arguments, authenticates with GitHub, and checks + one or more repositories for inactive codeowners. Outputs a summary + of findings. + + Returns: + int: Exit code (0 = success, 1 = inactive users found, 2 = errors) + """ + parser = argparse.ArgumentParser( + description='Check for inactive codeowners in GitHub repositories' + ) + parser.add_argument( + '--repos', + type=str, + default=None, + help='Comma-separated repository name(s) to check (if not specified, checks all repos in the organization)' + ) + parser.add_argument( + '--org', + default='rust-vmm', + help='GitHub organization (default: rust-vmm)' + ) + parser.add_argument( + '--days', + type=int, + default=365, + help='Number of days to check for activity (default: 365)' + ) + parser.add_argument( + '--until', + type=str, + default=None, + help='End date for activity check in YYYY-MM-DD format (default: today)' + ) + parser.add_argument( + '--verbose', + '-v', + action='store_true', + help='Enable detailed debug output including API queries and individual activity found' + ) + parser.add_argument( + '--rate-limit', + type=float, + default=0, + help='Delay in seconds between API requests (default: 0). ' + 'PyGithub automatically handles retries and backoff.' + ) + parser.add_argument( + '--include-commits', + action='store_true', + help='Include commit count in activity check (default: only reviews and comments)' + ) + + args = parser.parse_args() + + log_level = logging.DEBUG if args.verbose else logging.WARNING + + logging.basicConfig( + level=log_level, + format='%(levelname)s: %(message)s', + stream=sys.stderr + ) + + token = os.environ.get('GITHUB_TOKEN') + if not token: + logger.warning("GITHUB_TOKEN environment variable not set.") + logger.warning("API requests will be rate-limited. Set GITHUB_TOKEN for higher limits.") + + auth = Auth.Token(token) if token else None + + gh = Github( + auth=auth, + per_page=100, + seconds_between_requests=args.rate_limit + ) + + # Remove PyGithub's handlers to avoid duplicate output and set level + github_logger = logging.getLogger('github') + github_logger.handlers.clear() + github_logger.setLevel(logging.WARNING) + + if args.until: + try: + until_date = datetime.strptime(args.until, '%Y-%m-%d').replace(tzinfo=timezone.utc) + except ValueError: + logger.error("Invalid date format for --until. Use YYYY-MM-DD format.") + return 2 + else: + until_date = datetime.now(timezone.utc) + + since_date = until_date - timedelta(days=args.days) + print(f"Checking activity from {since_date.date()} to {until_date.date()} ({args.days} days)") + + if args.repos: + repos_to_check = [repo.strip() for repo in args.repos.split(',')] + else: + print(f"Fetching all repositories from {args.org}...") + org = gh.get_organization(args.org) + repos_to_check = [repo.name for repo in org.get_repos()] + + repos_to_check.sort() + + print(f"{len(repos_to_check)} repositories to check: {', '.join(repos_to_check)}") + + repos_status = {} + repos_skipped = [] + total_repos = len(repos_to_check) + for idx, repo_name in enumerate(repos_to_check, 1): + print(f"\nChecking {args.org}/{repo_name} ({idx}/{total_repos})...") + try: + status = check_repository(gh, args.org, repo_name, since_date, until_date, args.include_commits) + if status is None: + repos_skipped.append(repo_name) + continue + + repos_status[repo_name] = status + + except Exception as e: + logger.error("Failed to check repository %s: %s", repo_name, e) + repos_skipped.append(repo_name) + continue + + print("\n" + "="*80) + print(f"SUMMARY - Activity from {since_date.date()} to {until_date.date()} ({args.days} days)") + print("="*80 + "\n") + + print(f"Total repositories: {len(repos_to_check)}") + + repos_with_issues = {} + repos_fine = [] + for name, data in repos_status.items(): + if data['inactive'] or data['nonexistent']: + repos_with_issues[name] = data + else: + repos_fine.append(name) + + if repos_fine: + print(f"\nRepositories with all codeowners active ({len(repos_fine)}):") + for repo_name in repos_fine: + print(f" - {repo_name}") + + if repos_skipped: + print(f"\nRepositories skipped ({len(repos_skipped)}):") + for repo_name in repos_skipped: + print(f" - {repo_name}") + + if repos_with_issues: + print(f"\nRepositories with inactive or non-existent codeowners ({len(repos_with_issues)}):") + for repo_name in repos_with_issues: + issues = repos_with_issues[repo_name] + print(f" - {repo_name}:") + if issues['active']: + active_list = ', '.join([f"@{user}" for user, _ in issues['active']]) + print(f" Active: {active_list}") + if issues['inactive']: + inactive_list = ', '.join([f"@{user}" for user in issues['inactive']]) + print(f" Inactive: {inactive_list}") + if issues['nonexistent']: + nonexistent_list = ', '.join([f"@{user}" for user in issues['nonexistent']]) + print(f" Non-existent: {nonexistent_list}") + + if repos_with_issues: + return 1 + + if not repos_status: + print("\nNo repositories were processed!") + return 2 + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..e1cd731 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,16 @@ +# Python dependencies for rust-vmm-community scripts +# Install with: pip install -r requirements.txt + +# check_inactive_codeowners.py dependencies +certifi==2025.10.5 +cffi==2.0.0 +charset-normalizer==3.4.3 +cryptography==46.0.2 +idna==3.11 +pycparser==2.23 +PyGithub==2.8.1 +PyJWT==2.10.1 +PyNaCl==1.6.0 +requests==2.32.5 +typing_extensions==4.15.0 +urllib3==2.5.0