diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..5fdd4e3
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,21 @@
+name: Test actions scripts
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - main
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+      - name: "Run tests"
+        run: |
+          python3 -m pip install uv pytest rich python-gnupg requests pytest-unordered
+          uv run pytest -vv
\ No newline at end of file
diff --git a/.github/workflows/test-actions.yml b/.github/workflows/test-actions.yml
new file mode 100644
index 0000000..ed87a64
--- /dev/null
+++ b/.github/workflows/test-actions.yml
@@ -0,0 +1,153 @@
+name: Test gh-svn-pypi-publisher
+description: "Publish to PyPI"
+
+on:
+  workflow_dispatch:
+    inputs:
+      release-config:
+        description: "Path to the release config file"
+        required: true
+        default: "release-config.yml"
+      temp-dir:
+        description: >
+          Checkout directory of the svn repo; this is used to check out the svn repo.
+        required: false
+        default: "asf-dist"
+      mode:
+        description: "Mode to run the action"
+        required: false
+        default: "VERIFY"
+      if-no-files-found:
+        description: >
+          The desired behavior if no files are found using the provided path.
+
+          Available Options:
+            warn: Output a warning but do not fail the action
+            error: Fail the action with an error message
+            ignore: Do not output any warnings or errors, the action does not fail
+        default: 'warn'
+      retention-days:
+        description: >
+          Duration after which artifact will expire in days. 0 means using default retention.
+
+          Minimum 1 day.
+          Maximum 90 days unless changed from the repository settings page.
+        default: '5'
+      compression-level:
+        description: >
+          The level of compression for Zlib to be applied to the artifact archive.
+          The value can range from 0 to 9:
+          - 0: No compression
+          - 1: Best speed
+          - 6: Default compression (same as GNU Gzip)
+          - 9: Best compression
+          Higher levels will result in better compression, but will take longer to complete.
+          For large files that are not easily compressed, a value of 0 is recommended for significantly faster uploads.
+        default: '6'
+      overwrite:
+        description: >
+          If true, an artifact with a matching name will be deleted before a new one is uploaded.
+          If false, the action will fail if an artifact for the given name already exists.
+          Does not fail if the artifact does not exist.
+        default: 'false'
+
+      artifact-name:
+        description: >
+          The name of the artifact to be uploaded.
+        required: false
+        default: "pypi-packages"
+
+
+
+jobs:
+  release-checks:
+    outputs:
+      publisher-name: ${{ steps.config-parser.outputs.publisher-name }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+
+      - name: "Config parser"
+        id: config-parser
+        uses: ./read-config
+        with:
+          release-config: ${{ inputs.release-config }}
+
+      - name: "Checkout svn ${{ steps.config-parser.outputs.publisher-url }}"
+        id: "svn-checkout"
+        uses: ./init
+        with:
+          temp-dir: ${{ inputs.temp-dir }}
+          repo-url: ${{ steps.config-parser.outputs.publisher-url }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+
+      - name: "Svn check"
+        id: "svn-check"
+        uses: ./svn
+        with:
+          svn-config: ${{ steps.config-parser.outputs.checks-svn }}
+          temp-dir: ${{ inputs.temp-dir }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+
+      - name: "Checksum check"
+        id: "checksum-check"
+        uses: ./checksum
+        with:
+          checksum-config: ${{ steps.config-parser.outputs.checks-checksum }}
+          temp-dir: ${{ inputs.temp-dir }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+
+      - name: "Signature check"
+        id: "signature-check"
+        uses: ./signature
+        with:
+          signature-config: ${{ steps.config-parser.outputs.checks-signature }}
+          temp-dir: ${{ inputs.temp-dir }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+
+      - name: "Find ${{ steps.config-parser.outputs.publisher-name }} packages"
+        id: "upload-artifacts"
+        uses: ./artifacts
+        with:
+          publish-config: ${{ steps.config-parser.outputs.checks-publish }}
+          temp-dir: ${{ inputs.temp-dir }}
+          mode: ${{ inputs.mode }}
+          publisher-name: ${{ steps.config-parser.outputs.publisher-name }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+          if-no-files-found: ${{ inputs.if-no-files-found }}
+          retention-days: ${{ inputs.retention-days }}
+          compression-level: ${{ inputs.compression-level }}
+          overwrite: ${{ inputs.overwrite }}
+
+
+  publish-to-pypi:
+    name: Publish svn packages to PyPI
+    runs-on: ubuntu-latest
+    needs:
+      - release-checks
+    environment:
+      name: test
+    permissions:
+      id-token: write # IMPORTANT: mandatory for trusted publishing
+
+    steps:
+      - name: "Download release distributions for ${{ needs.release-checks.outputs.publisher-name }}"
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.artifact-name }}
+          merge-multiple: true
+          path: ./dist
+
+      - name: "Publishing ${{ needs.release-checks.outputs.publisher-name }} to PyPI"
+        uses: pypa/gh-action-pypi-publish@release/v1
+        if: inputs.mode == 'RELEASE'
+        with:
+          packages-dir: "./dist"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..905938d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+*./__pycache__/.*
\ No newline at end of file
diff --git a/README.md b/README.md
index a526eff..d5d123e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,442 @@
-# gh-svn-pypi-publisher
\ No newline at end of file
+# gh-svn-pypi-publisher
+
+**`gh-svn-pypi-publisher`** is a composite action repository used to validate artifacts and publish them to PyPI from SVN.
+
+## Composite Actions Used in This Repo
+
+## read-config Action
+This action reads the release configuration file and writes its output to `GITHUB_OUTPUT`. The configuration file is a YAML file containing the validation rules and the configuration for the publish process.
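+
+For illustration, here is a minimal sketch of the output flattening this action performs (mirroring `read-config/config_parser.py`, included later in this diff): each top-level section and its keys become `<section>-<key>` outputs, and nested values are serialized as JSON. The `config` literal below is a trimmed, hypothetical example:
+
+```python
+import json
+
+# Trimmed example config; a real run reads release-config.yml instead.
+config = {
+    "publisher": {"name": "providers", "path": "providers/"},
+    "checks": {"checksum": [{"id": "checksum", "algorithm": "sha512"}]},
+}
+
+for section, values in config.items():
+    for key, value in values.items():
+        # dicts and lists are emitted as JSON so downstream actions can parse them
+        printable = json.dumps(value) if isinstance(value, (dict, list)) else value
+        print(f"{section}-{key}={printable}")
+
+# publisher-name=providers
+# publisher-path=providers/
+# checks-checksum=[{"id": "checksum", "algorithm": "sha512"}]
+```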
+
+### Usage
+```yaml
+- name: "Config parser"
+  id: config-parser
+  uses: ./read-config
+  with:
+    release-config: ${{ inputs.release-config }}
+```
+
+**Example configuration file:**
+
+```yaml
+project:
+  name: example-project
+  description: "Example project for publishing to PyPI"
+publisher:
+  name: providers
+  url: "https://dist.apache.org/repos/dist/dev/airflow"
+  path: "providers/"
+checks:
+  svn:
+    - id: extension
+      description: "Validate svn package extensions"
+      identifiers:
+        - type: regex
+          pattern: ".*(py3-none-any.whl|tar.gz.sha512|tar.gz.asc|tar.gz|py3-none-any.whl.asc|py3-none-any.whl.sha512)$"
+
+    - id: package_name
+      description: "Validate svn package names"
+      identifiers:
+        - type: regex
+          pattern: ".*(apache_airflow.*)$"
+
+        - type: regex
+          pattern: ".*(apache-airflow.*)$"
+
+  checksum:
+    - id: checksum
+      description: "Validate checksum with SHA512"
+      algorithm: "sha512"
+
+  signature:
+    - id: signature
+      description: "Validate package signatures with GPG"
+      method: gpg
+      keys: "https://dist.apache.org/repos/dist/release/airflow/KEYS"
+
+  publish:
+    id: publish
+    description: "Publish provider packages to PyPI"
+    release-type: "RC_VERSION"
+    exclude_extensions:
+      - type: regex
+        pattern: ".*(.asc|.sha512)$"
+    compare:
+      url: "https://dist.apache.org/repos/dist/release/airflow/"
+      path: "providers/"
+      package_names:
+        - type: regex
+          pattern: "(apache_airflow_providers.*?)(?=rc)"
+```
+#### Publisher
+This section contains the publisher details like `name`, `url`, and `path`, which identify the repository in SVN.
+
+- **`name`**: Configure any name for the publisher. A meaningful name is recommended; for example, if you are releasing providers, you can name it `providers`.
+- **`url`**: URL of the SVN repository to check out.
+- **`path`**: Path to the directory where the artifacts are stored in the SVN repository.
+
+**Example**:
+If you want to release providers, and the SVN repository structure is as follows:
+- `https://dist.apache.org/repos/dist/dev/airflow/providers`
+- `https://dist.apache.org/repos/dist/release/airflow/providers`
+
+To publish the packages from the `dev/providers` folder, set `url` and `path` in the `release-config.yml` as shown below:
+
+```yaml
+url: https://dist.apache.org/repos/dist/dev/airflow
+path: providers/
+```
+## init Action
+This action checks out the SVN repository into a temporary directory on the runner.
+It uses the configuration from the `read-config` action to check out the repository.
+
+**Inputs to the action**:
+- **`temp-dir`**: Temporary directory to check out the repository into.
+- **`repo-url`**: URL of the SVN repository to check out.
+- **`repo-path`**: Path to the directory where the artifacts are stored in the SVN repository.
+
+### Usage
+```yaml
+- name: "Checkout svn ${{ steps.config-parser.outputs.publisher-url }}"
+  id: "svn-checkout"
+  uses: ./init
+  with:
+    temp-dir: ${{ inputs.temp-dir }}
+    repo-url: ${{ steps.config-parser.outputs.publisher-url }}
+    repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+```
+
+## SVN Action
+Action to validate the file name patterns and extensions of the artifacts in the SVN repository.
+
+This action uses the `svn` section from the `release-config.yml` to validate the artifacts. An example configuration is shown below.
+
+```yaml
+checks:
+  svn:
+    - id: extension
+      description: "Validate svn package extensions"
+      identifiers:
+        - type: regex
+          pattern: ".*(py3-none-any.whl|tar.gz.sha512|tar.gz.asc|tar.gz|py3-none-any.whl.asc|py3-none-any.whl.sha512)$"
+
+    - id: package_name
+      description: "Validate svn package names"
+      identifiers:
+        - type: regex
+          pattern: ".*(apache_airflow.*)$"
+
+        - type: regex
+          pattern: ".*(apache-airflow.*)$"
+```
+#### Extension
+This rule validates the package extensions.
+It checks whether each package has one of the required extensions. Examples include:
+
+- `.tar.gz`
+- `.tar.gz.asc`
+- `.tar.gz.sha512`
+- `-py3-none-any.whl`
+- `-py3-none-any.whl.asc`
+- `-py3-none-any.whl.sha512`
+
+---
+
+#### Package Name
+This rule validates the package names.
+It checks whether each package name matches the required pattern.
+
+At present, the **SVN Action** supports **only regex type identifiers** to validate the package names and extensions.
+
+### Usage
+```yaml
+- name: "Svn check"
+  id: "svn-check"
+  uses: ./svn
+  with:
+    svn-config: ${{ steps.config-parser.outputs.checks-svn }}
+    temp-dir: ${{ inputs.temp-dir }}
+    repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+```
+
+## Checksum Action
+Action to validate the checksums of the artifacts in the SVN repository.
+
+This action uses the `checksum` section from the `release-config.yml` to validate the artifacts. An example configuration is shown below.
+
+```yaml
+checks:
+  checksum:
+    - id: checksum
+      description: "Validate checksum with SHA512"
+      algorithm: "sha512"
+```
+#### Checksum
+This rule validates the checksums of the artifacts.
+
+It checks the checksum of each artifact against the provided checksum type.
+
+Provide the checksum type in the `algorithm` field, e.g. `sha512` or `sha256`; anything supported by Python's `hashlib` module can be used.
+
+### Usage
+```yaml
+- name: "Checksum check"
+  id: "checksum-check"
+  uses: ./checksum
+  with:
+    checksum-config: ${{ steps.config-parser.outputs.checks-checksum }}
+    temp-dir: ${{ inputs.temp-dir }}
+    repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+```
+
+## Signature Action
+Action to validate the signatures of the artifacts in the SVN repository.
+
+This action uses the `signature` section from the `release-config.yml` to validate the artifacts. An example configuration is shown below.
+
+```yaml
+checks:
+  signature:
+    - id: signature
+      description: "Validate package signatures with GPG"
+      method: gpg
+      keys: "https://dist.apache.org/repos/dist/release/airflow/KEYS"
+```
+#### Signature
+This rule validates the signatures of the artifacts.
+
+It checks the signatures of the artifacts against the GPG keys file provided in the `keys` field.
+
+At present, the **Signature Action** supports **only GPG** to validate the signatures of the artifacts.
+
+### Usage
+```yaml
+- name: "Signature check"
+  id: "signature-check"
+  uses: ./signature
+  with:
+    signature-config: ${{ steps.config-parser.outputs.checks-signature }}
+    temp-dir: ${{ inputs.temp-dir }}
+    repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+```
+
+## Artifacts Action
+Action to publish the artifacts to PyPI.
+
+This action uses the `publish` section from the `release-config.yml` to publish the artifacts. An example configuration is shown below.
+
+```yaml
+checks:
+  publish:
+    id: publish
+    description: "Publish provider packages to PyPI"
+    release-type: "RC_VERSION"
+    exclude_extensions:
+      - type: regex
+        pattern: ".*(.asc|.sha512)$"
+    compare:
+      url: "https://dist.apache.org/repos/dist/release/airflow/"
+      path: "providers/"
+      package_names:
+        - type: regex
+          pattern: "(apache_airflow_providers.*?)(?=rc)"
+```
+#### Release Configuration
+The `release-type` and `compare` sections are part of the validation and publishing configuration.
+
+##### `release-type`
+- **`RC_VERSION`**:
+  Considers packages from the `dev/` folder and publishes them to PyPI.
+
+- **`PYPI_VERSION`**:
+  Considers packages from the `release/` folder and publishes them to PyPI.
+
+---
+
+##### `compare`
+This section contains the release svn folder configuration. It compares the packages in the `dev/` folder with those in the release folder, and only matching packages are published to PyPI.
+
+### Usage
+```yaml
+- name: "Find ${{ steps.config-parser.outputs.publisher-name }} packages"
+  id: "upload-artifacts"
+  uses: ./artifacts
+  with:
+    publish-config: ${{ steps.config-parser.outputs.checks-publish }}
+    temp-dir: ${{ inputs.temp-dir }}
+    mode: ${{ inputs.mode }}
+    publisher-name: ${{ steps.config-parser.outputs.publisher-name }}
+    repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+    if-no-files-found: ${{ inputs.if-no-files-found }}
+    retention-days: ${{ inputs.retention-days }}
+    compression-level: ${{ inputs.compression-level }}
+    overwrite: ${{ inputs.overwrite }}
+```
+
+## Example Workflow
+A sample GitHub workflow file using the composite actions is shown below:
+
+```yaml
+name: Test gh-svn-pypi-publisher
+description: "Publish to PyPI"
+
+on:
+  workflow_dispatch:
+    inputs:
+      release-config:
+        description: "Path to the release config file"
+        required: true
+        default: "release-config.yml"
+      temp-dir:
+        description: >
+          Checkout directory of the svn repo; this is used to check out the svn repo.
+        required: false
+        default: "asf-dist"
+      mode:
+        description: "Mode to run the action"
+        required: false
+        default: "VERIFY"
+      if-no-files-found:
+        description: >
+          The desired behavior if no files are found using the provided path.
+
+          Available Options:
+            warn: Output a warning but do not fail the action
+            error: Fail the action with an error message
+            ignore: Do not output any warnings or errors, the action does not fail
+        default: 'warn'
+      retention-days:
+        description: >
+          Duration after which artifact will expire in days. 0 means using default retention.
+
+          Minimum 1 day.
+          Maximum 90 days unless changed from the repository settings page.
+        default: '5'
+      compression-level:
+        description: >
+          The level of compression for Zlib to be applied to the artifact archive.
+          The value can range from 0 to 9:
+          - 0: No compression
+          - 1: Best speed
+          - 6: Default compression (same as GNU Gzip)
+          - 9: Best compression
+          Higher levels will result in better compression, but will take longer to complete.
+          For large files that are not easily compressed, a value of 0 is recommended for significantly faster uploads.
+        default: '6'
+      overwrite:
+        description: >
+          If true, an artifact with a matching name will be deleted before a new one is uploaded.
+          If false, the action will fail if an artifact for the given name already exists.
+          Does not fail if the artifact does not exist.
+        default: 'false'
+
+      artifact-name:
+        description: >
+          The name of the artifact to be uploaded.
+        required: false
+        default: "pypi-packages"
+
+
+
+jobs:
+  release-checks:
+    outputs:
+      publisher-name: ${{ steps.config-parser.outputs.publisher-name }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+
+      - name: "Config parser"
+        id: config-parser
+        uses: ./read-config
+        with:
+          release-config: ${{ inputs.release-config }}
+
+      - name: "Checkout svn ${{ steps.config-parser.outputs.publisher-url }}"
+        id: "svn-checkout"
+        uses: ./init
+        with:
+          temp-dir: ${{ inputs.temp-dir }}
+          repo-url: ${{ steps.config-parser.outputs.publisher-url }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+
+      - name: "Svn check"
+        id: "svn-check"
+        uses: ./svn
+        with:
+          svn-config: ${{ steps.config-parser.outputs.checks-svn }}
+          temp-dir: ${{ inputs.temp-dir }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+
+      - name: "Checksum check"
+        id: "checksum-check"
+        uses: ./checksum
+        with:
+          checksum-config: ${{ steps.config-parser.outputs.checks-checksum }}
+          temp-dir: ${{ inputs.temp-dir }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+
+      - name: "Signature check"
+        id: "signature-check"
+        uses: ./signature
+        with:
+          signature-config: ${{ steps.config-parser.outputs.checks-signature }}
+          temp-dir: ${{ inputs.temp-dir }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+
+      - name: "Find ${{ steps.config-parser.outputs.publisher-name }} packages"
+        id: "upload-artifacts"
+        uses: ./artifacts
+        with:
+          publish-config: ${{ steps.config-parser.outputs.checks-publish }}
+          temp-dir: ${{ inputs.temp-dir }}
+          mode: ${{ inputs.mode }}
+          publisher-name: ${{ steps.config-parser.outputs.publisher-name }}
+          repo-path: ${{ steps.config-parser.outputs.publisher-path }}
+          if-no-files-found: ${{ inputs.if-no-files-found }}
+          retention-days: ${{ inputs.retention-days }}
+          compression-level: ${{ inputs.compression-level }}
+          overwrite: ${{ inputs.overwrite }}
+
+
+  publish-to-pypi:
+    name: Publish svn packages to PyPI
+    runs-on: ubuntu-latest
+    needs:
+      - release-checks
+    environment:
+      name: test
+    permissions:
+      id-token: write # IMPORTANT: mandatory for trusted publishing
+
+    steps:
+      - name: "Download release distributions for ${{ needs.release-checks.outputs.publisher-name }}"
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.artifact-name }}
+          merge-multiple: true
+          path: ./dist
+
+      - name: "Publishing ${{ needs.release-checks.outputs.publisher-name }} to PyPI"
+        uses: pypa/gh-action-pypi-publish@release/v1
+        if: inputs.mode == 'RELEASE'
+        with:
+          packages-dir: "./dist"
+```
+
+The `mode` input is used to run the action in different modes.
+- **`VERIFY`**:
+  It will only validate the artifacts and will not publish to PyPI.
+
+- **`RELEASE`**:
+  It will validate the artifacts and publish to PyPI.
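+
+To make the `compare.package_names` behaviour concrete, here is a minimal sketch of the regex extraction performed by `extract_package_names` in `artifacts/publish_packages_finder.py`; the file names below are illustrative:
+
+```python
+import re
+
+# The lazy ".*?" plus the "(?=rc)" lookahead stop the match right before the rc suffix.
+pattern = r"(apache_airflow_providers.*?)(?=rc)"
+
+dev_files = [
+    "apache_airflow_providers_amazon-9.1.0rc1.tar.gz",
+    "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl",
+]
+
+# Extract the version-only names used to match dev packages against the release folder.
+names = {m.group(1) for f in dev_files if (m := re.match(pattern, f))}
+print(names)  # {'apache_airflow_providers_amazon-9.1.0'}
+```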
+
diff --git a/action.yml b/action.yml
deleted file mode 100644
index 7a58fa1..0000000
--- a/action.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-
-name: 'GH SVN PyPI Publisher'
-description: 'Publishes artifacts to pypi'
-
-runs:
-  using: "composite"
-  steps:
-    - name: Setup Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: '3.9'
diff --git a/artifacts/__init__.py b/artifacts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/artifacts/action.yml b/artifacts/action.yml
new file mode 100644
index 0000000..ec731ef
--- /dev/null
+++ b/artifacts/action.yml
@@ -0,0 +1,125 @@
+name: "Upload as artifacts"
+description: "Uploads the release distributions as artifacts"
+
+inputs:
+  publish-config:
+    description: >
+      JSON config for publishing packages. It contains all the information required to publish the packages to PyPI, e.g.:
+        publish:
+          id: publish
+          description: "Publish provider packages to PyPI"
+          release-type: "RC_VERSION"
+          exclude_extensions:
+            - type: regex
+              pattern: ".*(.asc|.sha512)$"
+          compare:
+            url: "https://dist.apache.org/repos/dist/release/airflow/"
+            path: "providers/"
+            package_names:
+              - type: regex
+                pattern: "(apache_airflow_providers.*?)(?=rc)"
+
+      release-type: RC_VERSION considers packages from the dev/ folder and publishes them to PyPI.
+      release-type: PYPI_VERSION considers packages from the release/ folder and publishes them to PyPI.
+      compare: section contains the release packages; it is used to compare against the packages in
+      the dev folder, and only matching packages will be published to PyPI.
+    required: true
+
+  temp-dir:
+    description: >
+      Checkout directory of the svn repo; this is used to check out the svn repo.
+    required: false
+    default: "asf-dist"
+
+  repo-path:
+    description: >
+      Path to the svn repo. Let's say you want to publish the packages from the dev folder,
+      eg: the svn repo structure is https://dist.apache.org/repos/dist/
+        dev/airflow/providers
+        release/airflow/providers
+      To publish the packages from the dev providers folder, set url and path in the release-config.yml like below:
+        url: https://dist.apache.org/repos/dist/dev/airflow
+        path: providers/
+    required: true
+
+  mode:
+    description: >
+      Mode to run the action. The default mode is 'VERIFY', which only verifies the packages and displays what would be published.
+      To publish the packages to PyPI, set the mode to 'RELEASE'.
+    required: false
+    default: "VERIFY"
+
+  publisher-name:
+    description: >
+      Name of the publisher, eg: providers; this is used to name the steps in the workflow.
+    required: true
+
+  if-no-files-found:
+    description: >
+      The desired behavior if no files are found using the provided path.
+
+      Available Options:
+        warn: Output a warning but do not fail the action
+        error: Fail the action with an error message
+        ignore: Do not output any warnings or errors, the action does not fail
+    default: 'warn'
+  retention-days:
+    description: >
+      Duration after which artifact will expire in days. 0 means using default retention.
+
+      Minimum 1 day.
+      Maximum 90 days unless changed from the repository settings page.
+    default: '5'
+  compression-level:
+    description: >
+      The level of compression for Zlib to be applied to the artifact archive.
+      The value can range from 0 to 9:
+      - 0: No compression
+      - 1: Best speed
+      - 6: Default compression (same as GNU Gzip)
+      - 9: Best compression
+      Higher levels will result in better compression, but will take longer to complete.
+      For large files that are not easily compressed, a value of 0 is recommended for significantly faster uploads.
+    default: '6'
+  overwrite:
+    description: >
+      If true, an artifact with a matching name will be deleted before a new one is uploaded.
+      If false, the action will fail if an artifact for the given name already exists.
+      Does not fail if the artifact does not exist.
+    default: 'false'
+
+  artifact-name:
+    description: >
+      Name of the artifact to be uploaded
+    required: false
+    default: "pypi-packages"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Setup Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.11"
+
+    - name: "Move release distributions to dist directory"
+      shell: bash
+      id: move-release-dists
+      env:
+        PUBLISH_PACKAGES_CONFIG: ${{ inputs.publish-config }}
+        DIST_PATH: "${{ github.workspace }}/${{ inputs.temp-dir }}/dist"
+        MODE: ${{ inputs.mode }}
+      run: |
+        python3 -m pip install uv
+        uv run $GITHUB_ACTION_PATH/publish_packages_finder.py
+      working-directory: "./${{ inputs.temp-dir }}/${{ inputs.repo-path }}"
+
+    - name: "Upload ${{ inputs.publisher-name }} to artifacts"
+      uses: actions/upload-artifact@v4
+      with:
+        name: ${{ inputs.artifact-name }}
+        path: "${{ github.workspace }}/${{ inputs.temp-dir }}/dist/*"
+        retention-days: ${{ inputs.retention-days }}
+        if-no-files-found: ${{ inputs.if-no-files-found }}
+        compression-level: ${{ inputs.compression-level }}
+        overwrite: ${{ inputs.overwrite }}
\ No newline at end of file
diff --git a/artifacts/publish_packages_finder.py b/artifacts/publish_packages_finder.py
new file mode 100644
index 0000000..db1ce63
--- /dev/null
+++ b/artifacts/publish_packages_finder.py
@@ -0,0 +1,297 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#     "rich",
+# ]
+# ///
+
+import json
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from functools import cached_property
+from typing import Any
+
+from rich.console import Console
+
+console = Console(width=400, color_system="standard")
+
+# We always work on the path provided in the release config, eg: below publisher.path is providers/,
+# so the current working directory will be providers/
+# publisher:
+#   name: providers
+#   url: https://dist.apache.org/repos/dist/dev/airflow/
+#   path: providers/
+
+
+class PublishPackagesFinder:
+    final_packages_to_publish: list[str] = []
+    matched_packages_between_dev_and_release: list[str] = []
+    publish_config = json.loads(os.environ.get("PUBLISH_PACKAGES_CONFIG", "{}"))
+    temp_svn_dist_release_dir = tempfile.TemporaryDirectory()
+
+    @cached_property
+    def dev_svn_files(self):
+        return os.listdir()
+
+    @cached_property
+    def svn_dist_release_dir(self):
+        return self.temp_svn_dist_release_dir.name
+
+    @staticmethod
+    def is_extension_matched(file: str, pattern: str) -> bool:
+        match = re.match(pattern, file)
+        return bool(match and file.endswith(match.group(1)))
+
+    @cached_property
+    def dist_path(self):
+        # Path where the final packages will be moved to and uploaded as artifacts
+        if not os.path.exists(os.environ.get("DIST_PATH")):
+            os.makedirs(os.environ.get("DIST_PATH"))
+        return os.environ.get("DIST_PATH")
+
+    @cached_property
+    def release_type(self):
+        return self.publish_config.get("release-type")
+
+    @cached_property
+    def extension_exclude_config(self):
+        return self.publish_config.get("exclude_extensions")
+
+    @cached_property
+    def github_workspace(self):
+        return os.environ.get("GITHUB_WORKSPACE")
+
+    @staticmethod
+    def extract_package_names(
+        package_name_config: list[dict[str, Any]], lookup_packages: list[str]
+    ) -> list[str]:
+        """
+        Extract the package names based on the regex pattern provided in the package_names config
+
+        :param package_name_config: e.g.
+
+            package_names:
+              - type: regex
+                pattern: "(apache_airflow_providers.*?)(?=rc)"
+
+            eg: for an rc package apache_airflow_providers_foo-1.0.0rc1.tar.gz it extracts the
+            package name "apache_airflow_providers_foo-1.0.0"
+        :param lookup_packages: List of packages to check for the package names
+        :return: matched package names
+        """
+        package_names: set[str] = set()
+
+        for name_config in package_name_config:
+            if name_config.get("type") == "regex":
+                regex_pattern = name_config.get("pattern")
+                package_names.update(
+                    match.group(1)
+                    for file in lookup_packages
+                    if (match := re.match(regex_pattern, file))
+                )
+
+        return list(package_names)
+
+    def find_matched_packages_between_dev_and_release(
+        self, compare_config: dict[str, Any]
+    ):
+        """
+        Find the packages matched between the dev and release folders based on the package names.
+        The comparison works with the config provided in the compare section of the release config:
+
+            compare:
+              url: "https://dist.apache.org/repos/dist/release/airflow/"
+              path: "providers/"
+              package_names:
+                - type: regex
+                  pattern: "(apache_airflow_providers.*?)(?=rc)"
+
+        The package names are extracted based on the regex pattern provided; in this case, for an
+        rc package apache_airflow_providers_foo-1.0.0rc1.tar.gz it extracts the package name
+        "apache_airflow_providers_foo-1.0.0" and compares it with the release folder packages.
+        startswith is used to compare the package names; if a name matches, the package is added
+        to the final publish list.
+
+        :param compare_config: Configuration to compare the packages between the dev and release folders
+        :return: None
+        """
+
+        # dev_package_names contains all the package names without rc (or whatever the regex
+        # pattern in package_names extracts): if a dev package name is
+        # "apache_airflow_providers_foo-1.0.0rc1.tar.gz", extract_package_names returns
+        # "apache_airflow_providers_foo-1.0.0" (depending on the regex pattern provided).
+
+        dev_package_names = self.extract_package_names(
+            compare_config.get("package_names"), self.dev_svn_files
+        )
+
+        if not dev_package_names:
+            console.print(
+                f"[red]No package names found in {os.getcwd()} with {compare_config.get('package_names')} [/]"
+            )
+            sys.exit(1)
+
+        inner_path = compare_config.get("path")
+        path_to_lookup = os.path.join(self.svn_dist_release_dir, inner_path)
+
+        release_folder_packages = os.listdir(path=path_to_lookup)
+        self.matched_packages_between_dev_and_release = [
+            package
+            for package in release_folder_packages
+            if any(
+                package.startswith(package_name) for package_name in dev_package_names
+            )
+        ]
+
+        if not self.matched_packages_between_dev_and_release:
+            svn_full_path = os.path.join(
+                self.publish_config.get("compare").get("url"), inner_path
+            ).strip()
+
+            console.print(
+                f"[red]No matched packages found between {os.getcwd()} and {svn_full_path}[/]"
+            )
+            sys.exit(1)
+
+    def exclude_packages_to_publish(
+        self, packages: list[str], exclude_config: list[dict[str, Any]]
+    ) -> list[str]:
+        """
+        Exclude the packages based on the exclude config
+
+        :param packages: List of packages to filter
+        :param exclude_config: Configuration to exclude the final publish packages based on the extension, eg: .asc, .sha512
+        :return: list of packages to publish
+        """
+
+        exclude_packages: set[str] = set()
+        for rule in exclude_config:
+            if rule.get("type") == "regex":
+                regex_pattern = rule.get("pattern")
+                exclude_packages.update(
+                    package
+                    for package in packages
+                    if self.is_extension_matched(package, regex_pattern)
+                )
+        if exclude_packages:
+            console.print("[blue]Following packages excluded: [/]")
+            console.print(f"[blue]{exclude_packages}[/]")
+            console.print("\n")
+
+        return list(set(packages) - exclude_packages)
+
+    def filter_rc_packages_to_publish(
+        self, exclude_extensions_config: list[dict[str, Any]]
+    ):
+        """
+        Filter the packages to publish for the release type RC_VERSION; for rc releases we directly
+        consider packages from the dev svn folder path provided in the release config.
+
+        :param exclude_extensions_config: Configuration to exclude the final publish packages based on the extension, eg: .asc, .sha512
+        :return: None
+        """
+        packages_to_publish = self.exclude_packages_to_publish(
+            packages=self.dev_svn_files, exclude_config=exclude_extensions_config
+        )
+        self.final_packages_to_publish.extend(packages_to_publish)
+
+    def move_packages_to_dist_folder(self, packages_path: str):
+        """
+        Move the packages to the dist folder
+
+        :param packages_path: location of the packages, where the packages are checked out
+        :return: None
+        """
+
+        if not self.final_packages_to_publish:
+            console.print("[red]No packages found to publish[/]")
+            sys.exit(1)
+
+        for package_name in self.final_packages_to_publish:
+            full_path = os.path.join(packages_path, package_name)
+            subprocess.run(["mv", full_path, self.dist_path], check=True)
+
+    def filter_pypi_version_packages_to_publish(
+        self,
+        compare_config: dict[str, Any],
+        extension_exclude_config: list[dict[str, Any]],
+    ):
+        """
+        :param compare_config: Configuration to compare the packages between the dev and release folders, i.e. the
+         release svn folder:
+            {
+              "url": "https://dist.apache.org/repos/dist/release/airflow/",
+              "path": "providers/",
+              "package_names": [
+                {
+                  "type": "regex",
+                  "pattern": "(apache_airflow_providers.*?)(?=rc)"
+                }
+              ]
+            }
+        :param extension_exclude_config: Configuration to exclude the final publish packages based on the extension, eg: .asc, .sha512
+        :return: None
+        """
+
+        self.find_matched_packages_between_dev_and_release(compare_config)
+
+        # matched_packages_between_dev_and_release contains all the packages without rc
+        # (or based on the regex-pattern-extracted names)
+
+        self.final_packages_to_publish.extend(
+            self.exclude_packages_to_publish(
+                self.matched_packages_between_dev_and_release, extension_exclude_config
+            )
+        )
+
+    @staticmethod
+    def checkout_svn_repo(repo_url: str, path_to_checkout: str):
+        console.print(
+            f"[blue]Checking out files from {repo_url} to {path_to_checkout}[/]"
+        )
+        subprocess.run(["svn", "co", repo_url, path_to_checkout], check=True)
+
+    def run(self):
+        try:
+            if self.release_type == "RC_VERSION":
+                self.filter_rc_packages_to_publish(self.extension_exclude_config)
+
+                # For RC releases we move the packages directly from the provided source path;
+                # the current working directory is that source path.
+                self.move_packages_to_dist_folder(os.getcwd())
+
+            elif self.release_type == "PYPI_VERSION":
+                compare_config = self.publish_config.get("compare")
+                repo_url = compare_config.get("url")
+                self.checkout_svn_repo(repo_url, self.svn_dist_release_dir)
+                self.filter_pypi_version_packages_to_publish(
+                    compare_config, self.extension_exclude_config
+                )
+
+                # For PYPI_VERSION releases we move the packages from the release folder to the
+                # dist folder; only packages matched between the dev and release folders are
+                # moved for final publishing.
+                release_files_path = os.path.join(
+                    self.svn_dist_release_dir, compare_config.get("path")
+                )
+                self.move_packages_to_dist_folder(release_files_path)
+            else:
+                console.print(f"[red]Invalid release type {self.release_type}[/]")
+                sys.exit(1)
+
+            if os.environ.get("MODE") == "VERIFY":
+                console.print(
+                    "[blue]To publish these packages to PyPI, set mode=RELEASE in the workflow and run again[/]"
+                )
+            else:
+                console.print("[blue]Following packages will be published to PyPI[/]")
+
+            for package in self.final_packages_to_publish:
+                console.print(f"[blue]{package}[/]")
+
+        except Exception as e:
+            console.print(f"[red]Error: {e}[/]")
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    PublishPackagesFinder().run()
diff --git a/artifacts/test_publish_packages_finder.py b/artifacts/test_publish_packages_finder.py
new file mode 100644
index 0000000..434e481
--- /dev/null
+++ b/artifacts/test_publish_packages_finder.py
@@ -0,0 +1,421 @@
+import os.path
+import tempfile
+
+import pytest
+from pytest_unordered import unordered
+
+from artifacts.publish_packages_finder import PublishPackagesFinder
+
+
+def write_data(files, path):
+    if not os.path.exists(path):
+        os.makedirs(path)
+    for file in files:
+        with open(os.path.join(path, file), "w") as f:
+            f.write("test")
+
+
+class TestPublishPackagesFinder:
+    @pytest.mark.parametrize(
+        "packages, exclude_config, expected",
+        [
+            pytest.param(
+                [
+                    "airflow-provider-1.0.0.tar.gz.asc",
+                    "package3-1.0.0.tar.gz",
+                    "package2-1.0.0.py3-none-any.whl.sha512",
+                    "package4-1.0.0.tar.gz",
+                ],
+                [
+                    {
+                        "type": "regex",
+                        "pattern": r".*(tar.gz.asc|py3-none-any.whl.sha512)$",
+                    },
+                ],
+                [
+                    "package4-1.0.0.tar.gz",
+                    "package3-1.0.0.tar.gz",
+                ],
+                id="exclude_few_package_extensions",
+            ),
+            pytest.param(
+                [
+                    "airflow-provider-1.0.0.tar.gz.asc",
+                    "package2-1.0.0.py3-none-any.whl.sha512",
+                ],
+                [
+                    {
+                        "type": "regex",
+                        "pattern": r".*(tar.gz.asc|py3-none-any.whl.sha512)$",
+                    },
+                ],
+                [],
+                id="exclude_all_given_packages",
+            ),
+        ],
+    )
+    def test_exclude_packages_to_publish(self, packages, exclude_config, expected):
+        publish_packages_finder = PublishPackagesFinder()
+        after_exclude_packages = publish_packages_finder.exclude_packages_to_publish(
+            packages=packages, exclude_config=exclude_config
+        )
+        assert after_exclude_packages == unordered(expected)
+
+    @pytest.mark.parametrize(
+        "packages, exclude_config, expected",
+        [
+            pytest.param(
+                [
+                    "apache_airflow_providers_amazon-9.1.0rc1.tar.gz",
+                    "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.asc",
+                    "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.sha512",
+                    "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl",
+                    "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.asc",
+                    "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.sha512",
+                ],
+                [
+                    {
+                        "type": "regex",
+                        "pattern": r".*(.asc|.sha512)$",
+                    },
+                ],
+                [
+                    "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl",
"apache_airflow_providers_amazon-9.1.0rc1.tar.gz", + ], + id="return_rc_packages", + ), + pytest.param( + [ + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.sha512", + ], + [ + { + "type": "regex", + "pattern": r".*(.asc|.sha512)$", + }, + ], + [], + id="no_rc_packages", + ), + ], + ) + def test_filter_rc_packages_to_publish(self, packages, exclude_config, expected): + publish_packages_finder = PublishPackagesFinder() + publish_packages_finder.final_packages_to_publish.clear() + + # Write some files to temporary dev svn folder + temp_dev_svn_folder = tempfile.TemporaryDirectory() + os.chdir(temp_dev_svn_folder.name) + write_data(packages, temp_dev_svn_folder.name) + publish_packages_finder.filter_rc_packages_to_publish( + exclude_extensions_config=exclude_config + ) + + assert publish_packages_finder.final_packages_to_publish == unordered(expected) + + @pytest.mark.parametrize( + "packages, package_name_config, expected", + [ + pytest.param( + [ + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz", + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.sha512", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.asc", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.sha512", + "apache_airflow_providers_airbyte-10.1.0rc1.tar.gz", + "apache_airflow_providers_airbyte-10.1.0rc1.tar.gz.asc", + "apache_airflow_providers_airbyte-10.1.0rc1.tar.gz.sha512", + "apache_airflow_providers_airbyte-10.1.0rc1-py3-none-any.whl", + "apache_airflow_providers_airbyte-10.1.0rc1-py3-none-any.whl.asc", + "apache_airflow_providers_airbyte-10.1.0rc1-py3-none-any.whl.sha512", + ], + [ + { + "type": "regex", + "pattern": "(apache_airflow_providers.*?)(?=rc)", + }, + ], + [ + "apache_airflow_providers_amazon-9.1.0", + "apache_airflow_providers_airbyte-10.1.0", + ], + id="return_package_name_without_rc", + ), + pytest.param( + [ + "apache-superset-incubating-0.34.0rc2-source.tar.gz", + "apache-superset-incubating-0.34.0rc2-source.tar.gz.asc", + "apache-superset-incubating-0.34.0rc2-source.tar.gz.sha512", + ], + [ + { + "type": "regex", + "pattern": "(apache-superset-incubating.*?)(?=rc)", + }, + ], + [ + "apache-superset-incubating-0.34.0", + ], + id="return_superset_package_name_without_rc", + ), + ], + ) + def test_extract_package_names(self, packages, package_name_config, expected): + publish_packages_finder = PublishPackagesFinder() + extracted_names = publish_packages_finder.extract_package_names( + package_name_config=package_name_config, lookup_packages=packages + ) + assert extracted_names == unordered(expected) + + @pytest.mark.parametrize( + "compare_config, temp_release_dir_files, temp_dev_svn_files, expected", + [ + pytest.param( + { + "url": "https://dist.apache.org/repos/dist/release/airflow/", + "path": "airflow/providers/", + "package_names": [ + { + "type": "regex", + "pattern": "(apache_airflow_providers.*?)(?=rc)", + } + ], + }, + [ + "apache_airflow_providers_amazon-9.1.0.tar.gz", + "apache_airflow_providers_amazon-9.1.0.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0.tar.gz.sha512", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.asc", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.sha512", + "apache_airflow_providers_airbyte-10.1.0.tar.gz", + "apache_airflow_providers_airbyte-10.1.0.tar.gz.asc", + 
"apache_airflow_providers_airbyte-10.1.0.tar.gz.sha512", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl.asc", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl.sha512", + ], + [ + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz", + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.sha512", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.asc", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.sha512", + "apache_airflow_providers_airbyte-10.1.0rc1.tar.gz", + "apache_airflow_providers_airbyte-10.1.0rc1.tar.gz.asc", + "apache_airflow_providers_airbyte-10.1.0rc1.tar.gz.sha512", + "apache_airflow_providers_airbyte-10.1.0rc1-py3-none-any.whl", + "apache_airflow_providers_airbyte-10.1.0rc1-py3-none-any.whl.asc", + "apache_airflow_providers_airbyte-10.1.0rc1-py3-none-any.whl.sha512", + ], + [ + "apache_airflow_providers_amazon-9.1.0.tar.gz", + "apache_airflow_providers_amazon-9.1.0.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0.tar.gz.sha512", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.asc", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.sha512", + "apache_airflow_providers_airbyte-10.1.0.tar.gz", + "apache_airflow_providers_airbyte-10.1.0.tar.gz.asc", + "apache_airflow_providers_airbyte-10.1.0.tar.gz.sha512", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl.asc", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl.sha512", + ], + id="find_matched_packages_between_dev_and_release", + ), + pytest.param( + { + "url": "https://dist.apache.org/repos/dist/release/airflow/", + "path": "airflow/providers/", + "package_names": [ + { + "type": "regex", + "pattern": "(apache_airflow_providers.*?)(?=rc)", + } + ], + }, + [ + "apache_airflow_providers_amazon-9.1.0.tar.gz", + "apache_airflow_providers_amazon-9.1.0.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0.tar.gz.sha512", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.asc", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.sha512", + "apache_airflow_providers_airbyte-10.1.0.tar.gz", + "apache_airflow_providers_airbyte-10.1.0.tar.gz.asc", + "apache_airflow_providers_airbyte-10.1.0.tar.gz.sha512", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl.asc", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl.sha512", + ], + [ + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz", + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.sha512", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.asc", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.sha512", + ], + [ + "apache_airflow_providers_amazon-9.1.0.tar.gz", + "apache_airflow_providers_amazon-9.1.0.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0.tar.gz.sha512", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.asc", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.sha512", + ], + 
id="find_matched_packages_between_dev_and_release_should_return_one_provider", + ), + ], + ) + def test_find_matched_packages_between_dev_and_release( + self, + compare_config, + temp_release_dir_files, + temp_dev_svn_files, + expected, + ): + publish_packages_finder = PublishPackagesFinder() + + # Write some files to temporary release folder + write_data( + temp_release_dir_files, + os.path.join( + publish_packages_finder.svn_dist_release_dir, compare_config.get("path") + ), + ) + + # Write some files to temporary dev svn folder + temp_dev_svn_folder = tempfile.TemporaryDirectory() + os.chdir(temp_dev_svn_folder.name) + write_data(temp_dev_svn_files, temp_dev_svn_folder.name) + + publish_packages_finder.find_matched_packages_between_dev_and_release( + compare_config + ) + assert ( + publish_packages_finder.matched_packages_between_dev_and_release + == unordered(expected) + ) + + def test_find_matched_packages_between_dev_and_release_when_no_match_should_fail( + self, + ): + publish_packages_finder = PublishPackagesFinder() + files = [ + "apache_airflow_providers_amazon-9.1.0.tar.gz", + "apache_airflow_providers_amazon-9.1.0.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0.tar.gz.sha512", + ] + write_data(files, publish_packages_finder.svn_dist_release_dir) + + temp_dev_svn_folder = tempfile.TemporaryDirectory() + os.chdir(temp_dev_svn_folder.name) + write_data( + [ + "apache_airflow_providers-airbyte-9.1.0.tar.gz.sha512", + ], + temp_dev_svn_folder.name, + ) + + with pytest.raises(SystemExit): + publish_packages_finder.find_matched_packages_between_dev_and_release( + compare_config={ + "url": "https://someurl/", + "path": "airflow/providers/", + "package_names": [ + { + "type": "regex", + "pattern": "(apache_airflow_providers.*?)(?=rc)", + } + ], + } + ) + + @pytest.mark.parametrize( + "compare_config, temp_release_dir_files, temp_dev_svn_files, expected", + [ + pytest.param( + { + "url": "https://dist.apache.org/repos/dist/release/airflow/", + "path": "airflow/providers/", + "package_names": [ + { + "type": "regex", + "pattern": "(apache_airflow_providers.*?)(?=rc)", + } + ], + }, + [ + "apache_airflow_providers_amazon-9.1.0.tar.gz", + "apache_airflow_providers_amazon-9.1.0.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0.tar.gz.sha512", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.asc", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl.sha512", + "apache_airflow_providers_airbyte-10.1.0.tar.gz", + "apache_airflow_providers_airbyte-10.1.0.tar.gz.asc", + "apache_airflow_providers_airbyte-10.1.0.tar.gz.sha512", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl.asc", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl.sha512", + ], + [ + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz", + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.asc", + "apache_airflow_providers_amazon-9.1.0rc1.tar.gz.sha512", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.asc", + "apache_airflow_providers_amazon-9.1.0rc1-py3-none-any.whl.sha512", + "apache_airflow_providers_airbyte-10.1.0rc1.tar.gz", + "apache_airflow_providers_airbyte-10.1.0rc1.tar.gz.asc", + "apache_airflow_providers_airbyte-10.1.0rc1.tar.gz.sha512", + "apache_airflow_providers_airbyte-10.1.0rc1-py3-none-any.whl", + "apache_airflow_providers_airbyte-10.1.0rc1-py3-none-any.whl.asc", + 
"apache_airflow_providers_airbyte-10.1.0rc1-py3-none-any.whl.sha512", + ], + [ + "apache_airflow_providers_amazon-9.1.0.tar.gz", + "apache_airflow_providers_amazon-9.1.0-py3-none-any.whl", + "apache_airflow_providers_airbyte-10.1.0.tar.gz", + "apache_airflow_providers_airbyte-10.1.0-py3-none-any.whl", + ], + id="find_matched_packages_between_dev_and_release", + ), + ], + ) + def test_filter_pypi_version_packages_to_publish( + self, compare_config, temp_release_dir_files, temp_dev_svn_files, expected + ): + # Test compare the dev and release packages and filter the packages to publish + publish_packages_finder = PublishPackagesFinder() + publish_packages_finder.final_packages_to_publish.clear() + + # Write some files to temporary dev svn folder + temp_dev_svn_folder = tempfile.TemporaryDirectory() + os.chdir(temp_dev_svn_folder.name) + write_data(temp_dev_svn_files, temp_dev_svn_folder.name) + + dist_folder = tempfile.TemporaryDirectory() + os.environ["DIST_PATH"] = dist_folder.name + + # Create temporary release folder files + write_data(temp_release_dir_files, publish_packages_finder.svn_dist_release_dir) + + publish_packages_finder.filter_pypi_version_packages_to_publish( + compare_config=compare_config, + extension_exclude_config=[ + { + "type": "regex", + "pattern": r".*(.asc|.sha512)$", + } + ], + ) + assert publish_packages_finder.final_packages_to_publish == unordered(expected) diff --git a/checksum/__init__.py b/checksum/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/checksum/action.yml b/checksum/action.yml new file mode 100644 index 0000000..f9ce585 --- /dev/null +++ b/checksum/action.yml @@ -0,0 +1,46 @@ +name: "Checksum Validator" +description: "Validate checksum of packages" + +inputs: + checksum-config: + description: > + Json config for checksum, it contains all the information required to validate the checksum of the packages. + It does validation for the checksum of the packages, all the packages should have checksum + files and hashed with sha algorithm. + required: true + + temp-dir: + description: > + Checkout directory of svn repo, this is used to checkout the svn repo. + required: false + default: "asf-dist" + + repo-path: + description: > + Path to the svn repo. Lets say to publish the packages from the dev folder. 
+ eg: svn repo structure is https://dist.apache.org/repos/dist/ + dev/airflow/providers + release/airflow/providers + now to publish the packages from dev providers folder, set url and path like below in the release-config.yml + url: https://dist.apache.org/repos/dist/dev/airflow + repo-path: providers/ + required: true + +runs: + using: "composite" + steps: + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + + - name: "Check sum validation" + shell: bash + id: check-sum + env: + REPO_PATH: ${{ inputs.repo-path }} + CHECK_SUM_CONFIG: ${{ inputs.checksum-config }} + run: | + python3 -m pip install uv + uv run $GITHUB_ACTION_PATH/checksum_check.py + working-directory: "./${{ inputs.temp-dir }}/${{ inputs.repo-path }}" \ No newline at end of file diff --git a/checksum/checksum_check.py b/checksum/checksum_check.py new file mode 100644 index 0000000..59d1693 --- /dev/null +++ b/checksum/checksum_check.py @@ -0,0 +1,90 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "rich", +# ] +# /// +import hashlib +import json +import os +import sys +from typing import Any + +from rich.console import Console + +console = Console(width=400, color_system="standard") + +svn_files = os.listdir() + +invalid_checksums = [] + + +def validate_checksum(check_sum_files: list[dict[str, str]], algorithm: str): + for file_dict in check_sum_files: + sha_file, check_file = file_dict.values() + + with open(check_file, "rb") as chk: + digest = hashlib.file_digest(chk, algorithm) + + actual_sha = digest.hexdigest() + + with open(sha_file, "rb") as shf: + content = shf.read().decode("utf-8").strip() + + expected_sha = content.split()[0] + + if actual_sha != expected_sha: + invalid_checksums.append( + { + "file": sha_file, + "expected_sha": expected_sha, + "actual_sha": actual_sha, + } + ) + + +def get_valid_files(algorithm: str, files: list[str]) -> list[dict[str, str]]: + eligible_files = [] + for file in files: + if file.endswith(algorithm): + eligible_files.append( + { + "sha_file": file, + "check_file": file.replace(algorithm, "").rstrip("."), + } + ) + return eligible_files + + +if __name__ == "__main__": + check_sum_config: list[dict[str, Any]] = json.loads( + os.environ.get("CHECK_SUM_CONFIG") + ) + + if not check_sum_config: + console.print( + "[red]Error: CHECK_SUM_CONFIG not set[/]\n" + "You must set `CHECK_SUM_CONFIG` environment variable to run this script" + ) + sys.exit(1) + + if not svn_files: + console.print( + f"[red]Error: No files found in SVN directory at {os.environ.get('REPO_PATH')}[/]" + ) + sys.exit(1) + + for check in check_sum_config: + console.print(f"[blue]{check.get('description')}[/]") + valid_files = get_valid_files(check.get("algorithm"), svn_files) + validate_checksum(valid_files, check.get("algorithm")) + + if invalid_checksums: + console.print("[red]Checksum validation failed[/]") + for invalid in invalid_checksums: + console.print(f"[red]File: {invalid.get('file')}[/]") + console.print(f"[red]Expected SHA: {invalid.get('expected_sha')}[/]") + console.print(f"[red]Actual SHA: {invalid.get('actual_sha')}[/]") + sys.exit(1) + + console.print("[blue]Checksum validation passed[/]") diff --git a/checksum/test_checksum_check.py b/checksum/test_checksum_check.py new file mode 100644 index 0000000..916033c --- /dev/null +++ b/checksum/test_checksum_check.py @@ -0,0 +1,119 @@ +import os +import tarfile +import tempfile +from unittest.mock import patch + +from checksum.checksum_check import ( + get_valid_files, + invalid_checksums, + 
validate_checksum, +) + + +def test_get_valid_files_sha512(): + files = [ + "apache-airflow-2.10.3-source.tar.gz.sha512", + "apache_airflow-2.10.3-py3-none-any.whl.asc", + "apache_airflow-2.10.3-py3-none-any.whl.sha512", + "apache_airflow-2.10.3.tar.gz", + ] + valida_files = get_valid_files("sha512", files) + assert valida_files == [ + { + "sha_file": "apache-airflow-2.10.3-source.tar.gz.sha512", + "check_file": "apache-airflow-2.10.3-source.tar.gz", + }, + { + "sha_file": "apache_airflow-2.10.3-py3-none-any.whl.sha512", + "check_file": "apache_airflow-2.10.3-py3-none-any.whl", + }, + ] + + +def test_get_valid_files_with_sha256(): + files = [ + "apache-airflow-2.10.3-source.tar.gz.sha256", + "apache_airflow-2.10.3-py3-none-any.whl.asc", + "apache_airflow-2.10.3-py3-none-any.whl.sha256", + "apache_airflow-2.10.3.tar.gz", + ] + valida_files = get_valid_files("sha256", files) + assert valida_files == [ + { + "sha_file": "apache-airflow-2.10.3-source.tar.gz.sha256", + "check_file": "apache-airflow-2.10.3-source.tar.gz", + }, + { + "sha_file": "apache_airflow-2.10.3-py3-none-any.whl.sha256", + "check_file": "apache_airflow-2.10.3-py3-none-any.whl", + }, + ] + + +@patch("hashlib.file_digest") +def test_validate_checksum(mock_file_digest): + mock_file_digest.return_value.hexdigest.return_value = "bbc759357eb1980e7f80ba0b016e9ed02120e26fcd008129b5777baf8086208c45e170e3c98cf35bd96a246d59484bde3220a897e5e6a7f688a69a40bcd451bd" + + invalid_checksums.clear() + temp_dir = tempfile.TemporaryDirectory() + temp_file = tempfile.NamedTemporaryFile() + os.chdir(temp_dir.name) + + with open(temp_file.name, "wb") as temp_data: + temp_data.write(b"some random data") + + with open( + temp_dir.name + "/apache-airflow-2.10.3-source.tar.gz.sha512", "wb" + ) as temp_file: + temp_file.write( + b"bbc759357eb1980e7f80ba0b016e9ed02120e26fcd008129b5777baf8086208c45e170e3c98cf35bd96a246d59484bde3220a897e5e6a7f688a69a40bcd451bd apache-airflow-2.10.3-source.tar.gz" + ) + tar = tarfile.open(temp_dir.name + "/apache-airflow-2.10.3-source.tar.gz", "w:gz") + tar.add(temp_file.name) + tar.close() + + check_sum_files = [ + { + "sha_file": "apache-airflow-2.10.3-source.tar.gz.sha512", + "check_file": "apache-airflow-2.10.3-source.tar.gz", + } + ] + validate_checksum(check_sum_files, "sha512") + assert not invalid_checksums + + +@patch("hashlib.file_digest") +def test_validate_checksum_invalid(mock_file_digest): + mock_file_digest.return_value.hexdigest.return_value = "bbc759357eb1980e7f80ba0b016e9ed02120e26fcd008129b5777baf8086208c45e170e3c98cf35bd96a246d59484bde3220a897e5e6a7f688a69a40bcd451bd12" + invalid_checksums.clear() + temp_dir = tempfile.TemporaryDirectory() + temp_file = tempfile.NamedTemporaryFile() + os.chdir(temp_dir.name) + + with open(temp_file.name, "wb") as temp_data: + temp_data.write(b"some random data") + + with open( + temp_dir.name + "/apache-airflow-2.10.3-source.tar.gz.sha512", "wb" + ) as temp_file: + temp_file.write( + b"bbc759357eb1980e7f80ba0b016e9ed02120e26fcd008129b5777baf8086208c45e170e3c98cf35bd96a246d59484bde3220a897e5e6a7f688a69a40bcd451bd apache-airflow-2.10.3-source.tar.gz" + ) + tar = tarfile.open(temp_dir.name + "/apache-airflow-2.10.3-source.tar.gz", "w:gz") + tar.add(temp_file.name) + tar.close() + + check_sum_files = [ + { + "sha_file": "apache-airflow-2.10.3-source.tar.gz.sha512", + "check_file": "apache-airflow-2.10.3-source.tar.gz", + } + ] + validate_checksum(check_sum_files, "sha512") + assert invalid_checksums == [ + { + "file": "apache-airflow-2.10.3-source.tar.gz.sha512", + 
"expected_sha": "bbc759357eb1980e7f80ba0b016e9ed02120e26fcd008129b5777baf8086208c45e170e3c98cf35bd96a246d59484bde3220a897e5e6a7f688a69a40bcd451bd", + "actual_sha": "bbc759357eb1980e7f80ba0b016e9ed02120e26fcd008129b5777baf8086208c45e170e3c98cf35bd96a246d59484bde3220a897e5e6a7f688a69a40bcd451bd12", + } + ] diff --git a/init/action.yml b/init/action.yml new file mode 100644 index 0000000..b9f1550 --- /dev/null +++ b/init/action.yml @@ -0,0 +1,38 @@ +name: "Svn checkout" +description: "Checkout svn repo" +inputs: + temp-dir: + description: > + Checkout directory of svn repo, this is used to checkout the svn repo. + required: false + default: "asf-dist" + + repo-url: + description: > + URL of the svn repo ex: https://dist.apache.org/repos/dist/release/airflow/ + this is used to checkout the svn repo. + required: true + + repo-path: + description: > + Path to the svn repo. Lets say to publish the packages from the dev folder. + eg: svn repo structure is https://dist.apache.org/repos/dist/ + dev/airflow/providers + release/airflow/providers + now to publish the packages from dev providers folder, set url and path like below in the release-config.yml + url: https://dist.apache.org/repos/dist/dev/airflow + repo-path: providers/ + required: true + +runs: + using: "composite" + steps: + - name: "Checkout svn repo ${{ inputs.repo-url }}" + shell: bash + env: + repo_url: ${{ inputs.repo-url }} + run: | + echo "Checking out SVN repo at $repo_url" + svn co $repo_url ./${{ inputs.temp-dir }} + echo "SVN repo checked out" + echo ls -lthr ./${{ inputs.temp-dir }} diff --git a/read-config/action.yml b/read-config/action.yml new file mode 100644 index 0000000..ae448e3 --- /dev/null +++ b/read-config/action.yml @@ -0,0 +1,51 @@ +name: "Config parser" +description: "Parse the release-config file" + +inputs: + release-config: + description: > + Path to the release config file, this is used to parse the release config file. + And outputs all the sections of the release config file. default will be release-config.yml in project root. 
+    required: true
+    default: "release-config.yml"
+
+outputs:
+  publisher-name:
+    value: ${{ steps.config-parser.outputs.publisher-name }}
+    description: "Name of the publisher, e.g. providers"
+  publisher-url:
+    value: ${{ steps.config-parser.outputs.publisher-url }}
+    description: "URL of the svn repo"
+  publisher-path:
+    value: ${{ steps.config-parser.outputs.publisher-path }}
+    description: "Path to the svn repo"
+  checks-svn:
+    value: ${{ steps.config-parser.outputs.checks-svn }}
+    description: "Config for svn checks"
+  checks-checksum:
+    value: ${{ steps.config-parser.outputs.checks-checksum }}
+    description: "Config for checksum checks"
+  checks-signature:
+    value: ${{ steps.config-parser.outputs.checks-signature }}
+    description: "Config for signature checks"
+  checks-publish:
+    value: ${{ steps.config-parser.outputs.checks-publish }}
+    description: "Config for finding eligible packages and publishing them to PyPI"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Setup Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.11'
+
+    - name: "Config parser"
+      shell: bash
+      id: config-parser
+      env:
+        RELEASE_CONFIG_FILE: ${{ inputs.release-config }}
+        RELEASE_CONFIG_SCHEMA: ${{ github.action_path }}/release-config-schema.yml.schema.json
+      run: |
+        python3 -m pip install uv
+        uv run $GITHUB_ACTION_PATH/config_parser.py
\ No newline at end of file
diff --git a/read-config/config_parser.py b/read-config/config_parser.py
new file mode 100644
index 0000000..5bf7b7c
--- /dev/null
+++ b/read-config/config_parser.py
@@ -0,0 +1,100 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#     "rich",
+#     "pyyaml",
+#     "jsonschema",
+# ]
+# ///
+import json
+import os
+import sys
+
+import yaml
+from jsonschema.validators import validator_for
+from rich.console import Console
+
+console = Console(width=200, color_system="standard")
+config_file = os.environ.get("RELEASE_CONFIG_FILE")
+schema_path = os.environ.get("RELEASE_CONFIG_SCHEMA")
+
+if not config_file:
+    console.print(
+        "[red]Error: RELEASE_CONFIG_FILE not set[/]\n"
+        "You must set `RELEASE_CONFIG_FILE` environment variable to run this script"
+    )
+    sys.exit(1)
+
+
+def set_outputs(yml_config):
+    """
+    Write every section of the parsed config to GITHUB_OUTPUT as
+    "<section>-<key>=<value>" pairs; nested values are JSON-encoded.
+
+    :param yml_config: parsed release config
+    :return: None
+    """
+
+    with open(os.environ["GITHUB_OUTPUT"], "a") as f:
+        for root_element, root_values in yml_config.items():
+            for key, value in root_values.items():
+                if isinstance(value, (dict, list)):
+                    f.write(f"{root_element}-{key}={json.dumps(value)}\n")
+                else:
+                    f.write(f"{root_element}-{key}={value}\n")
+
+
+def read_file(path) -> dict:
+    """
+    Read a YAML or JSON config file and return the parsed data
+
+    :param path: path to the config file
+    :return: parsed config as a dict
+    """
+    if path.endswith(".yml") or path.endswith(".yaml"):
+        with open(path) as file:
+            return yaml.safe_load(file)
+
+    if path.endswith(".json"):
+        with open(path) as file:
+            return json.load(file)
+
+    console.print(f"[red]Error: Unsupported config file type: {path}[/]")
+    sys.exit(1)
+
+
+def validate_config(yml_config):
+    """
+    Validate the release config against the schema and exit non-zero
+    when validation errors are found.
+
+    :param yml_config: parsed release config
+    :return: None
+    """
+    exit_code = 0
+
+    with open(schema_path) as schema_file:
+        schema = json.loads(schema_file.read())
+
+    validator = validator_for(schema)
+    validator.check_schema(schema)
+
+    for error in validator(schema).iter_errors(yml_config):
+        exit_code = 1
+        console.print(f"[red]Error: {error}[/]")
+
+    if exit_code:
+        console.print("[red]Release config validation failed[/]")
+        sys.exit(exit_code)
+
+
+if __name__ == "__main__":
+    yml_config_data = read_file(config_file)
+    console.print("[blue]Release config validation started[/]")
+
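    # Validate against the JSON schema before any outputs are written
+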
validate_config(yml_config_data) + console.print("[blue]Release config validation passed[/]") + console.print("[blue]Setting outputs to GITHUB_OUTPUT[/]") + set_outputs(yml_config_data) + console.print("[blue]Completed setting outputs to GITHUB_OUTPUT[/]") + console.print("[blue]Release config validation completed successfully[/]") + console.print("") + console.print("[blue]Starting validations for:[/]") + console.print(f"[blue] Project: {yml_config_data.get('project').get('name')}[/]") + console.print( + f"[blue] Description: {yml_config_data.get('project').get('description')}[/]" + ) + console.print( + f"[blue] Publisher: {yml_config_data.get('publisher').get('name')}[/]" + ) diff --git a/read-config/release-config-schema.yml.schema.json b/read-config/release-config-schema.yml.schema.json new file mode 100644 index 0000000..bf81859 --- /dev/null +++ b/read-config/release-config-schema.yml.schema.json @@ -0,0 +1,214 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "project": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": [ + "description", + "name" + ] + }, + "publisher": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "url": { + "type": "string" + }, + "path": { + "type": "string" + } + }, + "required": [ + "name", + "path", + "url" + ] + }, + "checks": { + "type": "object", + "properties": { + "svn": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "description": { + "type": "string" + }, + "identifiers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "pattern": { + "type": "string" + } + }, + "required": [ + "pattern", + "type" + ] + } + } + }, + "required": [ + "description", + "id", + "identifiers" + ] + } + }, + "checksum": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "description": { + "type": "string" + }, + "algorithm": { + "type": "string" + } + }, + "required": [ + "algorithm", + "description", + "id" + ] + } + }, + "signature": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "description": { + "type": "string" + }, + "method": { + "type": "string" + }, + "keys": { + "type": "string" + } + }, + "required": [ + "description", + "id", + "keys", + "method" + ] + } + }, + "publish": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "description": { + "type": "string" + }, + "release-type": { + "type": "string" + }, + "exclude_extensions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "pattern": { + "type": "string" + } + }, + "required": [ + "pattern", + "type" + ] + } + }, + "compare": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "path": { + "type": "string" + }, + "package_names": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "pattern": { + "type": "string" + } + }, + "required": [ + "pattern", + "type" + ] + } + } + }, + "required": [ + "package_names", + "path", + "url" + ] + } + }, + "required": [ + "compare", + "description", + "exclude_extensions", + "id", + "release-type" + ] + } + }, + "required": [ + "checksum", + "publish", + "signature", + "svn" + ] + } + }, + "required": [ + 
"checks", + "project", + "publisher" + ] +} \ No newline at end of file diff --git a/release-config.yml b/release-config.yml new file mode 100644 index 0000000..6ffecf1 --- /dev/null +++ b/release-config.yml @@ -0,0 +1,48 @@ +project: + name: example-project + description: "Example project for publishing to PyPI" +publisher: + name: providers + url: "https://dist.apache.org/repos/dist/dev/airflow" + path: "providers/" +checks: + svn: + - id: extension + description: "Validate svn package extensions" + identifiers: + - type: regex + pattern: ".*(py3-none-any.whl|tar.gz.sha512|tar.gz.asc|tar.gz|py3-none-any.whl.asc|py3-none-any.whl.sha512)$" + + - id: package_name + description: "Validate svn package names" + identifiers: + - type: regex + pattern: ".*(apache_airflow.*)$" + + - type: regex + pattern: ".*(apache-airflow.*)$" + + checksum: + - id: checksum + description: "Validate check sum with SHA512" + algorithm: "sha512" + + signature: + - id: signature + description: "Validate signatures with GPG of packages" + method: gpg + keys: "https://dist.apache.org/repos/dist/release/airflow/KEYS" + + publish: + id: publish + description: "Publish provider packages to PyPI" + release-type: "RC_VERSION" + exclude_extensions: + - type: regex + pattern: ".*(.asc|.sha512)$" + compare: + url: "https://dist.apache.org/repos/dist/release/airflow/" + path: "providers/" + package_names: + - type: regex + pattern: "(apache_airflow_providers.*?)(?=rc)" diff --git a/signature/__init__.py b/signature/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/signature/action.yml b/signature/action.yml new file mode 100644 index 0000000..4418753 --- /dev/null +++ b/signature/action.yml @@ -0,0 +1,43 @@ +name: 'Signature Validator' +description: 'Validate signature of packages' +inputs: + signature-config: + description: > + Json config for signature validation, it contains all the information required to validate the signature of the packages. + It does validation for the signature of the packages, all the packages should have valid. + required: true + temp-dir: + description: > + Checkout directory of svn repo, this is used to checkout the svn repo. + required: false + default: "asf-dist" + + repo-path: + description: > + Path to the svn repo. Lets say to publish the packages from the dev folder. 
+ eg: svn repo structure is https://dist.apache.org/repos/dist/ + dev/airflow/providers + release/airflow/providers + now to publish the packages from dev providers folder, set url and path like below in the release-config.yml + url: https://dist.apache.org/repos/dist/dev/airflow + repo-path: providers/ + required: true + +runs: + using: "composite" + steps: + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: "Signature validation" + shell: bash + id: signature-check + env: + REPO_PATH: ${{ inputs.repo-path }} + SIGNATURE_CHECK_CONFIG: ${{ inputs.signature-config }} + run: | + python3 -m pip install uv + uv run $GITHUB_ACTION_PATH/signature_check.py + working-directory: "./${{ inputs.temp-dir }}/${{ inputs.repo-path }}" \ No newline at end of file diff --git a/signature/signature_check.py b/signature/signature_check.py new file mode 100644 index 0000000..a30cbb0 --- /dev/null +++ b/signature/signature_check.py @@ -0,0 +1,91 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "rich", +# "requests", +# "python-gnupg", +# ] +# /// +import json +import os +import sys +import tempfile +from typing import Any + +import gnupg +import requests +from rich.console import Console + +console = Console(width=400, color_system="standard") + +svn_files = os.listdir() +temp_signature_key_file_path = tempfile.NamedTemporaryFile().name + +invalid_signature_files = [] + + +def download_keys(key_url: str): + response = requests.get(key_url) + if response.status_code != 200: + console.print( + f"[red]Error: Unable to download signature file from {key_url}: received: {response.status_code}[/]" + ) + sys.exit(1) + + with open(temp_signature_key_file_path, "w") as key_file: + key_file.write(response.text) + + +def validate_signature_with_gpg(signature_check: dict[str, Any]): + key_url = signature_check.get("keys") + + download_keys(key_url) + gpg = gnupg.GPG() + with open(temp_signature_key_file_path, "rb") as key_file: + gpg.import_keys(key_file.read()) + + for file in svn_files: + if file.endswith(".asc"): + with open(file, "rb") as singed_file: + status = gpg.verify_file( + fileobj_or_path=singed_file, data_filename=file.replace(".asc", "") + ) + if not status.valid: + invalid_signature_files.append( + {"file": file, "status": status.valid, "problems": status.problems} + ) + else: + console.print(f"[blue]File {file} signed by {status.username}[/]") + + +if __name__ == "__main__": + signature_check_config: list[dict[str, Any]] = json.loads( + os.environ.get("SIGNATURE_CHECK_CONFIG") + ) + + if not signature_check_config: + console.print( + "[red]Error: SIGNATURE_CHECK_CONFIG not set[/]\n" + "You must set `SIGNATURE_CHECK_CONFIG` environment variable to run this script" + ) + sys.exit(1) + + if not svn_files: + console.print( + f"[red]Error: No files found in SVN directory at {os.environ.get('REPO_PATH')}[/]" + ) + sys.exit(1) + + for check in signature_check_config: + console.print(f"[blue]{check.get('description')}[/]") + if check.get("method") == "gpg": + validate_signature_with_gpg(check) + + if invalid_signature_files: + for error in invalid_signature_files: + console.print( + f"[red]Error: Invalid signature found for {error.get('file')} status: {error.get('status')} problems: {error.get('problems')}[/]" + ) + sys.exit(1) + + console.print("[blue]All signatures are valid[/]") diff --git a/signature/test_signature_check.py b/signature/test_signature_check.py new file mode 100644 index 0000000..e43185b --- /dev/null +++ 
b/signature/test_signature_check.py
@@ -0,0 +1,65 @@
+import tempfile
+from unittest.mock import patch
+
+import gnupg
+
+from signature.signature_check import (
+    invalid_signature_files,
+    svn_files,
+    temp_signature_key_file_path,
+    validate_signature_with_gpg,
+)
+
+
+@patch("signature.signature_check.download_keys")
+def test_sign_file(mock_download_keys):
+    mock_download_keys.return_value = None
+    gpg = gnupg.GPG()
+    input_data = gpg.gen_key_input(
+        name_email="test@gmail.com",
+        passphrase="test",
+    )
+    key = gpg.gen_key(input_data)
+    public_key = gpg.export_keys(key.fingerprint)
+    with open(temp_signature_key_file_path, "w") as f:
+        f.write(public_key)
+
+    sample_file = tempfile.NamedTemporaryFile().name
+    with open(sample_file, "w") as f:
+        f.write("Hello World")
+    sig_file = sample_file + ".asc"
+    gpg.sign_file(
+        sample_file,
+        keyid=key.fingerprint,
+        passphrase="test",
+        detach=True,
+        output=sig_file,
+    )
+    svn_files.extend([sample_file, sig_file])
+    validate_signature_with_gpg({"keys": temp_signature_key_file_path})
+    assert not invalid_signature_files
+
+
+@patch("signature.signature_check.download_keys")
+def test_sign_file_should_fail_when_not_signed(mock_download_keys):
+    mock_download_keys.return_value = None
+    gpg = gnupg.GPG()
+    input_data = gpg.gen_key_input(
+        name_email="test@gmail.com",
+        passphrase="test",
+    )
+    key = gpg.gen_key(input_data)
+
+    public_key = gpg.export_keys(key.fingerprint)
+    with open(temp_signature_key_file_path, "w") as f:
+        f.write(public_key)
+
+    sample_file = tempfile.NamedTemporaryFile().name
+    with open(sample_file, "w") as f:
+        f.write("Hello World")
+    sig_file = sample_file + ".asc"
+    # An empty .asc file is not a valid detached signature, so verification must fail
+    with open(sig_file, "wb") as f:
+        f.write(b"")
+    svn_files.extend([sample_file, sig_file])
+    validate_signature_with_gpg({"keys": temp_signature_key_file_path})
+    assert invalid_signature_files
diff --git a/svn/__init__.py b/svn/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/svn/action.yml b/svn/action.yml
new file mode 100644
index 0000000..79564e3
--- /dev/null
+++ b/svn/action.yml
@@ -0,0 +1,51 @@
+name: "Svn Validator"
+description: "Validate svn files"
+
+inputs:
+  svn-config:
+    description: >
+      JSON config for svn checks; it contains all the information required to validate
+      the svn files, such as checking file extensions and package names.
+    required: true
+
+  temp-dir:
+    description: >
+      Directory into which the svn repo is checked out.
+    required: false
+    default: "asf-dist"
+
+  repo-path:
+    description: >
+      Path within the svn repo. Let's say we want to publish the packages from the dev folder;
+      e.g. the svn repo structure is https://dist.apache.org/repos/dist/
+        dev/airflow/providers
+        release/airflow/providers
+      To publish the packages from the dev providers folder, set the url and path in
+      release-config.yml like this:
+        url: https://dist.apache.org/repos/dist/dev/airflow
+        repo-path: providers/
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+    - name: Setup Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.11'
+
+    - name: "SVN validation"
+      shell: bash
+      id: svn-check
+      env:
+        REPO_PATH: ${{ inputs.repo-path }}
+        SVN_CHECK_CONFIG: ${{ inputs.svn-config }}
+      run: |
+        python3 -m pip install uv
+        uv run $GITHUB_ACTION_PATH/svn_check.py
+      working-directory: "./${{ inputs.temp-dir }}/${{ inputs.repo-path }}"
\ No newline at end of file
diff --git a/svn/svn_check.py b/svn/svn_check.py
new file mode 100644
index 0000000..2070147
--- /dev/null
+++ b/svn/svn_check.py
@@ -0,0 +1,115 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#     "rich",
+# ]
+# ///
+from __future__ import annotations
+
+import json
+import os
+import re
+import sys
+from typing import Any
+
+from rich.console import Console
+
+console = Console(width=400, color_system="standard")
+
+svn_files = os.listdir()
+
+unknown_files = []
+unknown_file_extensions = []
+
+
+def check_with_regex(file_to_check: str, pattern: str, check_type: str) -> bool | None:
+    """
+    Check the file against a regex pattern that matches the file extension or package name
+
+    :param file_to_check: name of the file to check
+    :param pattern: pattern to match the file, e.g. ".*(tar.gz)$"
+    :param check_type: type of check to perform, e.g. extension, package_name
+    :return: bool | None; truthy when the file matches, falsy otherwise
+    """
+    match = re.match(pattern, file_to_check)
+
+    if check_type == "extension":
+        return match and file_to_check.endswith(match.group(1))
+    elif check_type == "package_name":
+        return match and match.group(1) in file_to_check
+
+    return None
+
+
+def check_files_with_identifiers(
+    identifiers: list[dict[str, Any]], dist_svn_files: list[str], check_type: str
+):
+    """
+    Check the files with the identifiers; an identifier is a regex pattern that
+    recognises a file extension or a package name. Files matched by no identifier
+    are recorded as unknown.
+
+    :param identifiers: identifiers to check against, e.g. [{"type": "regex", "pattern": ".*(tar.gz)$"}]
+    :param dist_svn_files: list of files from the SVN directory
+    :param check_type: type of check to perform, e.g. extension, package_name
+    :return: None
+    """
+
+    dist_svn_files_copy = dist_svn_files.copy()
+
+    for identifier in identifiers:
+        if identifier.get("type") == "regex":
+            regex_pattern = identifier.get("pattern")
+            # Remove every file this pattern recognises; whatever remains is unknown
+            for file in dist_svn_files:
+                if file in dist_svn_files_copy and check_with_regex(
+                    file, regex_pattern, check_type
+                ):
+                    dist_svn_files_copy.remove(file)
+
+    if check_type == "extension":
+        unknown_file_extensions.extend(dist_svn_files_copy)
+
+    elif check_type == "package_name":
+        unknown_files.extend(dist_svn_files_copy)
+
+
+if __name__ == "__main__":
+    svn_check_config_raw = os.environ.get("SVN_CHECK_CONFIG")
+
+    if not svn_check_config_raw:
+        console.print(
+            "[red]Error: SVN_CHECK_CONFIG not set[/]\n"
+            "You must set `SVN_CHECK_CONFIG` environment variable to run this script"
+        )
+        sys.exit(1)
+
+    svn_check_config: list[dict[str, Any]] = json.loads(svn_check_config_raw)
+
+    if not svn_files:
+        console.print(
+            f"[red]Error: No files found in SVN directory at {os.environ.get('REPO_PATH')}[/]"
+        )
+        sys.exit(1)
+
+    for check in svn_check_config:
+        console.print(f"[blue]{check.get('description')}[/]")
+        check_files_with_identifiers(
+            check.get("identifiers"), svn_files, check.get("id")
+        )
+
+
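    # Files not matched by any identifier were collected as unknown; they fail the check below
+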
exit_code = 0
+
+    if unknown_files:
+        for error in unknown_files:
+            console.print(f"[red]Error: unknown file found {error}[/]")
+        exit_code = 1
+
+    if unknown_file_extensions:
+        for error in unknown_file_extensions:
+            console.print(f"[red]Error: unknown file extension found {error}[/]")
+        exit_code = 1
+
+    if exit_code != 0:
+        console.print("[red]SVN check failed[/]")
+        sys.exit(exit_code)
+
+    console.print("[blue]SVN check passed successfully[/]")
diff --git a/svn/test_svn_check.py b/svn/test_svn_check.py
new file mode 100644
index 0000000..ec3712a
--- /dev/null
+++ b/svn/test_svn_check.py
@@ -0,0 +1,157 @@
+import pytest
+
+from svn.svn_check import (
+    check_files_with_identifiers,
+    check_with_regex,
+    unknown_file_extensions,
+    unknown_files,
+)
+
+
+@pytest.mark.parametrize(
+    "file, pattern, check_type, expected",
+    [
+        pytest.param(
+            "apache-airflow-2.10.3-source.tar.gz",
+            ".*(tar.gz)$",
+            "extension",
+            True,
+            id="valid_extension",
+        ),
+        pytest.param(
+            "apache-airflow-2.10.3-source.tar12.gz",
+            ".*(tar.gz)$",
+            "extension",
+            None,
+            id="invalid_extension",
+        ),
+        pytest.param(
+            "apache_airflow-2.10.3-source.tar.gz",
+            ".*(apache_airflow.*)$",
+            "package_name",
+            True,
+            id="valid_package_name_underscore",
+        ),
+        pytest.param(
+            "apache-airflow-2.10.3-source.tar.gz",
+            ".*(apache-airflow.*)$",
+            "package_name",
+            True,
+            id="valid_package_name_hyphen",
+        ),
+        pytest.param(
+            "apacheairflow-2.10.3-source.tar.gz",
+            ".*(apache-airflow.*)$",
+            "package_name",
+            None,
+            id="invalid_package_name",
+        ),
+    ],
+)
+def test_check_with_regex_extension_type(file, pattern, check_type, expected):
+    assert check_with_regex(file, pattern, check_type) == expected
+
+
+def test_check_files_with_identifiers_for_extension():
+    unknown_file_extensions.clear()
+    all_files = [
+        "apache-airflow-2.10.3-source.tar.gz",
+        "apache-airflow-2.10.3-py3-none-any.whl.asc",
+        "apache-airflow-2.10.3-py3-none-any.whl.sha512",
+        "apache-airflow-2.10.3.tar.gz",
+    ]
+    identifiers = [
+        {
+            "type": "regex",
+            "pattern": ".*(py3-none-any.whl|tar.gz.sha512|tar.gz.asc|tar.gz|py3-none-any.whl.asc|py3-none-any.whl.sha512)$",
+        }
+    ]
+    check_type = "extension"
+    check_files_with_identifiers(identifiers, all_files, check_type)
+    assert not unknown_file_extensions
+
+
+def test_check_files_with_identifiers_for_invalid_extension():
+    unknown_file_extensions.clear()
+    all_files = [
+        "apache-airflow-2.10.3-source.tar.gz",
+        "apache-airflow-2.10.3-py3-none-any.whl.asc123",
+        "apache-airflow-2.10.3-py3-none-any.whl.sha512",
+        "apache-airflow-2.10.3.tar.jpeg",
+    ]
+    identifiers = [
+        {
+            "type": "regex",
+            "pattern": ".*(py3-none-any.whl|tar.gz.sha512|tar.gz.asc|tar.gz|py3-none-any.whl.asc|py3-none-any.whl.sha512)$",
+        }
+    ]
+    check_type = "extension"
+    check_files_with_identifiers(identifiers, all_files, check_type)
+    assert unknown_file_extensions == [
+        "apache-airflow-2.10.3-py3-none-any.whl.asc123",
+        "apache-airflow-2.10.3.tar.jpeg",
+    ]
+
+
+def test_check_files_with_identifiers_for_package_name():
+    unknown_files.clear()
+    all_files = [
+        "apache-airflow-2.10.3-source.tar.gz",
+        "apache-airflow-2.10.3-py3-none-any.whl.asc",
+        "apache-airflow-2.10.3-py3-none-any.whl.sha512",
+        "apache-airflow-2.10.3.tar.gz",
+    ]
+    identifiers = [{"type": "regex", "pattern": ".*(apache-airflow.*)$"}]
+    check_type = "package_name"
+    check_files_with_identifiers(identifiers, all_files, check_type)
+    assert not unknown_files
+
+
+def test_check_files_with_identifiers_for_invalid_package_name():
+    unknown_files.clear()
+    all_files = [
+
"apache-airflow-2.10.3-source.tar.gz", + "apache-airflow-2.10.3-py3-none-any.whl.asc", + "apache-airflow-2.10.3-py3-none-any.whl.sha512", + "apacheairflow-2.10.3.tar.gz", + ] + identifiers = [{"type": "regex", "pattern": ".*(apache-airflow.*)$"}] + check_type = "package_name" + check_files_with_identifiers(identifiers, all_files, check_type) + assert unknown_files == ["apacheairflow-2.10.3.tar.gz"] + + +def test_check_files_with_multiple_identifiers_for_package_name(): + unknown_files.clear() + all_files = [ + "apache-airflow-2.10.3-source.tar.gz", + "apache-airflow-2.10.3-py3-none-any.whl.asc", + "apache-airflow-2.10.3-py3-none-any.whl.sha512", + "apache_airflow-2.10.3.tar.gz", + ] + identifiers = [ + {"type": "regex", "pattern": ".*(apache-airflow.*)$"}, + {"type": "regex", "pattern": ".*(apache_airflow.*)$"}, + ] + check_type = "package_name" + check_files_with_identifiers(identifiers, all_files, check_type) + assert not unknown_files + + +def test_check_files_with_multiple_identifiers_for_invalid_package_name(): + unknown_files.clear() + all_files = [ + "apache-airflow-2.10.3-source.tar.gz", + "apache-airflow-2.10.3-py3-none-any.whl.asc", + "apache-airflow-2.10.3-py3-none-any.whl.sha512", + "apache_air-2.10.3.tar.gz", + ] + identifiers = [ + {"type": "regex", "pattern": ".*(apache-airflow.*)$"}, + {"type": "regex", "pattern": ".*(apache_airflow.*)$"}, + ] + check_type = "package_name" + check_files_with_identifiers(identifiers, all_files, check_type) + assert unknown_files == [ + "apache_air-2.10.3.tar.gz", + ]