# Skip to content
#
# Content Aggregator #201
#
# Content Aggregator
#
# Content Aggregator #201
#
# Workflow file for this run

name: Content Aggregator

# Triggers: manual runs from the Actions tab, plus one scheduled run per day.
on:
  workflow_dispatch:
  schedule:
    # Every day at 08:20 (GitHub evaluates cron schedules in UTC).
    - cron: '20 8 * * *'
jobs:
  # Single job: install the toolchain, run the aggregator, publish its output
  # as a build artifact, then open a GitHub issue with the summary.
  summarize:
    runs-on: ubuntu-latest
    timeout-minutes: 120  # hard stop after 2 hours
    # Explicit token scopes: checkout only reads the repository, while the
    # final step creates an issue and comments on it through `gh`.
    # NOTE(review): assumes no step pushes to the repo - widen if one does.
    permissions:
      contents: read
      issues: write
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the string "3.10" instead of the float 3.1.
          python-version: '3.10'

      - name: Install system dependencies
        run: |
          # TODO: check if all dependencies are indeed needed
          sudo apt-get update
          sudo apt-get install -y \
            python3-dev \
            libxml2-dev \
            libxslt-dev \
            chromium-chromedriver \
            jq

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .  # Install package with dependencies

      - name: Install Playwright browsers
        run: |
          playwright install chromium
          playwright install-deps

      - name: Run summarizer
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          GEMINI_MODEL_SUMMARIZE: ${{ secrets.GEMINI_MODEL_SUMMARIZE }}
          GEMINI_MODEL_DATE_EXTRACT: ${{ secrets.GEMINI_MODEL_DATE_EXTRACT }}
          # Environment values are always strings, so keep them quoted.
          DEBUG: "false"
          ARTICLES_LIMIT: "500"
        run: content-aggregator run

      - name: Upload output files
        uses: actions/upload-artifact@v4
        with:
          name: summaries
          path: |
            outputs/*.json
            outputs/*.txt
          retention-days: 1

      - name: Create GitHub Issue
        if: success()  # Only run if previous steps succeeded
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Abort on the first failing command, unset variable or broken pipe.
          set -euo pipefail

          SUMMARY_FILE_NAME="summary.md"
          ./scripts/md_executive_summary.sh "$SUMMARY_FILE_NAME"
          cat "$SUMMARY_FILE_NAME"

          ISSUE_URL=$(gh issue create \
            --title "Daily Content Summary $(date +'%Y-%m-%d')" \
            --body-file "$SUMMARY_FILE_NAME" \
            --label automated)

          # The issue number is the last path segment of the returned URL.
          ISSUE_NUMBER="${ISSUE_URL##*/}"
          if ! [[ "$ISSUE_NUMBER" =~ ^[0-9]+$ ]]; then
            echo "Could not parse an issue number from: $ISSUE_URL" >&2
            exit 1
          fi
          echo "Created issue number: $ISSUE_NUMBER from $ISSUE_URL"

          # Add articles processed as comment
          ARTICLES_FILE_NAME="articles_processed.md"
          ./scripts/md_processed_articles.sh "$ARTICLES_FILE_NAME"
          cat "$ARTICLES_FILE_NAME"
          gh issue comment "$ISSUE_NUMBER" --body-file "$ARTICLES_FILE_NAME"