# Content Aggregator #204
# NOTE: The GitHub viewer reported that this file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Workflow: runs the `content-aggregator` CLI, uploads the files it writes to
# `outputs/` as a build artifact, then publishes the generated Markdown summary
# as a GitHub issue with the processed-articles list attached as a comment.
name: Content Aggregator

on:
  # Allow manual runs.
  workflow_dispatch:
  schedule:
    # Daily at 08:20 (GitHub evaluates schedule cron expressions in UTC).
    - cron: '20 8 * * *'

# Explicit least-privilege scopes for GITHUB_TOKEN: checkout only reads the
# repository, while `gh issue create` / `gh issue comment` need to write issues.
# NOTE(review): the helper scripts under ./scripts are not visible here; extend
# this list if they call any other GitHub API.
permissions:
  contents: read
  issues: write

jobs:
  summarize:
    runs-on: ubuntu-latest
    timeout-minutes: 120  # 2 hour timeout
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Must stay quoted: a bare 3.10 would be parsed as the float 3.1.
          python-version: '3.10'

      - name: Install system dependencies
        run: |
          # TODO: check if all dependencies are indeed needed
          sudo apt-get update
          sudo apt-get install -y \
            python3-dev \
            libxml2-dev \
            libxslt-dev \
            chromium-chromedriver \
            jq

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .  # Install package with dependencies

      - name: Install Playwright browsers
        run: |
          # Install the chromium browser together with only the OS packages
          # that chromium itself needs.
          playwright install --with-deps chromium

      - name: Run summarizer
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          GEMINI_MODEL_SUMMARIZE: ${{ secrets.GEMINI_MODEL_SUMMARIZE }}
          GEMINI_MODEL_DATE_EXTRACT: ${{ secrets.GEMINI_MODEL_DATE_EXTRACT }}
          # Environment values are always strings, so keep these quoted.
          DEBUG: "false"
          ARTICLES_LIMIT: "500"
        run: content-aggregator run

      - name: Upload output files
        uses: actions/upload-artifact@v4
        with:
          name: summaries
          path: |
            outputs/*.json
            outputs/*.txt
          retention-days: 1

      - name: Create GitHub Issue
        if: success()  # Only run if previous steps succeeded
        env:
          # The gh CLI authenticates through GH_TOKEN.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Render the executive summary to a Markdown file and echo it to the log.
          SUMMARY_FILE_NAME="summary.md"
          ./scripts/md_executive_summary.sh "$SUMMARY_FILE_NAME"
          cat "$SUMMARY_FILE_NAME"

          # `gh issue create` prints the URL of the new issue on stdout.
          ISSUE_URL=$(gh issue create \
            --title "Daily Content Summary $(date +'%Y-%m-%d')" \
            --body-file "$SUMMARY_FILE_NAME" \
            --label automated)
          # Extract the trailing issue number from ".../issues/<n>".
          ISSUE_NUMBER=$(echo "$ISSUE_URL" | grep -oP '(?<=issues/)\d+$')
          echo "Created issue number: $ISSUE_NUMBER from $ISSUE_URL"

          # Add articles processed as comment
          ARTICLES_FILE_NAME="articles_processed.md"
          ./scripts/md_processed_articles.sh "$ARTICLES_FILE_NAME"
          cat "$ARTICLES_FILE_NAME"
          gh issue comment "$ISSUE_NUMBER" --body-file "$ARTICLES_FILE_NAME"