Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ def handle(self, *args, **options):
else:
logger.info("Skipped related URL %s", verified_url)

chapter.leaders_raw = scraper.get_leaders()

chapter.invalid_urls = sorted(invalid_urls)
chapter.related_urls = sorted(related_urls)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def handle(self, *args, **options):
active_committees = Committee.active_committees.order_by("-created_at")
active_committees_count = active_committees.count()
offset = options["offset"]
chapters = []
committees = []
for idx, committee in enumerate(active_committees[offset:]):
prefix = f"{idx + offset + 1} of {active_committees_count}"
print(f"{prefix:<10} {committee.owasp_url}")
Expand Down Expand Up @@ -63,14 +63,12 @@ def handle(self, *args, **options):
else:
logger.info("Skipped related URL %s", verified_url)

committee.leaders_raw = scraper.get_leaders()

committee.invalid_urls = sorted(invalid_urls)
committee.related_urls = sorted(related_urls)

chapters.append(committee)
committees.append(committee)

time.sleep(0.5)

# Bulk save data.
Committee.bulk_save(chapters)
Committee.bulk_save(committees)
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ def handle(self, *args, **options):
else:
logger.info("Skipped related URL %s", verified_url)

project.leaders_raw = scraper.get_leaders()

project.invalid_urls = sorted(invalid_urls)
project.related_urls = sorted(related_urls)

Expand Down
48 changes: 45 additions & 3 deletions backend/apps/owasp/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import yaml
from django.db import models
from django.db.models import Sum
from requests.exceptions import RequestException

from apps.common.open_ai import OpenAi
from apps.github.constants import (
Expand Down Expand Up @@ -40,6 +41,9 @@ class Meta:
)
is_active = models.BooleanField(verbose_name="Is active", default=True)

leaders_raw = models.JSONField(
verbose_name="Entity leaders list", default=list, blank=True, null=True
)
tags = models.JSONField(verbose_name="OWASP metadata tags", default=list)
topics = models.JSONField(
verbose_name="GitHub repository topics", default=list, blank=True, null=True
Expand All @@ -62,6 +66,16 @@ def github_url(self):
"""Get GitHub URL."""
return f"https://github.com/owasp/{self.key}"

@property
def leaders_md_raw_url(self):
    """Return the raw GitHub URL of the entity's leaders.md file.

    The URL is built from the key and default branch of ``owasp_repository``.
    ``None`` is returned when ``owasp_repository`` is falsy.
    """
    repository = self.owasp_repository
    if not repository:
        return None

    base_url = "https://raw.githubusercontent.com/OWASP/"
    return base_url + f"{repository.key}/{repository.default_branch}/leaders.md"

@property
def owasp_name(self):
"""Get OWASP name."""
Expand All @@ -79,6 +93,9 @@ def deactivate(self):

def from_github(self, field_mapping, repository):
"""Update instance based on GitHub repository data."""
# Get leaders.
self.leaders_raw = self.get_leaders()

# Normalize tags.
self.tags = (
[tag.strip(", ") for tag in self.tags.split("," if "," in self.tags else " ")]
Expand Down Expand Up @@ -126,6 +143,34 @@ def get_index_md_raw_url(self, repository=None):
else None
)

def get_leaders(self):
    """Collect leader names from the entity's leaders.md file on GitHub.

    The file is fetched from ``leaders_md_raw_url`` and scanned line by line
    for ``* [Name]`` items, optionally followed by a ``(...)`` link target.

    Returns:
        A sorted list of the bracketed names. The list is empty when the
        fetch raises ``RequestException``/``ValueError`` or the content is
        falsy.
    """
    try:
        markdown = get_repository_file_content(self.leaders_md_raw_url)
    except (RequestException, ValueError) as e:
        logger.exception(
            "Failed to fetch leaders.md file",
            extra={"URL": self.leaders_md_raw_url, "error": str(e)},
        )
        return []

    if not markdown:
        return []

    # Match both standard Markdown list items with links and variations.
    item_pattern = r"\*\s*\[([^\]]+)\](?:\([^)]*\))?"

    names = []
    try:
        for row in markdown.split("\n"):
            logger.debug("Processing line: %s", row)
            names += re.findall(item_pattern, row)
    except AttributeError:
        # Names gathered before the failure are still returned below.
        logger.exception(
            "Unable to parse leaders.md content", extra={"URL": self.leaders_md_raw_url}
        )

    return sorted(names)

def get_top_contributors(self, repositories=()):
"""Get top contributors."""
return [
Expand Down Expand Up @@ -153,9 +198,6 @@ class GenericEntityModel(models.Model):
class Meta:
abstract = True

leaders_raw = models.JSONField(
verbose_name="Entity leaders list", default=list, blank=True, null=True
)
related_urls = models.JSONField(
verbose_name="Entity related URLs", default=list, blank=True, null=True
)
Expand Down
10 changes: 0 additions & 10 deletions backend/apps/owasp/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,6 @@ def get_urls(self, domain=None):
else self.page_tree.xpath("//div[@class='sidebar']//a/@href")
)

def get_leaders(self):
    """Return the sorted leader names listed in the page sidebar.

    Looks up the sidebar element with id ``leaders`` and reads the link texts
    of the ``ul`` element that immediately follows it. An empty list is
    returned when either the header or that list is missing.
    """
    headers = self.page_tree.xpath("//div[@class='sidebar']//*[@id='leaders']")
    if not headers:
        return []

    sibling = headers[0].getnext()
    if sibling is None or sibling.tag != "ul":
        return []

    names = [text.strip() for text in sibling.xpath(".//li/a/text()")]
    names.sort()
    return names

def verify_url(self, url):
"""Verify URL."""
location = urlparse(url).netloc.lower()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def test_handle(self, mock_bulk_save, command, mock_chapter, offset, chapters):
"https://invalid.com/repo3",
]
mock_scraper.verify_url.side_effect = lambda url: None if "invalid" in url else url
mock_scraper.get_leaders.return_value = "Leaders data"
mock_scraper.page_tree = True

mock_chapter.get_related_url.side_effect = lambda url, **_: url
Expand Down Expand Up @@ -91,4 +90,3 @@ def test_handle(self, mock_bulk_save, command, mock_chapter, offset, chapters):
expected_related_urls = ["https://example.com/repo1", "https://example.com/repo2"]
assert chapter.invalid_urls == sorted(expected_invalid_urls)
assert chapter.related_urls == sorted(expected_related_urls)
assert chapter.leaders_raw == "Leaders data"
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def test_handle(self, mock_bulk_save, command, mock_committee, offset, committee
"https://invalid.com/repo3",
]
mock_scraper.verify_url.side_effect = lambda url: None if "invalid" in url else url
mock_scraper.get_leaders.return_value = "Leaders data"
mock_scraper.page_tree = True

mock_committee.get_related_url.side_effect = lambda url, **_: url
Expand Down Expand Up @@ -91,4 +90,3 @@ def test_handle(self, mock_bulk_save, command, mock_committee, offset, committee
expected_related_urls = ["https://example.com/repo1", "https://example.com/repo2"]
assert committee.invalid_urls == sorted(expected_invalid_urls)
assert committee.related_urls == sorted(expected_related_urls)
assert committee.leaders_raw == "Leaders data"
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def test_handle(self, mock_github, mock_bulk_save, command, mock_project, offset
"https://invalid.com/repo3",
]
mock_scraper.verify_url.side_effect = lambda url: None if "invalid" in url else url
mock_scraper.get_leaders.return_value = "Leaders data"
mock_scraper.page_tree = True

mock_github_instance = mock.Mock()
Expand Down Expand Up @@ -107,4 +106,3 @@ def test_handle(self, mock_github, mock_bulk_save, command, mock_project, offset
]
assert project.invalid_urls == sorted(expected_invalid_urls)
assert project.related_urls == sorted(expected_related_urls)
assert project.leaders_raw == "Leaders data"
3 changes: 3 additions & 0 deletions backend/tests/owasp/models/chapter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest

from apps.github.models.repository import Repository
from apps.github.models.user import User
from apps.owasp.models.chapter import Chapter, Prompt


Expand Down Expand Up @@ -161,6 +162,8 @@ def test_from_github(self):
repository_mock.title = "Nest"
repository_mock.pitch = "Nest Pitch"
repository_mock.tags = ["react", "python"]
repository_mock.leaders = ["Leader1", "Leader2"]
repository_mock.owner = User(name="OWASP")

chapter = Chapter()

Expand Down
3 changes: 3 additions & 0 deletions backend/tests/owasp/models/committee_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from apps.common.index import IndexBase
from apps.github.models.repository import Repository
from apps.github.models.user import User
from apps.owasp.models.committee import Committee


Expand Down Expand Up @@ -70,6 +71,8 @@ def test_from_github(self):
repository_mock.title = "Nest"
repository_mock.pitch = "Nest Pitch"
repository_mock.tags = ["react", "python"]
repository_mock.leaders = ["Leader1, Leader2"]
repository_mock.owner = User(name="OWASP")

committee = Committee()

Expand Down
22 changes: 22 additions & 0 deletions backend/tests/owasp/models/common_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,28 @@ class Meta:


class TestRepositoryBasedEntityModel:
@pytest.mark.parametrize(
    ("content", "expected_leaders"),
    [
        ("* [Leader1](https://example.com)", ["Leader1"]),
        (
            "* [Leader1](https://example.com)\n* [Leader2](https://example.com)",
            ["Leader1", "Leader2"],
        ),
        ("", []),
    ],
)
def test_get_leaders(self, content, expected_leaders):
    """Check that get_leaders() returns the names found in the patched file content."""
    repository_stub = MagicMock()
    repository_stub.name = "test-repo"

    entity = EntityModel()
    entity.repository = repository_stub

    # The file fetch is patched, so no network access happens here.
    fetch_target = "apps.owasp.models.common.get_repository_file_content"
    with patch(fetch_target, return_value=content):
        assert entity.get_leaders() == expected_leaders

@pytest.mark.parametrize(
("key", "expected_url"),
[
Expand Down
3 changes: 3 additions & 0 deletions backend/tests/owasp/models/project_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest

from apps.github.models.repository import Repository
from apps.github.models.user import User
from apps.owasp.models.project import Project


Expand Down Expand Up @@ -105,6 +106,8 @@ def test_from_github(self):
repository_mock.title = "Nest"
repository_mock.pitch = "Nest Pitch"
repository_mock.tags = "react, python"
repository_mock.leaders = ["Leader1", "Leader2"]
repository_mock.owner = User(name="OWASP")

project = Project()

Expand Down
10 changes: 0 additions & 10 deletions backend/tests/owasp/scraper_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,16 +108,6 @@ def test_initialization_not_found(self, mock_session):

assert scraper.page_tree is None

def test_get_leaders_no_leaders(self, mock_session):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this test needs to be refactored too based on the new logic and the code location.

invalid_html = b"<div class='sidebar'><div id='leaders'></div></div>"
mock_response = Mock()
mock_response.content = invalid_html
mock_session.get.return_value = mock_response

scraper = OwaspScraper("https://test.org")

assert scraper.get_leaders() == []

def test_verify_url_invalid_url(self, mock_session):
response = Mock()
response.status_code = codes.ok
Expand Down
4 changes: 2 additions & 2 deletions frontend/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ test-frontend: \

test-frontend-e2e:
@docker build -f frontend/docker/Dockerfile.e2e.test frontend -t nest-test-frontend-e2e
@docker run --env-file frontend/.env.example nest-test-frontend-e2e npm run test:e2e
@docker run --env-file frontend/.env.example nest-test-frontend-e2e pnpm run test:e2e

test-frontend-unit:
@docker build -f frontend/docker/Dockerfile.unit.test frontend -t nest-test-frontend-unit
@docker run --env-file frontend/.env.example nest-test-frontend-unit npm run test:unit
@docker run --env-file frontend/.env.example nest-test-frontend-unit pnpm run test:unit

update-frontend-dependencies:
@CMD="pnpm update" $(MAKE) exec-frontend-command-it
Loading