Skip to content

Commit

Permalink
Add tests for Conferences script (#404)
Browse files Browse the repository at this point in the history
* Add tests for parsing Conference Issues

* Refactor top-level logic into testable functions

* Run pre-commit linters

* Only add to conference list if the conference details could actually be parsed

* Move gh token env var to function

We don't always need to use the GITHUB_TOKEN during tests, so let's make it an explicit call if we need it

* Add sleep command to playwright tests

This should allow playwright to finish setting up before running tests

* Add delay between each test to make CI tests more consistent

* Linter fixes

* Reduce startup time so tests don't take as long to run

* Reduce delay between tests to 1 second

* Update _conferences/__main__.py

Co-authored-by: Jay Miller <[email protected]>

* Fix linter issues

---------

Co-authored-by: Jay Miller <[email protected]>
  • Loading branch information
dragid10 and kjaymiller authored Sep 1, 2024
1 parent 9e90858 commit b6ce359
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 69 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/playwright.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,13 @@ jobs:
bundler-cache: true
- name: Jekyll detached and pytest
run: |
# Start up local copy of site
bundle exec jekyll serve --detach
# Sleep for 5 secs to allow Jekyll to start
startup_wait=5
echo "Sleeping for $startup_wait seconds"
sleep $startup_wait
# Run tests
python -m pytest
174 changes: 105 additions & 69 deletions _conferences/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,76 +6,112 @@

import yaml
from github import Auth, Github
from github.Issue import Issue
from github.PaginatedList import PaginatedList

# NOTE(review): legacy top-level version of the conference parser; the same
# logic now lives in parse_conference_details()/get_open_issues() below.
# Everything here runs at import time, including the GitHub API calls.
TOKEN = os.getenv("GITHUB_TOKEN", "")
ROOT = Path(__file__).parent.parent
conferences_path = ROOT / "_data/conferences.yml"

auth = Auth.Token(TOKEN)
g = Github(auth=auth)

repo = g.get_repo("BlackPythonDevs/blackpythondevs.github.io")
open_issues = repo.get_issues(state="open", labels=["conference"])
conferences = []
# Midnight today, so comparisons against date-only end dates work.
today = datetime.combine(datetime.now(), time())

for issue in open_issues:
    if "conference" in [label.name for label in issue.labels]:
        # Extract fields from issue body
        # (each section header is followed by a blank line, hence the
        # doubled newline in the patterns)
        name_match = re.search(
            r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body
        )
        url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body)
        dates_match = re.search(
            r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body
        )
        type_match = re.search(
            r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body
        )
        location_match = re.search(
            r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body
        )
        summary_match = re.search(
            r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}",
            issue.body,
            re.DOTALL,
        )
        speaking_match = re.search(
            r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}",
            issue.body,
            re.DOTALL,
        )

        # Set a default value of None for when the url field isn't as expected
        valid_url = None

        # Ensure the url field is not blank and the url matches the regex
        if url_match is not None and url_match[1].strip() != "":
            # Parse the url and see if a scheme (`https`) is included in it
            # If not, then prepend `https` to the url from the issue body
            # This guards against the website thinking the passed in url is another page on https://blackpythondevs.com/
            # NOTE(review): a URL that already carries an http(s) scheme falls
            # through this branch and leaves valid_url as None — handled
            # correctly by normalize_url() in the refactored code below.
            parsed_url = urlparse(url_match[1])
            if "http" not in parsed_url.scheme.casefold():
                valid_url = f"https://{url_match[1]}"

        if dates_match:
            conferenceDates = dates_match[1]
            # Parse the end date of the conference
            # (assumes a "start - end" range, e.g. "10 - 15 Sep 2050";
            # raises IndexError when no "-" is present)
            endDateStr = conferenceDates.split("-")[1].strip()
            endDate = datetime.strptime(endDateStr, "%d %b %Y")
            # Check if the conference end date is greater than today
            if endDate >= today:
                conference = {
                    "name": name_match[1],
                    "url": valid_url,
                    "dates": dates_match[1],
                    "type": type_match[1],
                    "location": location_match[1],
                    "summary": summary_match[1],
                    "speaking": speaking_match[1] if speaking_match else "",
                }
                conferences.append(conference)

# Write the conferences to the _data/conferences.yml file
with conferences_path.open("w") as f:
    yaml.dump(conferences, f)

def create_github_client():
    """Return a Github API client authenticated via the GITHUB_TOKEN env var.

    An empty token (the default) still yields a client, just an
    unauthenticated/rate-limited one.
    """
    token = os.getenv("GITHUB_TOKEN", "")
    return Github(auth=Auth.Token(token))


def get_open_issues(gh: Github) -> PaginatedList[Issue]:
    """Fetch the open, conference-labeled issues from the BPD site repo."""
    repository = gh.get_repo("BlackPythonDevs/blackpythondevs.github.io")
    return repository.get_issues(state="open", labels=["conference"])


def parse_conference_details(issue_body: str) -> dict | None:
    """Parse a conference-issue body into a conference dict.

    Returns None when required fields are missing, the dates can't be
    parsed, or the conference has already ended — so callers can simply
    skip malformed issues instead of crashing.
    """
    # Extract fields from issue body; each section header is followed by a
    # blank line, hence the doubled newline in the patterns.
    name_match = re.search(
        r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body)
    dates_match = re.search(
        r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    type_match = re.search(
        r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    location_match = re.search(
        r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    summary_match = re.search(
        r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}",
        issue_body,
        re.DOTALL,
    )
    speaking_match = re.search(
        r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}",
        issue_body,
        re.DOTALL,
    )

    # Every field except "speaking" (and the URL) is required; bail out
    # instead of raising TypeError on a malformed issue body.
    required = (name_match, dates_match, type_match, location_match, summary_match)
    if any(match is None for match in required):
        return None

    # Set a default value of None for when the url field isn't as expected
    valid_url = normalize_url() if not url_match else normalize_url(url_match[1])

    try:
        # Parse the end date of a "start - end" range, e.g. "10 - 15 Sep 2050".
        end_date_str = dates_match[1].split("-")[1].strip()
        end_date = datetime.strptime(end_date_str, "%d %b %Y")
    except (IndexError, ValueError):
        # No "-" in the dates field, or the end date isn't "%d %b %Y".
        return None

    # Check if the conference end date is greater than today (compare against
    # midnight today so a conference ending today still counts).
    today = datetime.combine(datetime.now(), time())
    if end_date < today:
        return None

    return {
        "name": name_match[1],
        "url": valid_url,
        "dates": dates_match[1],
        "type": type_match[1],
        "location": location_match[1],
        "summary": summary_match[1],
        "speaking": speaking_match[1] if speaking_match else "",
    }


def normalize_url(url_match: str = None):
    """Return the given URL with an https scheme prepended when it lacks one.

    Returns None when no URL was supplied or it is blank. Prepending https
    guards against the website treating a scheme-less URL as another page
    on https://blackpythondevs.com/.
    """
    # Missing or blank input yields None.
    if url_match is None or not url_match.strip():
        return None
    # Keep the URL untouched when its parsed scheme already looks like
    # http(s); otherwise prepend https.
    if "http" in urlparse(url_match).scheme.casefold():
        return url_match
    return f"https://{url_match}"


def write_conferences_to_file(confs: list[dict]):
    """Serialize the parsed conferences to the _data/conferences.yml file."""
    conferences_path.write_text(yaml.dump(confs))


if __name__ == "__main__":
    # Create Github client object and pull the open issues from the repo.
    gh_client = create_github_client()
    open_issues: PaginatedList[Issue] = get_open_issues(gh_client)

    # Parse each issue that carries the "conference" label; keep only the
    # ones whose details could actually be parsed.
    parsed = (
        parse_conference_details(issue_body=issue.body)
        for issue in open_issues
        if "conference" in [label.name for label in issue.labels]
    )
    conferences = [conf for conf in parsed if conf]

    write_conferences_to_file(conferences)
121 changes: 121 additions & 0 deletions tests/test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import time

import pytest
from playwright.sync_api import Page, expect

from _conferences.__main__ import parse_conference_details

live_server_url = "http://127.0.0.1:4000"

routes = [
Expand All @@ -11,6 +15,13 @@
]


# Add a delay to each test to help with playwright race conditions
@pytest.fixture(autouse=True)
def slow_down_tests():
    """Pause for one second after every test.

    Autouse teardown delay intended to make the Playwright tests against the
    locally served site more consistent in CI (avoids race conditions).
    """
    yield
    time.sleep(1)


@pytest.mark.parametrize("url", routes)
def test_destination(
page: Page,
Expand Down Expand Up @@ -101,3 +112,113 @@ def test_mailto_bpdevs(page: Page) -> None:
page.goto(f"{live_server_url}")
mailto = page.get_by_role("link", name="email")
expect(mailto).to_have_attribute("href", "mailto:[email protected]")


def test_conference_parsing_valid_url():
    """A fully-formed issue body parses into the expected name and URL."""
    issue_body = """### Conference Name
Test Conference Title
### URL
https://microsoft.com
### Conference Dates
10 - 15 Sep 2050
### Conference Type
both
### Conference Location
Redmond, WA, USA
### Summary
Test Conference Summary
### Speaking
* [Satya Nadella](https://www.linkedin.com/in/satyanadella/)
"""
    conference = parse_conference_details(issue_body=issue_body)

    assert conference["name"] == "Test Conference Title"
    assert conference["url"] == "https://microsoft.com"


def test_conference_parsing_logic_no_url_scheme():
    """A scheme-less URL gets https:// prepended during parsing."""
    issue_body = """### Conference Name
Test Conference Title
### URL
microsoft.com
### Conference Dates
10 - 15 Sep 2050
### Conference Type
both
### Conference Location
Redmond, WA, USA
### Summary
Test Conference Summary
### Speaking
* [Satya Nadella](https://www.linkedin.com/in/satyanadella/)
"""
    conference = parse_conference_details(issue_body=issue_body)

    assert conference["name"] == "Test Conference Title"
    assert conference["url"] == "https://microsoft.com"


def test_conference_parsing_logic_no_url():
    """An issue body with an empty URL section parses with url set to None."""
    issue_body = """### Conference Name
Test Conference Title
### URL
### Conference Dates
10 - 15 Sep 2050
### Conference Type
both
### Conference Location
Redmond, WA, USA
### Summary
Test Conference Summary
### Speaking
* [Satya Nadella](https://www.linkedin.com/in/satyanadella/)
"""
    conference = parse_conference_details(issue_body=issue_body)

    assert conference["name"] == "Test Conference Title"
    assert conference["url"] is None

0 comments on commit b6ce359

Please sign in to comment.