From b6ce35966aca0e432280872d44e1ec27e7e185ca Mon Sep 17 00:00:00 2001 From: Alex Oladele Date: Sat, 31 Aug 2024 20:52:34 -0400 Subject: [PATCH] Add tests for Conferences script (#404) * Add tests for parsing Conference Issues * Refactor top-level logic into testable functions * Run pre-commit linters * Only add to conference list if the conference details could actually be parsed * Move gh token env var to function We don't always need to use the GITHUB_TOKEN during tests, so let's make it an explicit call if we need it * Add sleep command to playwright tests This should allow playwright to finish setting up before running tests * Add delay between each test to make CI tests more consistent * Linter fixes * Reduce startup time so tests don't take as long to run * Reduce delay between tests to 1 second * Update _conferences/__main__.py Co-authored-by: Jay Miller * Fix linter issues --------- Co-authored-by: Jay Miller --- .github/workflows/playwright.yml | 8 ++ _conferences/__main__.py | 174 +++++++++++++++++++------------ tests/test.py | 121 +++++++++++++++++++++ 3 files changed, 234 insertions(+), 69 deletions(-) diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml index 42849a7..29a543f 100644 --- a/.github/workflows/playwright.yml +++ b/.github/workflows/playwright.yml @@ -31,5 +31,13 @@ jobs: bundler-cache: true - name: Jekyll detached and pytest run: | + # Start up local copy of site bundle exec jekyll serve --detach + + # Sleep for 5 secs to allow Jekyll to start + startup_wait=5 + echo "Sleeping for $startup_wait seconds" + sleep $startup_wait + + # Run tests python -m pytest diff --git a/_conferences/__main__.py b/_conferences/__main__.py index 4ff4d38..8aee6f6 100644 --- a/_conferences/__main__.py +++ b/_conferences/__main__.py @@ -6,76 +6,112 @@ import yaml from github import Auth, Github +from github.Issue import Issue +from github.PaginatedList import PaginatedList -TOKEN = os.getenv("GITHUB_TOKEN", "") ROOT =
Path(__file__).parent.parent conferences_path = ROOT / "_data/conferences.yml" -auth = Auth.Token(TOKEN) -g = Github(auth=auth) - -repo = g.get_repo("BlackPythonDevs/blackpythondevs.github.io") -open_issues = repo.get_issues(state="open", labels=["conference"]) -conferences = [] -today = datetime.combine(datetime.now(), time()) - -for issue in open_issues: - if "conference" in [label.name for label in issue.labels]: - # Extract fields from issue body - name_match = re.search( - r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body - ) - url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body) - dates_match = re.search( - r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body - ) - type_match = re.search( - r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body - ) - location_match = re.search( - r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body - ) - summary_match = re.search( - r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", - issue.body, - re.DOTALL, - ) - speaking_match = re.search( - r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}", - issue.body, - re.DOTALL, - ) - - # Set a default value of None for when the url field isn't as expected - valid_url = None - - # Ensure the url field is not blank and the url matches the regex - if url_match is not None and url_match[1].strip() != "": - # Parse the url and see if a scheme (`https`) is included in it - # If not, then prepend `https` to the url from the issue body - # This guards against the website thinking the passed in url is another page on https://blackpythondevs.com/ - parsed_url = urlparse(url_match[1]) - if "http" not in parsed_url.scheme.casefold(): - valid_url = f"https://{url_match[1]}" - - if dates_match: - conferenceDates = dates_match[1] - # Parse the end date of the conference - endDateStr = conferenceDates.split("-")[1].strip() - endDate = datetime.strptime(endDateStr, "%d %b %Y") - # Check if the conference 
end date is greater than today - if endDate >= today: - conference = { - "name": name_match[1], - "url": valid_url, - "dates": dates_match[1], - "type": type_match[1], - "location": location_match[1], - "summary": summary_match[1], - "speaking": speaking_match[1] if speaking_match else "", - } - conferences.append(conference) - -# Write the conferences to the _data/conferences.yml file -with conferences_path.open("w") as f: - yaml.dump(conferences, f) + +def create_github_client(): + gh_token = os.getenv("GITHUB_TOKEN", "") + auth = Auth.Token(gh_token) + client = Github(auth=auth) + return client + + +def get_open_issues(gh: Github) -> PaginatedList[Issue]: + repo = gh.get_repo("BlackPythonDevs/blackpythondevs.github.io") + issues = repo.get_issues(state="open", labels=["conference"]) + return issues + + +def parse_conference_details(issue_body: str) -> dict | None: + # Extract fields from issue body + name_match = re.search( + r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body + ) + url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body) + dates_match = re.search( + r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body + ) + type_match = re.search( + r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body + ) + location_match = re.search( + r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body + ) + summary_match = re.search( + r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", + issue_body, + re.DOTALL, + ) + speaking_match = re.search( + r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}", + issue_body, + re.DOTALL, + ) + + # Set a default value of None for when the url field isn't as expected + valid_url = normalize_url() if not url_match else normalize_url(url_match[1]) + + if dates_match: + conferenceDates = dates_match[1] + # Parse the end date of the conference + endDateStr = conferenceDates.split("-")[1].strip() + endDate = datetime.strptime(endDateStr, "%d %b %Y") + # 
Check if the conference end date is greater than today + today = datetime.combine(datetime.now(), time()) + + if endDate >= today: + conference = { + "name": name_match[1], + "url": valid_url, + "dates": dates_match[1], + "type": type_match[1], + "location": location_match[1], + "summary": summary_match[1], + "speaking": speaking_match[1] if speaking_match else "", + } + return conference + return None + + +def normalize_url(url_match: str = None): + valid_url = None + # Ensure the url field is not blank and the url matches the regex + if url_match is not None and url_match.strip() != "": + # Parse the url and see if a scheme (`https`) is included in it + # If not, then prepend `https` to the url from the issue body + # This guards against the website thinking the passed in url is another page on https://blackpythondevs.com/ + parsed_url = urlparse(url_match) + if "http" not in parsed_url.scheme.casefold(): + valid_url = f"https://{url_match}" + else: + valid_url = url_match + return valid_url + + +def write_conferences_to_file(confs: list[dict]): + # Write the conferences to the _data/conferences.yml file + with conferences_path.open("w") as f: + yaml.dump(confs, f) + + +if __name__ == "__main__": + conferences = [] + + # Create Github client object + gh_client = create_github_client() + + # Get open issues from repo + open_issues: PaginatedList[Issue] = get_open_issues(gh_client) + + # Parse each conference issue so long as it has the "conference" label + for issue in open_issues: + if "conference" in [label.name for label in issue.labels]: + parsed_conf = parse_conference_details(issue_body=issue.body) + if parsed_conf: + conferences.append(parsed_conf) + + write_conferences_to_file(conferences) diff --git a/tests/test.py b/tests/test.py index 2653cb3..b31a506 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,6 +1,10 @@ +import time + import pytest from playwright.sync_api import Page, expect +from _conferences.__main__ import parse_conference_details + 
live_server_url = "http://127.0.0.1:4000" routes = [ @@ -11,6 +15,13 @@ ] +# Add a delay to each test to help with playwright race conditions +@pytest.fixture(autouse=True) +def slow_down_tests(): + yield + time.sleep(1) + + @pytest.mark.parametrize("url", routes) def test_destination( page: Page, @@ -101,3 +112,113 @@ def test_mailto_bpdevs(page: Page) -> None: page.goto(f"{live_server_url}") mailto = page.get_by_role("link", name="email") expect(mailto).to_have_attribute("href", "mailto:contact@blackpythondevs.com") + + +def test_conference_parsing_valid_url(): + example_conf_issue = """### Conference Name + +Test Conference Title + +### URL + +https://microsoft.com + +### Conference Dates + +10 - 15 Sep 2050 + +### Conference Type + +both + +### Conference Location + +Redmond, WA, USA + +### Summary + +Test Conference Summary + +### Speaking + +* [Satya Nadella](https://www.linkedin.com/in/satyanadella/) +""" + expected_name = "Test Conference Title" + expected_url = "https://microsoft.com" + parsed_conf = parse_conference_details(issue_body=example_conf_issue) + + assert parsed_conf["name"] == expected_name + assert parsed_conf["url"] == expected_url + + +def test_conference_parsing_logic_no_url_scheme(): + example_conf_issue = """### Conference Name + +Test Conference Title + +### URL + +microsoft.com + +### Conference Dates + +10 - 15 Sep 2050 + +### Conference Type + +both + +### Conference Location + +Redmond, WA, USA + +### Summary + +Test Conference Summary + +### Speaking + +* [Satya Nadella](https://www.linkedin.com/in/satyanadella/) +""" + expected_name = "Test Conference Title" + expected_url = "https://microsoft.com" + parsed_conf = parse_conference_details(issue_body=example_conf_issue) + + assert parsed_conf["name"] == expected_name + assert parsed_conf["url"] == expected_url + + +def test_conference_parsing_logic_no_url(): + example_conf_issue = """### Conference Name + +Test Conference Title + +### URL + + +### Conference Dates + +10 - 15 Sep 
2050 + +### Conference Type + +both + +### Conference Location + +Redmond, WA, USA + +### Summary + +Test Conference Summary + +### Speaking + +* [Satya Nadella](https://www.linkedin.com/in/satyanadella/) +""" + expected_name = "Test Conference Title" + expected_url = None + parsed_conf = parse_conference_details(issue_body=example_conf_issue) + + assert parsed_conf["name"] == expected_name + assert parsed_conf["url"] == expected_url