From 6be8abbb9e6fe7f4c893a9def238c5337d3d0d4e Mon Sep 17 00:00:00 2001 From: Shunfan Shao Date: Tue, 30 Nov 2021 03:46:49 -0600 Subject: [PATCH 1/8] Add pr-check for forked project --- format_checker/pr_checker.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/format_checker/pr_checker.py b/format_checker/pr_checker.py index 4f3be933..8c66f1fa 100644 --- a/format_checker/pr_checker.py +++ b/format_checker/pr_checker.py @@ -1,6 +1,7 @@ """Implements rule checks for the pr-data.csv file.""" import re +import requests from utils import log_std_error, log_warning from common_checks import ( check_common_rules, @@ -52,6 +53,9 @@ "Notes": re.compile( r"(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})" ), + "Repo Link": re.compile( + r"(https:\/\/github.com\/([\w|\.|-]+)\/([\w|\.|-]+))" + ) } @@ -138,6 +142,25 @@ def check_pr_link(filename, row, i, log): ): log_std_error(filename, log, i, row, "PR Link") +def check_project_url(filename, row, i, log): + project_url = row["Project URL"] + match = pr_data["Repo Link"].fullmatch(project_url) + if not match: + log_std_error(filename, log, i, row, "Project URL") + return + + author = match.group(2) + repo = match.group(3) + + url = "https://api.github.com/repos/{}/{}".format(author, repo) + try: + resp = requests.get(url).json() + # Determine if it is a forked project + if "fork" in resp and resp["fork"]: + log_std_error(filename, log, i, row, "Project URL") + except requests.exceptions.RequestException as e: + # handle(e) + pass def run_checks_pr(log, commit_range): """Checks that pr-data.csv is properly formatted.""" @@ -149,6 +172,7 @@ def run_checks_pr(log, commit_range): check_category, check_status, check_status_consistency, + check_project_url ] run_checks(filename, pr_data, log, commit_range, checks) check_sort(filename, log) From 
58352554ef1d9dff95a6d86fdb79f052ade14de0 Mon Sep 17 00:00:00 2001 From: Shunfan Shao Date: Tue, 30 Nov 2021 03:52:24 -0600 Subject: [PATCH 2/8] add deps --- format_checker/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/format_checker/requirements.txt b/format_checker/requirements.txt index 556ccd5a..253f6b01 100644 --- a/format_checker/requirements.txt +++ b/format_checker/requirements.txt @@ -1 +1,2 @@ errorhandler==2.0.1 +requests==2.18.4 \ No newline at end of file From acebe9901072653114839ea456f6acabbb63b616 Mon Sep 17 00:00:00 2001 From: shunfan Date: Tue, 30 Nov 2021 21:25:13 -0600 Subject: [PATCH 3/8] filter checked projects --- format_checker/common_checks.py | 28 +++++++++++++++++++++++++++- format_checker/pr_checker.py | 24 +----------------------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/format_checker/common_checks.py b/format_checker/common_checks.py index a656a3e1..e20a333e 100644 --- a/format_checker/common_checks.py +++ b/format_checker/common_checks.py @@ -3,6 +3,7 @@ import re import csv import subprocess +import requests from utils import ( get_committed_lines, get_uncommitted_lines, @@ -14,7 +15,7 @@ # Contains regexes for columns that are commmon to pr-data and tic-fic-data common_data = { - "Project URL": re.compile(r"(https:\/\/github.com)(\/(\w|\.|-)+){2}"), + "Project URL": re.compile(r"(https:\/\/github.com\/([\w|\.|-]+)\/([\w|\.|-]+))"), "SHA": re.compile(r"\b[0-9a-f]{40}\b"), "Module Path": re.compile(r"((\w|\.|-)+(\/|\w|\.|-)*)|^$"), "Fully-Qualified Name": re.compile( @@ -23,6 +24,27 @@ } +def check_project_url(checked_projects, filename, row, i, log): + project_url = row["Project URL"] + if project_url in checked_projects: + return + checked_projects.add(project_url) + + match = common_data["Project URL"].fullmatch(project_url) + author = match.group(2) + repo = match.group(3) + + url = "https://api.github.com/repos/{}/{}".format(author, repo) + try: + resp = requests.get(url).json() + # 
Determine if it is a forked project + if "fork" in resp and resp["fork"]: + log_std_error(filename, log, i, row, "Project URL") + except requests.exceptions.RequestException as e: + # handle(e) + pass + + def check_header(header, valid_dict, filename, log): """Validates that the header is correct.""" @@ -108,6 +130,7 @@ def run_checks(file, data_dict, log, commit_range, checks): if "1" in uncommitted_lines or "1" in committed_lines: check_header(list(header.values()), data_dict, file, log) if uncommitted_lines != [] or committed_lines != []: + checkd_projects = set() for i, row in enumerate(info): i += 2 line = str(i) @@ -123,6 +146,9 @@ def run_checks(file, data_dict, log, commit_range, checks): if check_rule.__name__ == check_row_length.__name__: check_rule(len(header), *params) continue + if check_rule.__name__ == check_project_url.__name__: + check_rule(checkd_projects, *params) + continue check_rule(*params) else: log_info(file, log, "There are no changes to be checked") diff --git a/format_checker/pr_checker.py b/format_checker/pr_checker.py index 8c66f1fa..ae808f14 100644 --- a/format_checker/pr_checker.py +++ b/format_checker/pr_checker.py @@ -1,13 +1,13 @@ """Implements rule checks for the pr-data.csv file.""" import re -import requests from utils import log_std_error, log_warning from common_checks import ( check_common_rules, check_row_length, check_sort, run_checks, + check_project_url ) @@ -53,9 +53,6 @@ "Notes": re.compile( r"(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})" ), - "Repo Link": re.compile( - r"(https:\/\/github.com\/([\w|\.|-]+)\/([\w|\.|-]+))" - ) } @@ -142,25 +139,6 @@ def check_pr_link(filename, row, i, log): ): log_std_error(filename, log, i, row, "PR Link") -def check_project_url(filename, row, i, log): - project_url = row["Project URL"] - match = pr_data["Repo 
Link"].fullmatch(project_url) - if not match: - log_std_error(filename, log, i, row, "Project URL") - return - - author = match.group(2) - repo = match.group(3) - - url = "https://api.github.com/repos/{}/{}".format(author, repo) - try: - resp = requests.get(url).json() - # Determine if it is a forked project - if "fork" in resp and resp["fork"]: - log_std_error(filename, log, i, row, "Project URL") - except requests.exceptions.RequestException as e: - # handle(e) - pass def run_checks_pr(log, commit_range): """Checks that pr-data.csv is properly formatted.""" From e62acb784cd7908f850c39207a8e3efdb041858c Mon Sep 17 00:00:00 2001 From: Shunfan Shao Date: Wed, 1 Dec 2021 01:15:14 -0600 Subject: [PATCH 4/8] fix naming --- format_checker/common_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/format_checker/common_checks.py b/format_checker/common_checks.py index e20a333e..dccedef5 100644 --- a/format_checker/common_checks.py +++ b/format_checker/common_checks.py @@ -130,7 +130,7 @@ def run_checks(file, data_dict, log, commit_range, checks): if "1" in uncommitted_lines or "1" in committed_lines: check_header(list(header.values()), data_dict, file, log) if uncommitted_lines != [] or committed_lines != []: - checkd_projects = set() + checked_projects = set() for i, row in enumerate(info): i += 2 line = str(i) @@ -147,7 +147,7 @@ def run_checks(file, data_dict, log, commit_range, checks): check_rule(len(header), *params) continue if check_rule.__name__ == check_project_url.__name__: - check_rule(checkd_projects, *params) + check_rule(checked_projects, *params) continue check_rule(*params) else: From abd980cabda64ff4e1cdb3268074643e5473df66 Mon Sep 17 00:00:00 2001 From: Shunfan Shao Date: Wed, 1 Dec 2021 01:25:03 -0600 Subject: [PATCH 5/8] add a forked repo to valid correctness --- pr-data.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/pr-data.csv b/pr-data.csv index 49ad1823..9f2b7635 100644 --- a/pr-data.csv +++ b/pr-data.csv @@ 
-2736,3 +2736,4 @@ https://github.com/zalando/riptide,8277e11fc069d8e24df0d233ef2577cc75659b75,ript https://github.com/zalando/riptide,8277e11fc069d8e24df0d233ef2577cc75659b75,riptide-spring-boot-starter,org.zalando.riptide.spring.ObjectMapperOverrideTest.shouldOverride,ID,MovedOrRenamed,, https://github.com/zalando/riptide,8277e11fc069d8e24df0d233ef2577cc75659b75,riptide-spring-boot-starter,org.zalando.riptide.spring.url.UrlResolutionTest.shouldAppendUrl,UD,,, https://github.com/zhangxd1989/spring-boot-cloud,e3966d7cefa4fa429d13bbc8de7f4dafbae0de35,registry,cn.zhangxd.registry.ApplicationTests.catalogLoads,ID,,, +https://github.com/shunfan-shao/idoft,e3966d7cefa4fa429d13bbc8de7f4dafbae0de35,registry,cn.zhangxd.registry.ApplicationTests.catalogLoads,ID,,, From a2c0d8dee6b0eff2e45353f5c04c84e5936b3a80 Mon Sep 17 00:00:00 2001 From: Shunfan Shao Date: Wed, 1 Dec 2021 01:32:31 -0600 Subject: [PATCH 6/8] reorder --- pr-data.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr-data.csv b/pr-data.csv index 9f2b7635..ddfabbf2 100644 --- a/pr-data.csv +++ b/pr-data.csv @@ -1974,6 +1974,7 @@ https://github.com/schemacrawler/SchemaCrawler,18d6c5fb79c8e27cdeafc317ee63b2783 https://github.com/schemacrawler/SchemaCrawler,18d6c5fb79c8e27cdeafc317ee63b27837aa6fec,schemacrawler-utility,us.fatehi.utility.test.html.TableCellTest.td2,ID,InspiredAFix,https://github.com/schemacrawler/SchemaCrawler/pull/525, https://github.com/seata/seata,d334f85395887193f72fa57daea906a73a2da34e,common,io.seata.common.util.CollectionUtilsTest.encodeMap,ID,Accepted,https://github.com/seata/seata/pull/1802, https://github.com/seata/seata,d334f85395887193f72fa57daea906a73a2da34e,tm,io.seata.tm.api.transaction.TransactionInfoTest.testFieldGetSetFromJson,ID,Deleted,,https://github.com/seata/seata/commit/6c0c75939dedbf19c4a21a8ac7aa0fb98879a8fd +https://github.com/shunfan-shao/idoft,e3966d7cefa4fa429d13bbc8de7f4dafbae0de35,registry,cn.zhangxd.registry.ApplicationTests.catalogLoads,ID,,, 
https://github.com/skyscreamer/JSONassert,523009b2576b9f54ed78c6f4720fba87f79b1466,.,org.skyscreamer.jsonassert.comparator.CustomComparatorTest.testFullArrayComparison,ID,,, https://github.com/Slimefun/Slimefun4,d5e4149b4f4d83dd1620d4a4cbb6d876903c851c,.,io.github.thebusybiscuit.slimefun4.api.items.settings.TestMaterialTagSetting.testAllowedValue,ID,Accepted,https://github.com/Slimefun/Slimefun4/pull/3179, https://github.com/Slimefun/Slimefun4,282367d6ffaf60d79f1fa357d90b01ca154c44f2,.,io.github.thebusybiscuit.slimefun4.testing.tests.settings.TestMaterialTagSetting.testAllowedValue,ID,MovedOrRenamed,,https://github.com/TestingResearchIllinois/idoft/issues/51 @@ -2736,4 +2737,3 @@ https://github.com/zalando/riptide,8277e11fc069d8e24df0d233ef2577cc75659b75,ript https://github.com/zalando/riptide,8277e11fc069d8e24df0d233ef2577cc75659b75,riptide-spring-boot-starter,org.zalando.riptide.spring.ObjectMapperOverrideTest.shouldOverride,ID,MovedOrRenamed,, https://github.com/zalando/riptide,8277e11fc069d8e24df0d233ef2577cc75659b75,riptide-spring-boot-starter,org.zalando.riptide.spring.url.UrlResolutionTest.shouldAppendUrl,UD,,, https://github.com/zhangxd1989/spring-boot-cloud,e3966d7cefa4fa429d13bbc8de7f4dafbae0de35,registry,cn.zhangxd.registry.ApplicationTests.catalogLoads,ID,,, -https://github.com/shunfan-shao/idoft,e3966d7cefa4fa429d13bbc8de7f4dafbae0de35,registry,cn.zhangxd.registry.ApplicationTests.catalogLoads,ID,,, From c3e72c84bf772bddc6bc7a1b230ff8493343f0e2 Mon Sep 17 00:00:00 2001 From: Shunfan Shao Date: Wed, 1 Dec 2021 01:35:22 -0600 Subject: [PATCH 7/8] renaming --- format_checker/common_checks.py | 4 ++-- format_checker/pr_checker.py | 4 ++-- pr-data.csv | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/format_checker/common_checks.py b/format_checker/common_checks.py index dccedef5..aac35be6 100644 --- a/format_checker/common_checks.py +++ b/format_checker/common_checks.py @@ -24,7 +24,7 @@ } -def check_project_url(checked_projects, 
filename, row, i, log): +def check_repo_sanity(checked_projects, filename, row, i, log): project_url = row["Project URL"] if project_url in checked_projects: return @@ -146,7 +146,7 @@ def run_checks(file, data_dict, log, commit_range, checks): if check_rule.__name__ == check_row_length.__name__: check_rule(len(header), *params) continue - if check_rule.__name__ == check_project_url.__name__: + if check_rule.__name__ == check_repo_sanity.__name__: check_rule(checked_projects, *params) continue check_rule(*params) diff --git a/format_checker/pr_checker.py b/format_checker/pr_checker.py index ae808f14..947f319c 100644 --- a/format_checker/pr_checker.py +++ b/format_checker/pr_checker.py @@ -7,7 +7,7 @@ check_row_length, check_sort, run_checks, - check_project_url + check_repo_sanity ) @@ -150,7 +150,7 @@ def run_checks_pr(log, commit_range): check_category, check_status, check_status_consistency, - check_project_url + check_repo_sanity ] run_checks(filename, pr_data, log, commit_range, checks) check_sort(filename, log) diff --git a/pr-data.csv b/pr-data.csv index ddfabbf2..49ad1823 100644 --- a/pr-data.csv +++ b/pr-data.csv @@ -1974,7 +1974,6 @@ https://github.com/schemacrawler/SchemaCrawler,18d6c5fb79c8e27cdeafc317ee63b2783 https://github.com/schemacrawler/SchemaCrawler,18d6c5fb79c8e27cdeafc317ee63b27837aa6fec,schemacrawler-utility,us.fatehi.utility.test.html.TableCellTest.td2,ID,InspiredAFix,https://github.com/schemacrawler/SchemaCrawler/pull/525, https://github.com/seata/seata,d334f85395887193f72fa57daea906a73a2da34e,common,io.seata.common.util.CollectionUtilsTest.encodeMap,ID,Accepted,https://github.com/seata/seata/pull/1802, https://github.com/seata/seata,d334f85395887193f72fa57daea906a73a2da34e,tm,io.seata.tm.api.transaction.TransactionInfoTest.testFieldGetSetFromJson,ID,Deleted,,https://github.com/seata/seata/commit/6c0c75939dedbf19c4a21a8ac7aa0fb98879a8fd 
-https://github.com/shunfan-shao/idoft,e3966d7cefa4fa429d13bbc8de7f4dafbae0de35,registry,cn.zhangxd.registry.ApplicationTests.catalogLoads,ID,,, https://github.com/skyscreamer/JSONassert,523009b2576b9f54ed78c6f4720fba87f79b1466,.,org.skyscreamer.jsonassert.comparator.CustomComparatorTest.testFullArrayComparison,ID,,, https://github.com/Slimefun/Slimefun4,d5e4149b4f4d83dd1620d4a4cbb6d876903c851c,.,io.github.thebusybiscuit.slimefun4.api.items.settings.TestMaterialTagSetting.testAllowedValue,ID,Accepted,https://github.com/Slimefun/Slimefun4/pull/3179, https://github.com/Slimefun/Slimefun4,282367d6ffaf60d79f1fa357d90b01ca154c44f2,.,io.github.thebusybiscuit.slimefun4.testing.tests.settings.TestMaterialTagSetting.testAllowedValue,ID,MovedOrRenamed,,https://github.com/TestingResearchIllinois/idoft/issues/51 From 552872702dc9dc9c0af83b83cdc21506d42b2310 Mon Sep 17 00:00:00 2001 From: Shunfan Shao Date: Fri, 3 Dec 2021 20:05:38 -0600 Subject: [PATCH 8/8] clean up --- format_checker/common_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/format_checker/common_checks.py b/format_checker/common_checks.py index aac35be6..44fb23e0 100644 --- a/format_checker/common_checks.py +++ b/format_checker/common_checks.py @@ -38,8 +38,8 @@ def check_repo_sanity(checked_projects, filename, row, i, log): try: resp = requests.get(url).json() # Determine if it is a forked project - if "fork" in resp and resp["fork"]: - log_std_error(filename, log, i, row, "Project URL") + if resp.get("fork"): + log_esp_error(filename, log, f"{author}/{repo} is a forked repo") except requests.exceptions.RequestException as e: # handle(e) pass