Skip to content

Commit

Permalink
Merge pull request #2198 from cms-sw/limit-pr-changed-files-2
Browse files Browse the repository at this point in the history
Limit how many files can be changed in a PR
  • Loading branch information
smuzaffar authored Jul 8, 2024
2 parents 3cf3e69 + 0665b81 commit cabf39c
Showing 1 changed file with 68 additions and 4 deletions.
72 changes: 68 additions & 4 deletions process_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def format(s, **kwds):
)
REGEX_COMMITS_CACHE = re.compile(r"<!-- (?:commits|bot) cache: (.*) -->", re.DOTALL)
REGEX_IGNORE_COMMIT_COUNT = "\+commit-count"
REGEX_IGNORE_FILE_COUNT = "\+file-count"
TEST_WAIT_GAP = 720
ALL_CHECK_FUNCTIONS = None
EXTRA_RELVALS_TESTS = ["threading", "gpu", "high-stats", "nano"]
Expand Down Expand Up @@ -190,6 +191,9 @@ def format(s, **kwds):
BOT_CACHE_CHUNK_SIZE = 55000
TOO_MANY_COMMITS_WARN_THRESHOLD = 150
TOO_MANY_COMMITS_FAIL_THRESHOLD = 240
TOO_MANY_FILES_WARN_THRESHOLD = 1500
TOO_MANY_FILES_FAIL_THRESHOLD = 3001
CHANGED_FILES_FROM_DIFF_THRESHOLD = 500
L2_DATA = {}


Expand Down Expand Up @@ -748,7 +752,7 @@ def multiline_check_function(first_line, comment_lines, repository):


def get_changed_files(repo, pr, use_gh_patch=False):
if (not use_gh_patch) and (pr.changed_files <= 300):
if (not use_gh_patch) and (pr.changed_files <= CHANGED_FILES_FROM_DIFF_THRESHOLD):
pr_files = []
for f in pr.get_files():
pr_files.append(f.filename)
Expand All @@ -760,13 +764,25 @@ def get_changed_files(repo, pr, use_gh_patch=False):
print("PR Files: ", pr_files)
return pr_files
cmd = (
"curl -s -L https://patch-diff.githubusercontent.com/raw/%s/pull/%s.patch | grep '^diff --git ' | sed 's|.* a/||;s| *b/| |' | tr ' ' '\n' | sort | uniq"
"curl -s -L https://patch-diff.githubusercontent.com/raw/%s/pull/%s.diff | grep '^diff --git ' | sed 's|.* a/||;s| *b/| |' | tr ' ' '\n' | sort | uniq"
% (repo.full_name, pr.number)
)
e, o = run_cmd(cmd)
if e:
return []
return o.split("\n")
print(
"ERROR: Request to https://patch-diff.githubusercontent.com/raw/%s/pull/%s.diff failed"
% (repo.full_name, pr.number)
)
print(e)
exit(1)
if o:
return o.split("\n")
else:
print(
"ERROR: Request to https://patch-diff.githubusercontent.com/raw/%s/pull/%s.diff did not return any changes, is the PR too big?"
% (repo.full_name, pr.number)
)
exit(1)


def get_backported_pr(msg):
Expand Down Expand Up @@ -940,6 +956,8 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F
all_commit_shas = set()
ok_too_many_commits = False
warned_too_many_commits = False
ok_too_many_files = False
warned_too_many_files = False

if issue.pull_request:
pr = repo.get_pull(prId)
Expand Down Expand Up @@ -1326,6 +1344,11 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F
if commenter in CMSSW_ISSUES_TRACKERS:
ok_too_many_commits = pr.commits < TOO_MANY_COMMITS_FAIL_THRESHOLD
comment_emoji = "+1"
elif re.match(r"^\s*" + REGEX_IGNORE_FILE_COUNT + r"\s*$", first_line):
comment_emoji = "-1"
if commenter in CMSSW_ISSUES_TRACKERS:
ok_too_many_files = pr.changed_files < TOO_MANY_FILES_FAIL_THRESHOLD
comment_emoji = "+1"

if comment_emoji:
set_comment_emoji_cache(dryRun, bot_cache, comment, repository, emoji=comment_emoji)
Expand Down Expand Up @@ -1424,6 +1447,14 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F
) or "This PR contains too many commits" in first_line:
warned_too_many_commits = True
continue

if (
"This PR touches many files" in first_line
and pr.changed_files < TOO_MANY_FILES_FAIL_THRESHOLD
) or "This PR touches too many files" in first_line:
warned_too_many_files = True
continue

sec_line = comment_lines[1:2]
if not sec_line:
sec_line = ""
Expand Down Expand Up @@ -1579,6 +1610,39 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F
print("Commit count reached and not overridden, quitting")
return

if (
pr.changed_files >= TOO_MANY_FILES_WARN_THRESHOLD
and (not warned_too_many_files)
and (not ok_too_many_files)
):
if pr.changed_files < TOO_MANY_FILES_FAIL_THRESHOLD:
if not dryRun:
issue.create_comment(
"This PR touches many files ({0} >= {1}) and will not be processed. "
"Please ensure you have selected the correct target branch and consider splitting this PR into several.\n"
"{2}, to re-enable processing of this PR, you can write `+file-count` in a comment. Thanks.".format(
pr.changed_files,
TOO_MANY_FILES_WARN_THRESHOLD,
", ".join([gh_user_char + name for name in CMSSW_ISSUES_TRACKERS]),
)
)
else:
if not dryRun:
issue.create_comment(
"This PR touches too many files ({0} >= {1}) and will not be processed.\n"
"Please ensure you have selected the correct target branch and consider splitting this PR into several.\n"
"The processing of this PR will resume once the number of changed files drops below the limit.".format(
pr.changed_files,
TOO_MANY_FILES_FAIL_THRESHOLD,
)
)

return

if pr.changed_files >= TOO_MANY_FILES_WARN_THRESHOLD and not ok_too_many_files:
print("Changed file count reached and not overridden, quitting")
return

print("Processing commits")

# Make sure to mark squashed=False if a cached/squashed commit is added back
Expand Down

0 comments on commit cabf39c

Please sign in to comment.