diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 96b677037de75..b95bf1e3f91ee 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -315,6 +315,7 @@ jobs: with: source-directory: ${{ github.workspace }}/monorepo token: ${{ secrets.API_TOKEN_GITHUB }} + upstream-ref-since: '2024-04-10' # No point in checking 12 years of earlier commits from before we started adding "Upstream-Ref". username: matticbot working-directory: ${{ github.workspace }}/build - timeout-minutes: 5 # 2021-01-18: Successful runs seem to take about half a minute. + timeout-minutes: 10 # 2024-04-11: Successful runs seem to take about a minute. diff --git a/projects/github-actions/push-to-mirrors/README.md b/projects/github-actions/push-to-mirrors/README.md index dbb4376776303..78dc5df7352c9 100644 --- a/projects/github-actions/push-to-mirrors/README.md +++ b/projects/github-actions/push-to-mirrors/README.md @@ -41,14 +41,28 @@ This action is intended to be triggered by a `push` event. # `source-directory` to fetch the message used for the commit. commit-message: + # Set to `true` to suppress the addition of "Upstream-Ref" footers in the + # mirrored commits. + no-upstream-refs: + # Directory containing a checkout of the monorepo revision being mirrored. - # Used to fetch certain git metadata for the mirror commits. + # Used to fetch git metadata for the mirror commits, and to find the base + # commit for new mirror branches. source-directory: ${{ github.workspace }} # GitHub Access Token. This token must allow for pushing to all relevant # branches of all relevant mirror repos. token: + # When checking "Upstream-Ref" to find a base commit for a new mirror + # branch, only consider this many monorepo commits at most. 
+ upstream-ref-count: + + # When checking "Upstream-Ref" to find a base commit for a new mirror + # branch, only consider monorepo commits since this date (in any format + # accepted by `git log`'s `--since` or `--since-as-filter` parameter). + upstream-ref-since: + # Name of the user the token belongs to. username: diff --git a/projects/github-actions/push-to-mirrors/action.yml b/projects/github-actions/push-to-mirrors/action.yml index b2e604d9b3064..444a5e62fde51 100644 --- a/projects/github-actions/push-to-mirrors/action.yml +++ b/projects/github-actions/push-to-mirrors/action.yml @@ -9,10 +9,15 @@ inputs: Commit message to use for the mirror commits. If omitted, it will be read from the monorepo checkout pointed at by `source-directory`. required: false + no-upstream-refs: + description: > + Set to `true` to suppress the "Upstream-Ref" footers in mirrored commits. + required: false source-directory: description: > - Directory containing a checkout of the commit being mirrored. Only used - if `commit-message` is not specified. + Directory containing a checkout of the commit being mirrored. Used to + fetch git metadata for the mirror commits, and to find the base commit + for new mirror branches. required: false default: ${{ github.workspace }} token: @@ -20,6 +25,17 @@ inputs: GitHub Access Token. This token must allow for pushing to all relevant branches of all relevant mirror repos. required: true + upstream-ref-count: + description: > + When checking "Upstream-Ref" to find a base commit for a new mirror + branch, only consider this many monorepo commits at most. + required: false + upstream-ref-since: + description: > + When checking "Upstream-Ref" to find a base commit for a new mirror + branch, only consider monorepo commits since this date (in any format + accepted by `git log`'s `--since` or `--since-as-filter` parameter). + required: false username: description: Name of the user the token belongs to. 
required: true @@ -44,7 +60,10 @@ runs: BUILD_BASE: ${{ inputs.working-directory }} COMMIT_MESSAGE: ${{ inputs.commit-message }} DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + NO_UPSTREAM_REFS: ${{ inputs.no-upstream-refs }} SOURCE_DIR: ${{ inputs.source-directory }} + UPSTREAM_REF_COUNT: ${{ inputs.upstream-ref-count }} + UPSTREAM_REF_SINCE: ${{ inputs.upstream-ref-since }} USER_EMAIL: ${{ inputs.user-email }} USER_NAME: ${{ inputs.username }} run: $GITHUB_ACTION_PATH/push-to-mirrors.sh diff --git a/projects/github-actions/push-to-mirrors/changelog/add-push-to-mirrors-upstream-ref b/projects/github-actions/push-to-mirrors/changelog/add-push-to-mirrors-upstream-ref new file mode 100644 index 0000000000000..7a2f8b14f8918 --- /dev/null +++ b/projects/github-actions/push-to-mirrors/changelog/add-push-to-mirrors-upstream-ref @@ -0,0 +1,4 @@ +Significance: minor +Type: added + +Add footers like `Upstream-Ref: owner/repo@sha` to mirrored commits, to make it easy to find the source of any particular mirrored commit. New workflow parameter `no-upstream-refs` may be set to disable this. diff --git a/projects/github-actions/push-to-mirrors/changelog/add-push-to-mirrors-upstream-ref#2 b/projects/github-actions/push-to-mirrors/changelog/add-push-to-mirrors-upstream-ref#2 new file mode 100644 index 0000000000000..a6ec19dff27af --- /dev/null +++ b/projects/github-actions/push-to-mirrors/changelog/add-push-to-mirrors-upstream-ref#2 @@ -0,0 +1,4 @@ +Significance: minor +Type: added + +Use the `Upstream-Ref` footers to find better base commits for newly-mirrored branches. Add workflow parameters `upstream-ref-count` and `upstream-ref-since` to control how many commits are searched. 
diff --git a/projects/github-actions/push-to-mirrors/push-to-mirrors.sh b/projects/github-actions/push-to-mirrors/push-to-mirrors.sh index c464b1ebb9b19..81cd439f6f500 100755 --- a/projects/github-actions/push-to-mirrors/push-to-mirrors.sh +++ b/projects/github-actions/push-to-mirrors/push-to-mirrors.sh @@ -8,22 +8,31 @@ # - BUILD_BASE: Path to the build directory, which contains "mirrors.txt" and directories for each repo to mirror to. # - GITHUB_ACTOR: GitHub username for the commit being mirrored. # - GITHUB_REF: Git ref being mirrored from, e.g. "refs/heads/main". Must begin with "refs/heads/". +# - GITHUB_REPOSITORY: GH repository. # - GITHUB_SERVER_URL: The URL of the GitHub server. For example: https://github.com +# - SOURCE_DIR: Source checkout being mirrored from. # # Other: # - API_TOKEN_GITHUB: Personal access token to use when accessing GitHub. # - CI: If unset or empty, the commits will be prepared but the actual push will not happen. -# - COMMIT_MESSAGE: Commit message to use for the mirror commits. Will be read from HEAD in `SOURCE_DIR` if not specified. -# - GITHUB_REPOSITORY: GH repository, used in the commit message if `COMMIT_MESSAGE` is not specified. +# - COMMIT_MESSAGE: Commit message to use for the mirror commits. Will be read from commit `GITHUB_SHA` in `SOURCE_DIR` if not specified. +# - DEFAULT_BRANCH: Branch to base new commits on if the `GITHUB_REF` branch doesn't already exist in the mirror and a better commit can't be found. # - GITHUB_RUN_ID: GH Actions run ID, used in the commit message if `COMMIT_MESSAGE` is not specified. -# - GITHUB_SHA: Head SHA1 from which to fetch the commit message for the commit being mirrored. HEAD will be assumed if not specified. -# - SOURCE_DIR: Source directory, used when `COMMIT_MESSAGE` is not specified. +# - GITHUB_SHA: Head SHA1. HEAD will be assumed if not specified. +# - NO_UPSTREAM_REFS: Set to 'true' to suppress the "Upstream-Ref" footer in commit messages. 
Note this will make `DEFAULT_BRANCH` be used more often. +# - UPSTREAM_REF_COUNT: When checking Upstream-Ref to find a base commit for a new mirror branch, only consider this many monorepo commits at most. +# - UPSTREAM_REF_SINCE: When checking Upstream-Ref to find a base commit for a new mirror branch, only consider monorepo commits since this date (in any format accepted by `git log`'s `--since` or `--since-as-filter` parameter). # - USER_NAME: Git user name to use when making the commit to the mirror repo. # - USER_EMAIL: Email address to use when making the commit to the mirror repo. Defaults to "$USER_NAME@users.noreply.github.com" # Halt on error set -eo pipefail +: "${GITHUB_ACTOR:?Must be set and not empty}" +: "${GITHUB_REF:?Must be set and not empty}" +: "${GITHUB_REPOSITORY:?Must be set and not empty}" +: "${GITHUB_SERVER_URL:?Must be set and not empty}" + if [[ -n "$CI" ]]; then export GIT_AUTHOR_NAME="$USER_NAME" export GIT_AUTHOR_EMAIL="${USER_EMAIL:-${USER_NAME}@users.noreply.github.com}" @@ -39,11 +48,8 @@ elif [[ ! 
-d "$BUILD_BASE" ]]; then exit 1 fi -if [[ -z "$COMMIT_MESSAGE" ]]; then - MONOREPO_COMMIT_MESSAGE=$(cd "${SOURCE_DIR:-.}" && git show -s --format=%B $GITHUB_SHA) - COMMIT_MESSAGE=$( printf "%s\n\nCommitted via a GitHub action: %s/%s/actions/runs/%s\n" "$MONOREPO_COMMIT_MESSAGE" "$GITHUB_SERVER_URL" "$GITHUB_REPOSITORY" "$GITHUB_RUN_ID" ) -fi -COMMIT_ORIGINAL_AUTHOR="${GITHUB_ACTOR} <${GITHUB_ACTOR}@users.noreply.github.com>" +cd "${SOURCE_DIR:-.}" +SOURCE_DIR="$PWD" if [[ "$GITHUB_REF" =~ ^refs/heads/ ]]; then BRANCH=${GITHUB_REF#refs/heads/} @@ -62,6 +68,68 @@ fi : > "$BUILD_BASE/changes.diff" +if [[ -z "$COMMIT_MESSAGE" ]]; then + MONOREPO_COMMIT_MESSAGE=$( git show -s --format=%B "${GITHUB_SHA:-HEAD}" ) + COMMIT_MESSAGE=$( printf "%s\n\nCommitted via a GitHub action: %s/%s/actions/runs/%s\n" "$MONOREPO_COMMIT_MESSAGE" "$GITHUB_SERVER_URL" "$GITHUB_REPOSITORY" "$GITHUB_RUN_ID" ) +fi +COMMIT_ORIGINAL_AUTHOR="${GITHUB_ACTOR} <${GITHUB_ACTOR}@users.noreply.github.com>" + +UPSTREAM_REF= +if [[ "$NO_UPSTREAM_REFS" != 'true' ]]; then + if [[ -z "$GITHUB_SHA" ]]; then + SHA=$(git rev-parse HEAD) + fi + UPSTREAM_REF="Upstream-Ref: $GITHUB_REPOSITORY@${GITHUB_SHA:-$SHA}" + + if [[ -f .git/shallow ]]; then + echo "::group::Fetching treeless commits for source repo" + git -c protocol.version=2 fetch --unshallow --filter=tree:0 --no-tags --progress --no-recurse-submodules origin HEAD + echo "::endgroup::" + fi + + ARGS=() + if [[ -n "$UPSTREAM_REF_SINCE" ]]; then + # GitHub may not have an up-to-date git + if git log --max-count=1 --since-as-filter='now' &>/dev/null; then + ARGS+=( --since-as-filter="$UPSTREAM_REF_SINCE" ) + else + ARGS+=( --since="$UPSTREAM_REF_SINCE" ) + fi + fi + if [[ -n "$UPSTREAM_REF_COUNT" ]]; then + ARGS+=( --max-count="$UPSTREAM_REF_COUNT" ) + fi + mapfile -t REFS < <( cd "$SOURCE_DIR" && git log "${ARGS[@]}" --format=%H "${GITHUB_SHA:-HEAD}" || true ) + echo "Considering ${#REFS[@]} monorepo commits for Upstream-Ref matching." 
+ + # Batch the commits into sets of 3180 to keep each call later under the 128KiB limit on argument length. + # 3180 40-byte shas + 3179 separators + 23 bytes of static text leaves 670 bytes for $GITHUB_REPOSITORY. + # Current max repo name seems to be either 140 or 557 (39 for the owner, 100 for the name, and '/'), depending on whether they've started allowing non-ASCII alphanumerics yet. + UPSTREAM_REGEXES=() + NL=$'\n' + for (( i=0; i<"${#REFS[@]}"; i+=3180 )); do + UPSTREAM_REGEXES+=( "${NL}Upstream-Ref: $GITHUB_REPOSITORY@($( IFS="|"; echo "${REFS[*]:$i:3180}" ))($|${NL})" ) + done +fi + +function get_upstream_sha { + if [[ "$NO_UPSTREAM_REFS" != 'true' ]] && + git -c protocol.version=2 fetch --filter=tree:0 --tags --progress --no-recurse-submodules origin >&2 + then + local regex + for regex in "${UPSTREAM_REGEXES[@]}"; do + local dstsha + if dstsha=$( git rev-parse --verify --quiet ":/$regex" ) && + git -c protocol.version=2 fetch --no-tags --prune --progress --no-recurse-submodules --depth=1 origin "$dstsha" >&2 + then + echo "$dstsha" + return 0 + fi + done + fi + return 1 +} + EXIT=0 while read -r GIT_SLUG; do printf "\n\n\e[7m Mirror: %s \e[0m\n" "$GIT_SLUG" @@ -72,7 +140,7 @@ while read -r GIT_SLUG; do git init -b "$BRANCH" . git remote add origin "${GITHUB_SERVER_URL}/${GIT_SLUG}" if [[ -n "$API_TOKEN_GITHUB" ]]; then - git config --local http.${GITHUB_SERVER_URL}/.extraheader "AUTHORIZATION: basic $(printf "x-access-token:%s" "$API_TOKEN_GITHUB" | base64)" + git config --local "http.${GITHUB_SERVER_URL}/.extraheader" "AUTHORIZATION: basic $(printf "x-access-token:%s" "$API_TOKEN_GITHUB" | base64 -w 0)" fi # Check if a remote exists for that mirror. 
@@ -84,14 +152,19 @@ while read -r GIT_SLUG; do fi echo "::group::Fetching ${GIT_SLUG}" + FORCE_COMMIT= if git -c protocol.version=2 fetch --no-tags --prune --progress --no-recurse-submodules --depth=1 origin "$BRANCH"; then git reset --soft FETCH_HEAD - echo "Fetched revision $(git rev-parse HEAD)" + echo "Fetched revision $(git rev-parse HEAD) for branch $BRANCH" + elif UPSTREAM_SHA=$( get_upstream_sha ); then + FORCE_COMMIT=--allow-empty + git reset --soft "$UPSTREAM_SHA" + echo "Found parent commit $UPSTREAM_SHA" elif [[ -n "$DEFAULT_BRANCH" ]] && git -c protocol.version=2 fetch --no-tags --prune --progress --no-recurse-submodules --depth=1 origin "$DEFAULT_BRANCH"; then FORCE_COMMIT=--allow-empty git reset --soft FETCH_HEAD - echo "Fetched revision $(git rev-parse HEAD)" + echo "Fetched revision $(git rev-parse HEAD) for branch $DEFAULT_BRANCH" else echo "Failed to find a branch to branch from, just creating an empty one." FORCE_COMMIT=--allow-empty @@ -101,7 +174,7 @@ while read -r GIT_SLUG; do if [[ -n "$FORCE_COMMIT" || -n "$(git status --porcelain)" ]]; then echo "Committing to $GIT_SLUG" - if git commit --quiet $FORCE_COMMIT --author="${COMMIT_ORIGINAL_AUTHOR}" -m "${COMMIT_MESSAGE}" && + if git commit --quiet $FORCE_COMMIT --author="${COMMIT_ORIGINAL_AUTHOR}" -m "${COMMIT_MESSAGE}" -m "${UPSTREAM_REF}" && { [[ -z "$CI" ]] || git push origin "$BRANCH"; } # Only do the actual push from the GitHub Action then git show --pretty= --src-prefix="a/$GIT_SLUG/" --dst-prefix="b/$GIT_SLUG/" >> "$BUILD_BASE/changes.diff"