# testing pr - Np jw test illumina genotyping arrays #312
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
name: Test Illumina Genotyping Array

# Controls when the workflow will run.
on:
  # Run on push to feature branch "kp_GHA_Terra_auth_PD-2682" - REMOVE WHEN DONE TESTING
  # push:
  #   branches:
  #     - kp_GHA_Terra_auth_PD-2682
  pull_request:
    branches: ["develop", "staging", "master"]
    # Only run if files in these paths changed:
    # pipelines/broad/genotyping/illumina, tasks, verification,
    # .github/workflows/test_illumina_genotyping_array.yml
    ####################################
    # SET PIPELINE SPECIFIC PATHS HERE #
    ####################################
    paths:
      - 'pipelines/broad/genotyping/illumina/**'
      - 'tasks/**'
      - 'verification/**'
      - '.github/workflows/test_illumina_genotyping_array.yml'
  # Allows you to run this workflow manually from the Actions tab.
  workflow_dispatch:
    inputs:
      # Kept as strings: the job compares these values against the literal "true".
      useCallCache:
        description: 'Use call cache (default: true)'
        required: false
        default: "true"
      updateTruth:
        description: 'Update truth files (default: false)'
        required: false
        default: "false"
      # Restricted to the two supported values so a typo is rejected when the
      # run is dispatched instead of surfacing later as a missing
      # test_inputs/<testType> directory.
      testType:
        description: 'Specify the type of test (Plumbing or Scientific)'
        required: true
        type: choice
        options:
          - Plumbing
          - Scientific
      truthBranch:
        description: 'Specify the branch for truth files (default: master)'
        required: false
        default: "master"
# Workflow-level environment variables.
env:
  PROJECT_NAME: 'WARP'
  # Name of the GitHub repository, taken from the triggering event payload.
  REPOSITORY_NAME: '${{ github.event.repository.name }}'
jobs:
  # Single job: authenticate to Google Cloud, submit one Terra submission per
  # test-input JSON, poll each submission, and write a status summary.
  run_pipeline:
    runs-on: ubuntu-latest
    # Add "id-token" with the intended permissions.
    permissions:
      contents: 'read'
      id-token: 'write'

    steps:
      # actions/checkout MUST come before auth
      - uses: actions/checkout@v3
        with:
          ref: ${{ github.ref }}

      - id: 'auth'
        name: 'Authenticate to Google Cloud'
        uses: 'google-github-actions/auth@v2'
        with:
          token_format: 'access_token'
          # Centralized in dsp-tools-k8s; ask in #dsp-devops-champions for help troubleshooting
          # This is provided by the DevOps team - do not change!
          workload_identity_provider: 'projects/1038484894585/locations/global/workloadIdentityPools/github-wi-pool/providers/github-wi-provider'
          # This is our tester service account
          # NOTE(review): this value looks like an e-mail obfuscation placeholder
          # left by the page this file was copied from - confirm and restore the
          # real service account address.
          service_account: '[email protected]'
          access_token_lifetime: '3600'  # seconds, default is 3600
          access_token_scopes: 'profile, email, openid'

      # Set the branch name.
      # github.head_ref contains the name of the branch in the context of a pull request.
      # If github.head_ref is empty, it implies the workflow was triggered manually;
      # GITHUB_REF_NAME then holds the short name of the triggering ref. It is used
      # instead of ${GITHUB_REF##*/}, which keeps only the text after the LAST
      # slash and would turn "feature/foo" into "foo".
      - name: Set Branch Name
        id: set_branch
        env:
          # Passed through the environment instead of being interpolated into the
          # script, so a crafted branch name cannot inject shell commands.
          HEAD_REF: ${{ github.head_ref }}
        run: |
          if [ -z "$HEAD_REF" ]; then
            echo "Branch name is missing, using $GITHUB_REF_NAME"
            echo "branch_name=$GITHUB_REF_NAME" >> "$GITHUB_ENV"
          else
            echo "Branch name from PR: $HEAD_REF"
            echo "branch_name=$HEAD_REF" >> "$GITHUB_ENV"
          fi

      - name: Set Test Type for PRs
        if: ${{ github.event_name == 'pull_request' }}
        id: set_test_type
        env:
          BASE_REF: ${{ github.base_ref }}
        run: |
          # Default to "Scientific" if targeting master
          if [ "$BASE_REF" = "master" ]; then
            echo "testType=Scientific" >> "$GITHUB_ENV"
          else
            echo "testType=Plumbing" >> "$GITHUB_ENV"
          fi

      - name: Use Provided Test Type
        if: ${{ github.event_name == 'workflow_dispatch' }}
        id: use_provided_test_type
        env:
          INPUT_TEST_TYPE: ${{ github.event.inputs.testType }}
        run: |
          # Use the testType provided by the user
          echo "testType=$INPUT_TEST_TYPE" >> "$GITHUB_ENV"

      - name: Submit job, poll status, and get outputs
        id: pipeline_run
        env:
          # Expression values are handed to the script as environment variables
          # rather than being pasted into the shell source.
          TOKEN: ${{ steps.auth.outputs.access_token }}
          USE_CALL_CACHE: ${{ github.event.inputs.useCallCache || 'true' }}
          UPDATE_TRUTH: ${{ github.event.inputs.updateTruth || 'false' }}
          TRUTH_BRANCH: ${{ github.event.inputs.truthBranch || 'master' }}
        run: |
          # Set common variables
          NAMESPACE="warp-pipelines"
          WORKSPACE="WARP Tests"
          # testType and branch_name were written to GITHUB_ENV by earlier steps
          TEST_TYPE="$testType"
          CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
          echo "truth branch: $TRUTH_BRANCH"

          ########################################
          # SET PIPELINE SPECIFIC VARIABLES HERE #
          ########################################
          PIPELINE_NAME="TestIlluminaGenotypingArray"
          PIPELINE_DIR="pipelines/broad/genotyping/illumina"
          # TODO: Need to set the truth and result paths appropriately
          # TODO: Need to dynamically set the truth branch, for now it is hardcoded to master branch
          # We may want to keep the truth and results buckets separate for TTL reasons
          TRUTH_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
          RESULTS_PATH="gs://broad-gotc-test-storage/IlluminaGenotypingArray/results/$CURRENT_TIME"

          # Call the Firecloud API through scripts/firecloud_api/firecloud_api.py.
          # $1 is the action name; any remaining arguments are forwarded as-is.
          firecloud_action() {
            python3 scripts/firecloud_api/firecloud_api.py --token "$TOKEN" --namespace "$NAMESPACE" --workspace "$WORKSPACE" --action "$1" "${@:2}"
          }

          # Normalize USE_CALL_CACHE: only the exact string "true" enables it
          if [ "$USE_CALL_CACHE" = "true" ]; then
            USE_CALL_CACHE_BOOL=true
          else
            USE_CALL_CACHE_BOOL=false
          fi

          # Normalize UPDATE_TRUTH: only the exact string "true" enables it
          if [ "$UPDATE_TRUTH" = "true" ]; then
            UPDATE_TRUTH_BOOL=true
          else
            UPDATE_TRUTH_BOOL=false
          fi

          # Create the submission_data.json file which will be the same for all inputs
          SUBMISSION_DATA_FILE="submission_data.json"
          # Unquoted heredoc delimiter, so the shell variables below are expanded
          cat <<EOF > "$SUBMISSION_DATA_FILE"
          {
            "methodConfigurationNamespace": "$NAMESPACE",
            "methodConfigurationName": "$PIPELINE_NAME",
            "useCallCache": $USE_CALL_CACHE_BOOL,
            "deleteIntermediateOutputFiles": false,
            "useReferenceDisks": true,
            "memoryRetryMultiplier": 1.2,
            "workflowFailureMode": "NoNewCalls",
            "userComment": "Automated submission",
            "ignoreEmptyOutputs": false
          }
          EOF
          echo "Created submission data file: $SUBMISSION_DATA_FILE"

          # Markdown table header placed above each submission's "id | status" rows
          STATUS_TABLE_HEADER="Workflow ID | Status"$'\n'"--- | ---"
          # Collected workflow outputs; gathered here but not reported by this step
          ALL_OUTPUTS=""

          # Submission IDs in submission order, and status rows keyed by submission ID
          SUBMISSION_IDS=()
          declare -A WORKFLOW_STATUSES

          # Loop through each file in the appropriate test inputs directory
          INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
          echo "Running tests with test type: $TEST_TYPE"

          MAX_RETRIES=2
          RETRY_DELAY=300  # 300 seconds = 5 minutes

          for input_file in "$INPUTS_DIR"/*.json; do
            echo "Processing input file: $input_file"
            test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
              --results_path "$RESULTS_PATH" \
              --inputs_json "$input_file" \
              --update_truth "$UPDATE_TRUTH_BOOL")
            echo "Uploading the test input file: $test_input_file"
            echo "Branch name: $branch_name"
            firecloud_action upload_test_inputs --pipeline_name "$PIPELINE_NAME" --test_input_file "$test_input_file" --branch_name "$branch_name"

            attempt=1
            while [ "$attempt" -le "$MAX_RETRIES" ]; do
              echo "Attempt $attempt: Submitting job for input file: $input_file"
              cat "$SUBMISSION_DATA_FILE"
              SUBMISSION_ID=$(firecloud_action submit --submission_data_file "$SUBMISSION_DATA_FILE")

              # NOTE(review): this is a substring match, so a valid submission ID
              # that happens to contain "404" would be treated as an error -
              # confirm what firecloud_api.py prints on a 404 and tighten the check.
              if [[ "$SUBMISSION_ID" == *"404"* ]]; then
                if [ "$attempt" -ge "$MAX_RETRIES" ]; then
                  # Fail the step instead of only announcing an exit, and skip
                  # the pointless sleep after the final attempt.
                  echo "Error: Dockstore method not found. Max retries reached. Exiting..."
                  exit 1
                fi
                echo "Error: Dockstore method not found. Retrying in $RETRY_DELAY seconds..."
                sleep "$RETRY_DELAY"
                attempt=$((attempt + 1))
              elif [ -z "$SUBMISSION_ID" ]; then
                # Give up on this input file only; remaining inputs are still submitted
                echo "Submission failed for input file: $input_file. No submission ID received."
                break
              else
                echo "Submission successful. Submission ID: $SUBMISSION_ID"
                SUBMISSION_IDS+=("$SUBMISSION_ID")
                break
              fi
            done
          done

          echo "Monitoring the status of submitted workflows..."
          for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
            echo "Polling submission status for Submission ID: $SUBMISSION_ID"
            RESPONSE=$(firecloud_action poll_status --submission_id "$SUBMISSION_ID")

            if [ -z "$RESPONSE" ]; then
              echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
              continue
            fi

            # RESPONSE is parsed as a JSON object; each key/value pair becomes
            # one "key | value" row
            WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
            echo "Statuses for submission $SUBMISSION_ID:"
            echo "$WORKFLOW_STATUSES_FOR_SUBMISSION"

            # Remember the rows for the summary
            WORKFLOW_STATUSES["$SUBMISSION_ID"]=$WORKFLOW_STATUSES_FOR_SUBMISSION

            # Retrieve workflow outputs, one call per key of RESPONSE
            echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
            for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
              WORKFLOW_OUTPUT=$(firecloud_action get_outputs --submission_id "$SUBMISSION_ID" --workflow_id "$WORKFLOW_ID" --pipeline_name "$PIPELINE_NAME")
              ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
            done
          done

          # Generate the final summary with a hyperlink and a status table per submission
          echo "## Combined Workflow Statuses" >> "$GITHUB_STEP_SUMMARY"
          for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
            # Terra URL for the submission; spaces in the workspace name are percent-encoded
            SUBMISSION_URL="https://app.terra.bio/#workspaces/$NAMESPACE/${WORKSPACE// /%20}/job_history/$SUBMISSION_ID"
            {
              echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)"
              # Blank line so the rows below are rendered as a markdown table
              echo ""
              echo "$STATUS_TABLE_HEADER"
              echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}"
              # Blank line for separation between submissions
              echo ""
            } >> "$GITHUB_STEP_SUMMARY"
          done

      - name: Print Summary on Success
        if: success()
        run: |
          echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> "$GITHUB_STEP_SUMMARY"

      - name: Print Summary on Failure
        if: failure()
        run: |
          echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> "$GITHUB_STEP_SUMMARY"