From 93774197773f9dce2aacb1fb8583e19cc18122db Mon Sep 17 00:00:00 2001 From: Hatem Nawar Date: Tue, 24 Oct 2023 13:32:55 +0100 Subject: [PATCH] Initial commit to update Cromwell to v86 and Batch API Signed-off-by: Hatem Nawar --- modules/genomics_cromwell/budget.tf | 8 +-- modules/genomics_cromwell/main.tf | 6 +- .../scripts/build/cromwell.conf | 58 ++++--------------- modules/genomics_cromwell/variables.tf | 13 ++--- 4 files changed, 22 insertions(+), 63 deletions(-) diff --git a/modules/genomics_cromwell/budget.tf b/modules/genomics_cromwell/budget.tf index dc8b42f0..35b1c4bd 100644 --- a/modules/genomics_cromwell/budget.tf +++ b/modules/genomics_cromwell/budget.tf @@ -19,15 +19,15 @@ locals { } resource "google_monitoring_notification_channel" "email_notif" { - count = var.create_budget ? length(local.emails) : 0 + count = var.create_budget ? length(local.emails) : 0 display_name = "Billing Budget Notification Channel - ${element(local.emails, count.index)}" project = local.project.project_id type = "email" - labels = { + labels = { email_address = "${element(local.emails, count.index)}" } - - depends_on = [ + + depends_on = [ time_sleep.wait_120_seconds ] } diff --git a/modules/genomics_cromwell/main.tf b/modules/genomics_cromwell/main.tf index b6acc4fd..f8961295 100644 --- a/modules/genomics_cromwell/main.tf +++ b/modules/genomics_cromwell/main.tf @@ -114,10 +114,10 @@ resource "google_storage_bucket_object" "config" { content = templatefile("scripts/build/cromwell.conf", { CROMWELL_PROJECT = local.project.project_id, CROMWELL_ROOT_BUCKET = google_storage_bucket.cromwell_workflow_bucket.url, - CROMWELL_VPC = var.network_name + CROMWELL_VPC = var.network_name, + CROMWELL_REGION = var.region, CROMWELL_SERVICE_ACCOUNT = module.cromwell_service_account.email, - CROMWELL_PAPI_LOCATION = var.cromwell_PAPI_location, - CROMWELL_PAPI_ENDPOINT = var.cromwell_PAPI_endpoint, + CROMWELL_BATCH_LOCATION = var.cromwell_batch_location, REQUESTER_PAY_PROJECT = 
local.project.project_id, CROMWELL_ZONES = "[${join(", ", var.cromwell_zones)}]" CROMWELL_PORT = var.cromwell_port, diff --git a/modules/genomics_cromwell/scripts/build/cromwell.conf b/modules/genomics_cromwell/scripts/build/cromwell.conf index 9f37190e..1cfd52d5 100644 --- a/modules/genomics_cromwell/scripts/build/cromwell.conf +++ b/modules/genomics_cromwell/scripts/build/cromwell.conf @@ -40,12 +40,12 @@ engine { } backend { - default = PAPIv2 + default = GCPBATCH providers { - PAPIv2 { - actor-factory = "cromwell.backend.google.pipelines.v2beta.PipelinesApiLifecycleActorFactory" + GCPBATCH { + actor-factory = "cromwell.backend.google.batch.GcpBatchBackendLifecycleActorFactory" config { # Google project project = "${CROMWELL_PROJECT}" @@ -53,21 +53,10 @@ backend { # Base bucket for workflow executions root = "${CROMWELL_ROOT_BUCKET}" - # Make the name of the backend used for call caching purposes insensitive to the PAPI version. - name-for-call-caching-purposes: PAPI - # Emit a warning if jobs last longer than this amount of time. This might indicate that something got stuck in PAPI. + # Emit a warning if jobs last longer than this amount of time. This might indicate that something got stuck in Batch API. slow-job-warning-time: 24 hours - # Set this to the lower of the two values "Queries per 100 seconds" and "Queries per 100 seconds per user" for - # your project. - # - # Used to help determine maximum throughput to the Google Genomics API. Setting this value too low will - # cause a drop in performance. Setting this value too high will cause QPS based locks from Google. - # 1000 is the default "Queries per 100 seconds per user", 50000 is the default "Queries per 100 seconds" - # See https://cloud.google.com/genomics/quotas for more information - genomics-api-queries-per-100-seconds = 1000 - # Polling for completion backs-off gradually for slower-running jobs. 
# This is the maximum polling interval (in seconds): maximum-polling-interval = 600 @@ -84,8 +73,8 @@ backend { # Optional configuration to use high security network (Virtual Private Cloud) for running jobs. # See https://cromwell.readthedocs.io/en/stable/backends/Google/ for more details. virtual-private-cloud { - network-name = "${CROMWELL_VPC}" - subnetwork-name = "${CROMWELL_VPC}" + network-name = "projects/${CROMWELL_PROJECT}/global/networks/${CROMWELL_VPC}" + subnetwork-name = "regions/${CROMWELL_REGION}/subnetworks/${CROMWELL_VPC}" } # Global pipeline timeout # Defaults to 7 days; max 30 days @@ -95,31 +84,22 @@ backend { # A reference to an auth defined in the `google` stanza at the top. This auth is used to create # Pipelines and manipulate auth JSONs. auth = "application-default" - + + # Google Cloud Batch API location (region) used to submit jobs; for the full list of supported locations check + # https://cloud.google.com/batch/docs/locations + location = "${CROMWELL_BATCH_LOCATION}" // alternative service account to use on the launched compute instance // NOTE: If combined with service account authorization, both that serivce account and this service account // must be able to read and write to the 'root' GCS path compute-service-account = "${CROMWELL_SERVICE_ACCOUNT}" + - # Currently Cloud Life Sciences API is available now in more locations for the full list check - # https://cloud.google.com/life-sciences/docs/concepts/locations - location = "${CROMWELL_PAPI_LOCATION}" - - # Endpoint for APIs. For locations other than us-central1, the endpoint needs to be updated to match the location - # For example for "europe-west4" location the endpoint-url should be "https://europe-west4-lifesciences.googleapis.com" - endpoint-url = "${CROMWELL_PAPI_ENDPOINT}" - # Restrict access to VM metadata. 
Useful in cases when untrusted containers are running under a service # account not owned by the submitting user restrict-metadata-access = false - # Pipelines v2 only: specify the number of times localization and delocalization operations should be attempted - # There is no logic to determine if the error was transient or not, everything is retried upon failure - # Defaults to 3 - localization-attempts = 3 - # Specifies the minimum file size for `gsutil cp` to use parallel composite uploads during delocalization. # Parallel composite uploads can result in a significant improvement in delocalization speed for large files # but may introduce complexities in downloading such files from GCS, please see @@ -130,20 +110,6 @@ backend { parallel-composite-upload-threshold="150M" } - # Controls how batched requests to PAPI are handled: - batch-requests { - timeouts { - # Timeout when attempting to connect to PAPI to make requests: - # read = 10 seconds - - # Timeout waiting for batch responses from PAPI: - # - # Note: Try raising this value if you see errors in logs like: - # WARN - PAPI request worker PAPIQueryWorker-[...] terminated. 99 run creation requests, 0 status poll requests, and 0 abort requests will be reconsidered. If any of those succeeded in the cloud before the batch request failed, they might be run twice. - # ERROR - Read timed out - # connect = 10 seconds - } - } filesystems { gcs { # A reference to a potentially different auth for manipulating files via engine functions. @@ -178,8 +144,6 @@ backend { preemptible: 0 zones: ${CROMWELL_ZONES} } - - include "papi_v2_reference_image_manifest.conf" } } } diff --git a/modules/genomics_cromwell/variables.tf b/modules/genomics_cromwell/variables.tf index f82f8160..1a7dd572 100644 --- a/modules/genomics_cromwell/variables.tf +++ b/modules/genomics_cromwell/variables.tf @@ -118,14 +118,9 @@ variable "cromwell_db_tier" { } -variable "cromwell_PAPI_endpoint" { - description = "Endpoint for Life Sciences APIs. 
For locations other than us-central1, the endpoint needs to be updated to match the location For example for \"europe-west4\" location the endpoint-url should be \"https://europe-west4-lifesciences.googleapi/\". {{UIMeta group=3 order=9 }}" - type = string - default = "https://lifesciences.googleapis.com" -} -variable "cromwell_PAPI_location" { - description = "Google Cloud region or multi-region where the Life Sciences API endpoint will be used. This does not affect where worker instances or data will be stored. {{UIMeta group=3 order=10 }}" +variable "cromwell_batch_location" { + description = "Google Cloud region where the Batch API endpoint will be used. This does not affect where worker instances or data will be stored. {{UIMeta group=3 order=10 }}" type = string default = "us-central1" } @@ -157,14 +152,14 @@ variable "cromwell_server_instance_type" { variable "cromwell_version" { description = "Cromwell version that will be downloaded, for the latest release version, please check https://github.com/broadinstitute/cromwell/releases for the latest releases. {{UIMeta group=3 order=6 }}" type = string - default = "72" + default = "86" } variable "cromwell_zones" { description = "GCP Zones that will be set as the default runtime in Cromwell config file. {{UIMeta group=3 order=7 }}" type = list(any) - default = ["us-central1-a", "us-central1-b"] + default = ["us-central1-a"] } variable "db_service_network_cidr_range" {