diff --git a/.github/workflows/aws_integration_test.yml b/.github/workflows/aws_integration_test.yml
new file mode 100644
index 00000000..e8e5acb2
--- /dev/null
+++ b/.github/workflows/aws_integration_test.yml
@@ -0,0 +1,158 @@
+name: AWS Conformance Test
+
+on:
+  push:
+    branches:
+      - main
+
+# This prevents two workflows from running at the same time.
+# This workflow calls terragrunt, which does not allow concurrent runs.
+concurrency:
+  group: aws-conformance
+  cancel-in-progress: false
+
+permissions:
+  contents: read
+
+env:
+  TF_VERSION: "1.10.0"
+  TG_VERSION: "0.67.0"
+  TG_DIR: "deployment/live/aws/conformance/ci/"
+  TESSERA_PREFIX_NAME: trillian-tessera
+  ECR_REGISTRY: 864981736166.dkr.ecr.us-east-1.amazonaws.com
+  ECR_REPOSITORY_CONFORMANCE: trillian-tessera/conformance:latest
+  ECR_REPOSITORY_HAMMER: trillian-tessera/hammer:latest
+  AWS_REGION: us-east-1
+
+jobs:
+  aws-integration:
+    runs-on: ubuntu-latest
+
+    steps:
+      ## Authenticate to AWS with the credentials stored in Github Secrets.
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          # TODO(phboneff): use a better form of authentication
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      ## Authenticate with ECR to push the conformance and hammer images.
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v2
+
+      ## Build the conformance image and push it to ECR. This will be used
+      ## later on by Terragrunt.
+      - name: Build, tag, and push Conformance image to Amazon ECR
+        id: build-publish-conformance
+        shell: bash
+        env:
+          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+          ECR_REPOSITORY: ${{ env.ECR_REPOSITORY_CONFORMANCE }}
+        run: |
+          docker build -f ./cmd/conformance/aws/Dockerfile . -t "$ECR_REGISTRY/$ECR_REPOSITORY"
+          docker push "$ECR_REGISTRY/$ECR_REPOSITORY"
+          echo "Pushed image to $ECR_REGISTRY/$ECR_REPOSITORY"
+
+      ## Build the hammer image and push it to ECR. This will be used
+      ## later on by Terragrunt.
+      - name: Build, tag, and push Hammer image to Amazon ECR
+        id: build-publish-hammer
+        shell: bash
+        env:
+          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+          ECR_REPOSITORY: ${{ env.ECR_REPOSITORY_HAMMER }}
+        run: |
+          docker build -f ./internal/hammer/Dockerfile . -t "$ECR_REGISTRY/$ECR_REPOSITORY"
+          docker push "$ECR_REGISTRY/$ECR_REPOSITORY"
+          echo "Pushed image to $ECR_REGISTRY/$ECR_REPOSITORY"
+
+      ## Destroy any pre-existing deployment/live/aws/conformance/ci env.
+      ## This might happen if a previous integration test workflow has failed.
+      - name: Terragrunt destroy pre conformance test
+        id: terragrunt-destroy-pre
+        uses: gruntwork-io/terragrunt-action@v2
+        with:
+          tf_version: ${{ env.TF_VERSION }}
+          tg_version: ${{ env.TG_VERSION }}
+          tg_dir: ${{ env.TG_DIR }}
+          tg_command: "destroy"
+        env:
+          TESSERA_SIGNER: unused
+          TESSERA_VERIFIER: unused
+
+      ## Generate new keys for the log to use, and export them to environment
+      ## variables for Terragrunt to use.
+      - name: Generate Tessera keys
+        id: generate-keys
+        shell: bash
+        run: |
+          go run github.com/transparency-dev/serverless-log/cmd/generate_keys@80334bc9dc573e8f6c5b3694efad6358da50abd4 \
+            --key_name=tessera/test/conformance \
+            --out_priv=${{ runner.temp }}/key.sec \
+            --out_pub=${{ runner.temp }}/key.pub
+          cat ${{ runner.temp }}/key.pub
+          echo "TESSERA_SIGNER=$(cat ${{ runner.temp }}/key.sec)" >> "$GITHUB_ENV"
+          echo "TESSERA_VERIFIER=$(cat ${{ runner.temp }}/key.pub)" >> "$GITHUB_ENV"
+
+      ## Apply the deployment/live/aws/conformance/ci terragrunt config.
+      ## This will bring up the conformance infrastructure which consists of:
+      ##  - the storage module
+      ##  - a private S3 <--> ECS network link for the hammer to read the log
+      ##  - an ECS cluster to run Fargate tasks
+      ##  - a conformance service, with multiple conformance binary instances
+      ##  - a hammer task definition (but no execution)
+      # TODO(phboneff): AuroraDB takes a long time to be brought up and down
+      # consider keeping it around between tests / using Aurora Serverless
+      - name: Terragrunt apply
+        id: terragrunt-apply
+        uses: gruntwork-io/terragrunt-action@v2
+        with:
+          tf_version: ${{ env.TF_VERSION }}
+          tg_version: ${{ env.TG_VERSION }}
+          tg_dir: ${{ env.TG_DIR }}
+          tg_command: "apply"
+        env:
+          INPUT_POST_EXEC_1: |
+            echo "ECS_CLUSTER=$(terragrunt output -raw ecs_cluster)" >> "$GITHUB_ENV"
+          INPUT_POST_EXEC_2: |
+            echo "VPC_SUBNETS=$(terragrunt output -json vpc_subnets)" >> "$GITHUB_ENV"
+
+      ## Now we can run the hammer using the task definition, against the
+      ## conformance service. This step returns the hammer task's exit code.
+      - name: Run Hammer
+        id: hammer
+        shell: bash
+        run: |
+          cat ${{ runner.temp }}/key.pub
+          echo "Will launch a hammer ECS task."
+          # Expansions below are quoted so their values reach the AWS CLI without word splitting or globbing.
+          HAMMER_ARN=$(aws ecs run-task \
+            --cluster="$ECS_CLUSTER" \
+            --task-definition=hammer \
+            --count=1 \
+            --launch-type=FARGATE \
+            --network-configuration='{"awsvpcConfiguration": {"assignPublicIp":"ENABLED","subnets": '"$VPC_SUBNETS"'}}' \
+            --query 'tasks[0].taskArn')
+          echo "Hammer task running, ARN: $HAMMER_ARN."
+          echo "Waiting for task to stop..."
+          aws ecs wait tasks-stopped --cluster="$ECS_CLUSTER" --tasks="[$HAMMER_ARN]"
+          echo "The task has stopped. Fetching exit code and returning this action with it."
+          exit "$(aws ecs describe-tasks --cluster="$ECS_CLUSTER" --tasks="[$HAMMER_ARN]" --query 'tasks[0].containers[0].exitCode')"
+
+      ## Tear the deployment down even when an earlier step failed, provided
+      ## "Terragrunt apply" was attempted and may have created resources.
+      - name: Terragrunt destroy post conformance test
+        id: terragrunt-destroy-post
+        if: ${{ always() && steps.terragrunt-apply.outcome != 'skipped' }}
+        uses: gruntwork-io/terragrunt-action@v2
+        with:
+          tf_version: ${{ env.TF_VERSION }}
+          tg_version: ${{ env.TG_VERSION }}
+          tg_dir: ${{ env.TG_DIR }}
+          tg_command: "destroy"
diff --git a/deployment/live/aws/conformance/ci/terragrunt.hcl b/deployment/live/aws/conformance/ci/terragrunt.hcl
index 80c48bfa..19cbfa39 100644
--- a/deployment/live/aws/conformance/ci/terragrunt.hcl
+++ b/deployment/live/aws/conformance/ci/terragrunt.hcl
@@ -1,7 +1,3 @@
-terraform {
-  source = "${get_repo_root()}/deployment/modules/aws//storage"
-}
-
 include "root" {
   path   = find_in_parent_folders()
   expose = true
diff --git a/deployment/live/aws/conformance/terragrunt.hcl b/deployment/live/aws/conformance/terragrunt.hcl
index 65b4d5e4..2b2cd3e3 100644
--- a/deployment/live/aws/conformance/terragrunt.hcl
+++ b/deployment/live/aws/conformance/terragrunt.hcl
@@ -1,15 +1,22 @@
 terraform {
-  source = "${get_repo_root()}/deployment/modules/aws//storage"
+  source = "${get_repo_root()}/deployment/modules/aws//conformance"
 }
 
 locals {
-  env         = path_relative_to_include()
-  account_id  = "${get_aws_account_id()}"
-  region      = get_env("AWS_REGION", "us-east-1")
-  profile     = get_env("AWS_PROFILE", "default")
-  base_name   = get_env("TESSERA_BASE_NAME", "${local.env}-conformance")
-  prefix_name = get_env("TESSERA_PREFIX_NAME", "trillian-tessera")
-  ephemeral   = true
+  env                        = path_relative_to_include()
+  account_id                 = "${get_aws_account_id()}"
+  region                     = get_env("AWS_REGION", "us-east-1")
+  base_name                  = get_env("TESSERA_BASE_NAME", "${local.env}-conformance")
+  prefix_name                = get_env("TESSERA_PREFIX_NAME", "trillian-tessera")
+  ecr_registry               = get_env("ECR_REGISTRY", "${local.account_id}.dkr.ecr.${local.region}.amazonaws.com")
+  ecr_repository_conformance = get_env("ECR_REPOSITORY_CONFORMANCE", "trillian-tessera/conformance:latest")
+  ecr_repository_hammer      = get_env("ECR_REPOSITORY_HAMMER", "trillian-tessera/hammer:latest")
+  signer                     = get_env("TESSERA_SIGNER")
+  verifier                   = get_env("TESSERA_VERIFIER")
+  # Roles are defined externally
+  ecs_execution_role         = "arn:aws:iam::864981736166:role/ecsTaskExecutionRole"
+  ecs_conformance_task_role  = "arn:aws:iam::864981736166:role/ConformanceECSTaskRolePolicy"
+  ephemeral                  = true
 }
 
 remote_state {
@@ -17,7 +24,6 @@ remote_state {
 
   config = {
     region         = local.region
-    profile        = local.profile
     bucket         = "${local.prefix_name}-${local.base_name}-terraform-state"
     key            = "${local.env}/terraform.tfstate"
     dynamodb_table = "${local.prefix_name}-${local.base_name}-terraform-lock"
diff --git a/deployment/modules/aws/conformance/main.tf b/deployment/modules/aws/conformance/main.tf
new file mode 100644
index 00000000..a31c6509
--- /dev/null
+++ b/deployment/modules/aws/conformance/main.tf
@@ -0,0 +1,274 @@
+# Header ######################################################################
+terraform {
+  backend "s3" {}
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "5.76.0"
+    }
+  }
+}
+
+locals {
+  name = "${var.prefix_name}-${var.base_name}"
+  port = 2024
+}
+
+provider "aws" {
+  region = var.region
+}
+
+module "storage" {
+  source = "../storage"
+
+  prefix_name = var.prefix_name
+  base_name   = var.base_name
+  region      = var.region
+  ephemeral   = true
+}
+
+# Resources ####################################################################
+## ECS cluster #################################################################
+# This will be used to run the conformance and hammer binaries on Fargate.
+resource "aws_ecs_cluster" "ecs_cluster" {
+  name = local.name
+}
+
+resource "aws_ecs_cluster_capacity_providers" "ecs_capacity" {
+  cluster_name = aws_ecs_cluster.ecs_cluster.name
+
+  capacity_providers = ["FARGATE"]
+}
+
+## Virtual private network #####################################################
+# This will be used for the containers to communicate between themselves, and
+# the S3 bucket.
+resource "aws_default_vpc" "default" {
+  tags = {
+    Name = "Default VPC"
+  }
+}
+
+data "aws_subnets" "subnets" {
+  filter {
+    name   = "vpc-id"
+    values = [aws_default_vpc.default.id]
+  }
+}
+
+## Service discovery ###########################################################
+# This will be used by the hammer to contact multiple conformance tasks with a single
+# dns name.
+resource "aws_service_discovery_private_dns_namespace" "internal" {
+  name = "internal"
+  vpc  = aws_default_vpc.default.id
+}
+
+resource "aws_service_discovery_service" "conformance_discovery" {
+  name = "conformance-discovery"
+
+  dns_config {
+    namespace_id = aws_service_discovery_private_dns_namespace.internal.id
+
+    dns_records {
+      ttl  = 10
+      type = "A"
+    }
+
+    // TODO(phboneff): make sure that the hammer uses multiple IPs
+    // otherwise, set a low TTL and use WEIGHTED.
+    routing_policy = "MULTIVALUE"
+  }
+
+  health_check_custom_config {
+    failure_threshold = 1
+  }
+}
+
+## Connect S3 bucket to VPC ####################################################
+# This allows the hammer to talk to a non public S3 bucket over HTTP.
+resource "aws_vpc_endpoint" "s3" {
+  vpc_id       = aws_default_vpc.default.id
+  service_name = "com.amazonaws.${var.region}.s3"
+}
+
+
+resource "aws_vpc_endpoint_route_table_association" "private_s3" {
+  vpc_endpoint_id = aws_vpc_endpoint.s3.id
+  route_table_id  = aws_default_vpc.default.default_route_table_id
+}
+
+resource "aws_s3_bucket_policy" "allow_access_from_vpce" {
+  bucket = module.storage.log_bucket.id
+  policy = data.aws_iam_policy_document.allow_access_from_vpce.json
+}
+
+data "aws_iam_policy_document" "allow_access_from_vpce" {
+  statement {
+    principals {
+      type        = "*"
+      identifiers = ["*"]
+    }
+
+    actions = [
+      "s3:GetObject",
+    ]
+
+    resources = [
+      "${module.storage.log_bucket.arn}/*",
+    ]
+
+    condition {
+      test     = "StringEquals"
+      variable = "aws:sourceVpce"
+      values   = [aws_vpc_endpoint.s3.id]
+    }
+  }
+  depends_on = [aws_vpc_endpoint.s3]
+}
+
+## Conformance task and service ################################################
+# This will start multiple conformance tasks on Fargate within a service.
+resource "aws_ecs_task_definition" "conformance" {
+  family                   = "conformance"
+  requires_compatibilities = ["FARGATE"]
+  # Required network_mode for tasks running on Fargate.
+  network_mode             = "awsvpc"
+  cpu                      = 1024
+  memory                   = 2048
+  execution_role_arn       = var.ecs_execution_role
+  # We need a special role that has access to S3.
+  task_role_arn            = var.ecs_conformance_task_role
+  container_definitions = jsonencode([{
+    "name": "${local.name}-conformance",
+    "image": "${var.ecr_registry}/${var.ecr_repository_conformance}",
+    "cpu": 0,
+    "portMappings": [{
+      "name": "conformance-${local.port}-tcp",
+      "containerPort": local.port,
+      "hostPort": local.port,
+      "protocol": "tcp",
+      "appProtocol": "http"
+    }],
+    "essential": true,
+    "command": [
+      "--signer=${var.signer}",
+      "--bucket=${module.storage.log_bucket.id}",
+      "--db_user=root",
+      "--db_password=password",
+      "--db_name=tessera",
+      "--db_host=${module.storage.log_rds_db.endpoint}",
+      "-v=2"
+    ],
+    "logConfiguration": {
+      "logDriver": "awslogs",
+      "options": {
+        "awslogs-group": "/ecs/${local.name}",
+        "mode": "non-blocking",
+        "awslogs-create-group": "true",
+        "max-buffer-size": "25m",
+        "awslogs-region": var.region,
+        "awslogs-stream-prefix": "ecs"
+      },
+    },
+  }])
+
+  runtime_platform {
+    operating_system_family = "LINUX"
+    cpu_architecture        = "X86_64"
+  }
+
+  depends_on = [module.storage]
+}
+
+resource "aws_ecs_service" "conformance_service" {
+  name                  = local.name
+  task_definition       = aws_ecs_task_definition.conformance.arn
+  cluster               = aws_ecs_cluster.ecs_cluster.arn
+  launch_type           = "FARGATE"
+  desired_count         = 3
+  wait_for_steady_state = true
+
+  network_configuration {
+    subnets          = data.aws_subnets.subnets.ids
+    # required to access container registry
+    assign_public_ip = true
+  }
+
+  # connect the service with the service discovery defined above
+  service_registries {
+    registry_arn = aws_service_discovery_service.conformance_discovery.arn
+  }
+
+  depends_on = [
+    aws_service_discovery_private_dns_namespace.internal,
+    aws_service_discovery_service.conformance_discovery,
+    aws_ecs_cluster.ecs_cluster,
+    aws_ecs_task_definition.conformance,
+  ]
+}
+
+## Hammer task definition and execution ########################################
+# The hammer can also be launched manually with the following command:
+# aws ecs run-task \
+#   --cluster="$(terragrunt output -raw ecs_cluster)" \
+#   --task-definition=hammer \
+#   --count=1 \
+#   --launch-type=FARGATE \
+#   --network-configuration='{"awsvpcConfiguration": {"assignPublicIp":"ENABLED","subnets": '"$(terragrunt output -json vpc_subnets)"'}}'
+
+resource "aws_ecs_task_definition" "hammer" {
+  family                   = "hammer"
+  requires_compatibilities = ["FARGATE"]
+  # Required network_mode for tasks running on Fargate
+  network_mode             = "awsvpc"
+  cpu                      = 1024
+  memory                   = 2048
+  execution_role_arn       = var.ecs_execution_role
+  container_definitions = jsonencode([{
+    "name": "${local.name}-hammer",
+    "image": "${var.ecr_registry}/${var.ecr_repository_hammer}",
+    "cpu": 0,
+    "portMappings": [{
+      "name": "hammer-80-tcp",
+      "containerPort": 80,
+      "hostPort": 80,
+      "protocol": "tcp",
+      "appProtocol": "http"
+    }],
+    "essential": true,
+    "command": [
+      "--log_public_key=${var.verifier}",
+      "--log_url=https://${module.storage.log_bucket.bucket_regional_domain_name}",
+      "--write_log_url=http://${aws_service_discovery_service.conformance_discovery.name}.${aws_service_discovery_private_dns_namespace.internal.name}:${local.port}",
+      "-v=3",
+      "--show_ui=false",
+      "--logtostderr",
+      "--num_writers=1100",
+      "--max_write_ops=1500",
+      "--leaf_min_size=1024",
+      "--leaf_write_goal=50000"
+    ],
+    "logConfiguration": {
+      "logDriver": "awslogs",
+      "options": {
+        "awslogs-group": "/ecs/${local.name}-hammer",
+        "mode": "non-blocking",
+        "awslogs-create-group": "true",
+        "max-buffer-size": "25m",
+        "awslogs-region": var.region,
+        "awslogs-stream-prefix": "ecs"
+      },
+    },
+  }])
+
+  runtime_platform {
+    operating_system_family = "LINUX"
+    cpu_architecture        = "X86_64"
+  }
+
+  depends_on = [
+    module.storage,
+    aws_ecs_cluster.ecs_cluster,
+  ]
+}
diff --git a/deployment/modules/aws/conformance/outputs.tf b/deployment/modules/aws/conformance/outputs.tf
new file mode 100644
index 00000000..e3b45d3d
--- /dev/null
+++ b/deployment/modules/aws/conformance/outputs.tf
@@ -0,0 +1,9 @@
+output "ecs_cluster" {
+  description = "ECS cluster name"
+  value       = aws_ecs_cluster.ecs_cluster.id
+}
+
+output "vpc_subnets" {
+  description = "VPC subnets list"
+  value       = data.aws_subnets.subnets.ids
+}
diff --git a/deployment/modules/aws/conformance/variables.tf b/deployment/modules/aws/conformance/variables.tf
new file mode 100644
index 00000000..56d5ec71
--- /dev/null
+++ b/deployment/modules/aws/conformance/variables.tf
@@ -0,0 +1,54 @@
+variable "prefix_name" {
+  description = "Common prefix to use when naming resources, ensures unicity of the s3 bucket name."
+  type        = string
+}
+
+variable "base_name" {
+  description = "Common name to use when naming resources."
+  type        = string
+}
+
+variable "region" {
+  description = "Region in which to create resources."
+  type        = string
+}
+
+variable "ephemeral" {
+  description = "Set to true if this is a throwaway/temporary log instance. Will set attributes on created resources to allow them to be disabled/deleted more easily."
+  type        = bool
+}
+
+variable "ecr_registry" {
+  description = "Container registry address, with the conformance and hammer repositories."
+  type        = string
+}
+
+variable "ecr_repository_conformance" {
+  description = "Container repository for the conformance binary, with the tag."
+  type        = string
+}
+
+variable "ecr_repository_hammer" {
+  description = "Container repository for the hammer binary, with the tag."
+  type        = string
+}
+
+variable "signer" {
+  description = "The note signer which used to sign checkpoints."
+  type        = string
+}
+
+variable "verifier" {
+  description = "The note verifier used to verify checkpoints."
+  type        = string
+}
+
+variable "ecs_execution_role" {
+  description = "Role used to run the ECS task."
+  type        = string
+}
+
+variable "ecs_conformance_task_role" {
+  description = "Role assumed by conformance containers when they run."
+  type        = string
+}