diff --git a/catalogs/data/airbyte/README.md b/catalogs/data/airbyte/README.md index 41dbfc5e..d273998b 100644 --- a/catalogs/data/airbyte/README.md +++ b/catalogs/data/airbyte/README.md @@ -6,7 +6,7 @@ This is a baseline, prod ready airbyte installation using Plural. It includes a * RDS/Google Cloud Sql, Azure Flexible Server to handle postgres. This gives you a robust RDBMS service to hold airbyte's core transactional data. * Plural OIDC to handle authentication to Airbyte. Airbyte does not support this natively, and so we use oauth-proxy as a middleware to handle authentication. -In addtion, there are a few common customizations you might want to do. +In addition, there are a few common customizations you might want to do. ## Configure Basic Auth @@ -80,4 +80,4 @@ when building your application. ## Contributing -If there are any features or documentation you'd like to add to this setup, please feel free to contribute back at https://github.com/pluralsh/scaffolds \ No newline at end of file +If there are any features or documentation you'd like to add to this setup, please feel free to contribute back at https://github.com/pluralsh/scaffolds diff --git a/catalogs/data/airbyte/airbyte-servicedeployment.yaml b/catalogs/data/airbyte/airbyte-servicedeployment.yaml index 37b69926..94ee5553 100644 --- a/catalogs/data/airbyte/airbyte-servicedeployment.yaml +++ b/catalogs/data/airbyte/airbyte-servicedeployment.yaml @@ -28,6 +28,7 @@ spec: cluster: {{ context.cluster }} hostname: {{ context.hostname }} bucket: {{ context.bucket }} + region: {{ context.region }} clusterRef: kind: Cluster name: {{ context.cluster }} diff --git a/catalogs/data/airbyte/helm/airbyte.yaml.liquid b/catalogs/data/airbyte/helm/airbyte.yaml.liquid index 4051f35e..c4dfff80 100644 --- a/catalogs/data/airbyte/helm/airbyte.yaml.liquid +++ b/catalogs/data/airbyte/helm/airbyte.yaml.liquid @@ -11,7 +11,7 @@ global: type: S3 storageSecretName: airbyte-airbyte-secrets s3: - region: us-east-2 + region: {{ 
configuration.region }} authenticationType: credentials accessKeyId: {{ imports[imports_airbyte_key].access_key_id }} accessKeyIdSecretKey: AWS_ACCESS_KEY_ID diff --git a/catalogs/data/mlflow/README.md b/catalogs/data/mlflow/README.md new file mode 100644 index 00000000..929995a0 --- /dev/null +++ b/catalogs/data/mlflow/README.md @@ -0,0 +1,7 @@ +# Mlflow + +This is a baseline, prod-ready Mlflow installation using Plural. + +## Contributing + +If there are any features or documentation you'd like to add to this setup, please feel free to contribute back at https://github.com/pluralsh/scaffolds. diff --git a/catalogs/data/mlflow/helm/mlflow.yaml.liquid b/catalogs/data/mlflow/helm/mlflow.yaml.liquid new file mode 100644 index 00000000..760f7c10 --- /dev/null +++ b/catalogs/data/mlflow/helm/mlflow.yaml.liquid @@ -0,0 +1,43 @@ +{% raw %} +{% assign imports_mlflow_key = 'mlflow-' | append: configuration.cluster %} + +tracking: + enabled: true + podAnnotations: + security.plural.sh/oauth-env-secret: mlflow-proxy-config + podLabels: + security.plural.sh/inject-oauth-sidecar: "true" + auth: + enabled: false + service: + type: ClusterIP + +externalDatabase: + dialectDriver: "postgresql" + host: {{ imports[imports_mlflow_key].postgres_host }} + port: '5432' + user: 'mlflow' + database: 'mlflow' + authDatabase: 'mlflow_auth' + password: {{ imports[imports_mlflow_key].postgres_password }} + +externalS3: + host: s3.{{ configuration.region }}.amazonaws.com + port: 443 + useCredentialsInSecret: true + accessKeyID: {{imports[imports_mlflow_key].access_key_id}} + accessKeySecret: {{imports[imports_mlflow_key].secret_access_key}} + existingSecret: ~ + existingSecretAccessKeyIDKey: AWS_ACCESS_KEY_ID + existingSecretKeySecretKey: AWS_SECRET_ACCESS_KEY + protocol: "https" + bucket: {{ configuration.bucket }} + serveArtifacts: true + +postgresql: + enabled: false + +minio: + enabled: false + +{% endraw %} diff --git a/catalogs/data/mlflow/helm/oauth-proxy-config.yaml.liquid 
b/catalogs/data/mlflow/helm/oauth-proxy-config.yaml.liquid new file mode 100644 index 00000000..f9897a12 --- /dev/null +++ b/catalogs/data/mlflow/helm/oauth-proxy-config.yaml.liquid @@ -0,0 +1,28 @@ +{% raw %} +{% assign imports_mlflow_key = 'mlflow-' | append: configuration.cluster %} + +service: + name: mlflow-oauth2-proxy + selector: + app.kubernetes.io/instance: mlflow + app.kubernetes.io/name: webapp + +secret: + clientID: {{ imports[imports_mlflow_key].oidc_client_id }} + clientSecret: {{ imports[imports_mlflow_key].oidc_client_secret }} + cookieSecret: {{ imports[imports_mlflow_key].oidc_cookie_secret }} + issuer: https://oidc.plural.sh/ + upstream: http://localhost:8080 + name: mlflow-proxy-config + env: + OAUTH2_PROXY_UPSTREAM_TIMEOUT: '120s' + +{% if configuration["basicAuth"] %} +{% assign basicAuth = configuration["basicAuth"] | from_json %} +users: +{% for user in basicAuth %} + {{ user[0] }}: {{ user[1] }} +{% endfor %} +{% endif %} + +{% endraw %} diff --git a/catalogs/data/mlflow/mlflow-raw-servicedeployment.yaml b/catalogs/data/mlflow/mlflow-raw-servicedeployment.yaml new file mode 100644 index 00000000..9aca6769 --- /dev/null +++ b/catalogs/data/mlflow/mlflow-raw-servicedeployment.yaml @@ -0,0 +1,20 @@ +apiVersion: deployments.plural.sh/v1alpha1 +kind: ServiceDeployment +metadata: + name: mlflow-raw-{{ context.cluster }} + namespace: apps +spec: + namespace: mlflow + git: + folder: services/apps/mlflow + ref: main + repositoryRef: + kind: GitRepository + name: infra + namespace: infra + configuration: + hostname: {{ context.hostname }} + clusterRef: + kind: Cluster + name: {{ context.cluster }} + namespace: infra diff --git a/catalogs/data/mlflow/mlflow-servicedeployment.yaml b/catalogs/data/mlflow/mlflow-servicedeployment.yaml new file mode 100644 index 00000000..ce8a801f --- /dev/null +++ b/catalogs/data/mlflow/mlflow-servicedeployment.yaml @@ -0,0 +1,35 @@ +apiVersion: deployments.plural.sh/v1alpha1 +kind: ServiceDeployment +metadata: + name: 
mlflow-{{ context.cluster }} + namespace: apps +spec: + namespace: mlflow + git: + folder: helm/mlflow/{{ context.cluster }} + ref: main + repositoryRef: + kind: GitRepository + name: infra + namespace: infra + helm: + version: "2.x.x" + chart: mlflow + release: mlflow + ignoreHooks: false + url: https://charts.bitnami.com/bitnami + valuesFiles: + - mlflow.yaml.liquid + imports: + - stackRef: + name: mlflow-{{ context.cluster }} + namespace: apps + configuration: + cluster: {{ context.cluster }} + hostname: {{ context.hostname }} + bucket: {{ context.bucket }} + region: {{ context.region }} + clusterRef: + kind: Cluster + name: {{ context.cluster }} + namespace: infra diff --git a/catalogs/data/mlflow/mlflow-stack.yaml b/catalogs/data/mlflow/mlflow-stack.yaml new file mode 100644 index 00000000..ae6a2e4e --- /dev/null +++ b/catalogs/data/mlflow/mlflow-stack.yaml @@ -0,0 +1,22 @@ +apiVersion: deployments.plural.sh/v1alpha1 +kind: InfrastructureStack +metadata: + name: mlflow-{{ context.cluster }} + namespace: apps +spec: + detach: false + type: TERRAFORM + approval: true + manageState: true + actor: console@plural.sh + git: + ref: main + folder: terraform/apps/mlflow/{{ context.cluster }} + repositoryRef: + name: infra + namespace: infra + configuration: + version: '1.8' + clusterRef: + name: {{ context.cluster }} + namespace: infra diff --git a/catalogs/data/mlflow/oauth-proxy-config-servicedeployment.yaml b/catalogs/data/mlflow/oauth-proxy-config-servicedeployment.yaml new file mode 100644 index 00000000..e6f07ddd --- /dev/null +++ b/catalogs/data/mlflow/oauth-proxy-config-servicedeployment.yaml @@ -0,0 +1,31 @@ +apiVersion: deployments.plural.sh/v1alpha1 +kind: ServiceDeployment +metadata: + name: mlflow-oauth-proxy-config-{{ context.cluster }} + namespace: apps +spec: + namespace: mlflow + git: + folder: helm/mlflow/{{ context.cluster }} + ref: main + repositoryRef: + kind: GitRepository + name: infra + namespace: infra + helm: + version: "x.x.x" + chart: 
oidc-config + url: https://pluralsh.github.io/module-library + valuesFiles: + - oauth-proxy-config.yaml.liquid + imports: + - stackRef: + name: mlflow-{{ context.cluster }} + namespace: apps + configuration: + cluster: {{ context.cluster }} + hostname: {{ context.hostname }} + clusterRef: + kind: Cluster + name: {{ context.cluster }} + namespace: infra diff --git a/catalogs/data/mlflow/services/oauth-proxy-ingress.yaml.liquid b/catalogs/data/mlflow/services/oauth-proxy-ingress.yaml.liquid new file mode 100644 index 00000000..44eeb700 --- /dev/null +++ b/catalogs/data/mlflow/services/oauth-proxy-ingress.yaml.liquid @@ -0,0 +1,33 @@ +{% raw %} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: mlflow-webapp-proxy + namespace: mlflow + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubernetes.io/tls-acme: "true" + # Extend timeout to allow long running queries. + nginx.ingress.kubernetes.io/proxy-connect-timeout: "300" + nginx.ingress.kubernetes.io/proxy-read-timeout: "300" + nginx.ingress.kubernetes.io/proxy-send-timeout: "300" + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + nginx.ingress.kubernetes.io/use-regex: "true" +spec: + ingressClassName: nginx + rules: + - host: {{ configuration.hostname }} + http: + paths: + - backend: + service: + name: mlflow-oauth2-proxy + port: + number: 80 + path: /.* + pathType: ImplementationSpecific + tls: + - hosts: + - {{ configuration.hostname }} + secretName: mlflow-tls +{% endraw %} diff --git a/catalogs/data/mlflow/terraform/aws/iam.tf b/catalogs/data/mlflow/terraform/aws/iam.tf new file mode 100644 index 00000000..b3af2f6c --- /dev/null +++ b/catalogs/data/mlflow/terraform/aws/iam.tf @@ -0,0 +1,37 @@ + +resource "aws_iam_policy" "mlflow" { + name_prefix = "mlflow" + description = "policy for the plural admin mlflow" + policy = data.aws_iam_policy_document.mlflow.json +} + +resource "aws_iam_user" "mlflow" { + name = "${data.plural_cluster.cluster.name}-mlflow" + + depends_on = [ 
data.plural_cluster.cluster ] +} + +resource "aws_iam_access_key" "mlflow" { + user = aws_iam_user.mlflow.name +} + +data "aws_iam_policy_document" "mlflow" { + statement { + sid = "admin" + effect = "Allow" + actions = ["s3:*"] + + resources = [ + "arn:aws:s3:::${var.mlflow_bucket}", + "arn:aws:s3:::${var.mlflow_bucket}/*", + ] + } +} + +resource "aws_iam_policy_attachment" "mlflow-user" { + name = "${data.plural_cluster.cluster.name}-mlflow-policy" + users = [aws_iam_user.mlflow.name] + policy_arn = aws_iam_policy.mlflow.arn + + depends_on = [ data.plural_cluster.cluster ] +} diff --git a/catalogs/data/mlflow/terraform/aws/oidc.tf b/catalogs/data/mlflow/terraform/aws/oidc.tf new file mode 100644 index 00000000..7da02f53 --- /dev/null +++ b/catalogs/data/mlflow/terraform/aws/oidc.tf @@ -0,0 +1,15 @@ +resource "random_password" "oidc_cookie" { + length = 24 + min_lower = 1 + min_numeric = 1 + min_upper = 1 + special = false +} + +resource "plural_oidc_provider" "mlflow" { + name = "mlflow-{{ context.cluster }}" + auth_method = "BASIC" + type = "PLURAL" + description = "OIDC provider for mlflow deployed to the {{ context.cluster }} cluster" + redirect_uris = ["https://{{ context.hostname }}/oauth2/callback"] +} diff --git a/catalogs/data/mlflow/terraform/aws/outputs.tf b/catalogs/data/mlflow/terraform/aws/outputs.tf new file mode 100644 index 00000000..c807cedf --- /dev/null +++ b/catalogs/data/mlflow/terraform/aws/outputs.tf @@ -0,0 +1,32 @@ +output "access_key_id" { + value = aws_iam_access_key.mlflow.id +} + +output "secret_access_key" { + value = aws_iam_access_key.mlflow.secret + sensitive = true +} + +output "postgres_host" { + value = try(module.db.db_instance_address, "") +} + +output "postgres_password" { + value = random_password.password.result + sensitive = true +} + +output "oidc_cookie_secret" { + value = random_password.oidc_cookie.result + sensitive = true +} + +output "oidc_client_id" { + value = plural_oidc_provider.mlflow.client_id + sensitive = 
true +} + +output "oidc_client_secret" { + value = plural_oidc_provider.mlflow.client_secret + sensitive = true +} diff --git a/catalogs/data/mlflow/terraform/aws/plural.tf b/catalogs/data/mlflow/terraform/aws/plural.tf new file mode 100644 index 00000000..c30d03d4 --- /dev/null +++ b/catalogs/data/mlflow/terraform/aws/plural.tf @@ -0,0 +1,3 @@ +data "plural_cluster" "cluster" { + handle = var.cluster_name +} diff --git a/catalogs/data/mlflow/terraform/aws/postgres.tf b/catalogs/data/mlflow/terraform/aws/postgres.tf new file mode 100644 index 00000000..5a9acf73 --- /dev/null +++ b/catalogs/data/mlflow/terraform/aws/postgres.tf @@ -0,0 +1,87 @@ +resource "random_password" "password" { + length = 20 + min_lower = 1 + min_numeric = 1 + min_upper = 1 + special = false +} + +data "aws_eks_cluster" "mgmt" { + name = data.plural_cluster.cluster.name + + depends_on = [ data.plural_cluster.cluster ] +} + +data "aws_vpc" "mgmt" { + id = one(data.aws_eks_cluster.mgmt.vpc_config).vpc_id +} + +module "db" { + source = "terraform-aws-modules/rds/aws" + version = "~> 6.3" + + identifier = var.db_name + + engine = "postgres" + engine_version = var.postgres_vsn + family = "postgres${var.postgres_vsn}" + major_engine_version = var.postgres_vsn + instance_class = var.db_instance_class + allocated_storage = var.db_storage + + db_name = "mlflow" + username = "mlflow" + password = random_password.password.result + manage_master_user_password = false + + maintenance_window = "Mon:00:00-Mon:03:00" + backup_window = "03:00-06:00" + backup_retention_period = var.backup_retention_period + + monitoring_interval = "30" + monitoring_role_name = "${var.db_name}-PluralRDSMonitoringRole" + create_monitoring_role = true + apply_immediately = true + + multi_az = true + + create_db_subnet_group = true + subnet_ids = one(data.aws_eks_cluster.mgmt.vpc_config).subnet_ids + vpc_security_group_ids = [module.security_group.security_group_id] + + create_cloudwatch_log_group = true + enabled_cloudwatch_logs_exports = 
["postgresql"] + + parameters = [ + { + name = "autovacuum" + value = 1 + }, + { + name = "client_encoding" + value = "utf8" + } + ] + + # Database Deletion Protection + deletion_protection = var.deletion_protection +} + +module "security_group" { + source = "terraform-aws-modules/security-group/aws" + version = "~> 5.0" + + name = "${var.db_name}-db-security-group" + description = "security group for your plural console db" + vpc_id = data.aws_vpc.mgmt.id + + ingress_with_cidr_blocks = [ + { + from_port = 5432 + to_port = 5432 + protocol = "tcp" + description = "PostgreSQL access from within VPC" + cidr_blocks = data.aws_vpc.mgmt.cidr_block + }, + ] +} diff --git a/catalogs/data/mlflow/terraform/aws/s3.tf b/catalogs/data/mlflow/terraform/aws/s3.tf new file mode 100644 index 00000000..1436b2b2 --- /dev/null +++ b/catalogs/data/mlflow/terraform/aws/s3.tf @@ -0,0 +1,14 @@ +resource "aws_s3_bucket" "mlflow" { + bucket = var.mlflow_bucket + force_destroy = var.force_destroy_bucket +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "mlflow" { + bucket = aws_s3_bucket.mlflow.id + + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" + } + } +} diff --git a/catalogs/data/mlflow/terraform/aws/variables.tf b/catalogs/data/mlflow/terraform/aws/variables.tf new file mode 100644 index 00000000..2f447826 --- /dev/null +++ b/catalogs/data/mlflow/terraform/aws/variables.tf @@ -0,0 +1,41 @@ +variable "cluster_name" { + type = string + default = "{{ context.cluster }}" +} + +variable "mlflow_bucket" { + type = string + default = "{{ context.bucket }}" +} + +variable "force_destroy_bucket" { + type = bool + default = false + description = "If true, the bucket will be deleted even if it contains objects." 
+} + +variable "db_name" { + default = "plrl-{{ context.cluster }}-mlflow" +} + +variable "postgres_vsn" { + default = "14" +} + +variable "db_storage" { + default = 20 +} + +variable "deletion_protection" { + type = bool + default = true +} + +variable "backup_retention_period" { + type = number + default = 7 +} + +variable "db_instance_class" { + default = "db.t4g.large" +} diff --git a/catalogs/data/mlflow/terraform/aws/versions.tf b/catalogs/data/mlflow/terraform/aws/versions.tf new file mode 100644 index 00000000..54f47de0 --- /dev/null +++ b/catalogs/data/mlflow/terraform/aws/versions.tf @@ -0,0 +1,19 @@ + +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.57" + } + plural = { + source = "pluralsh/plural" + version = ">= 0.2.1" + } + } +} + +provider "plural" {} + +provider "aws" { + region = "{{ context.region }}" +} \ No newline at end of file diff --git a/setup/catalogs/data/mlflow.yaml b/setup/catalogs/data/mlflow.yaml new file mode 100644 index 00000000..1ed9dacf --- /dev/null +++ b/setup/catalogs/data/mlflow.yaml @@ -0,0 +1,67 @@ +apiVersion: deployments.plural.sh/v1alpha1 +kind: PrAutomation +metadata: + name: mlflow +spec: + name: mlflow + icon: https://cdn.icon-icons.com/icons2/3913/PNG/512/mlflow_logo_icon_248445.png + documentation: | + Sets up an mlflow instance for a given cloud + creates: + git: + ref: sebastian/prod-2981-set-up-catalog-pipeline # TODO set to main + folder: catalogs/data/mlflow + templates: + - source: helm + destination: helm/mlflow/{{ context.cluster }} + external: true + - source: services/oauth-proxy-ingress.yaml.liquid + destination: services/apps/mlflow/oauth-proxy-ingress.yaml.liquid + external: true + - source: "terraform/{{ context.cloud }}" + destination: "terraform/apps/mlflow/{{ context.cluster }}" + external: true + - source: mlflow-raw-servicedeployment.yaml + destination: "bootstrap/apps/mlflow/{{ context.cluster }}/mlflow-raw-servicedeployment.yaml" + external: true + - 
source: mlflow-servicedeployment.yaml + destination: "bootstrap/apps/mlflow/{{ context.cluster }}/mlflow-servicedeployment.yaml" + external: true + - source: mlflow-stack.yaml + destination: "bootstrap/apps/mlflow/{{ context.cluster }}/mlflow-stack.yaml" + external: true + - source: oauth-proxy-config-servicedeployment.yaml + destination: "bootstrap/apps/mlflow/{{ context.cluster }}/oauth-proxy-config-servicedeployment.yaml" + external: true + - source: README.md + destination: documentation/mlflow/README.md + external: true + repositoryRef: + name: scaffolds + catalogRef: + name: data-engineering + scmConnectionRef: + name: plural # you'll need to add this ScmConnection manually before this is functional + title: "Setting up mlflow on cluster {{ context.cluster }} for {{ context.cloud }}" + message: | + Set up mlflow on {{ context.cluster }} ({{ context.cloud }}) + + Will set up an mlflow deployment, including object storage and postgres setup + configuration: + - name: cluster + type: STRING + documentation: Handle of the cluster you want to deploy mlflow to. + - name: cloud + type: ENUM + documentation: Cloud provider you want to deploy mlflow to. + values: + - aws + - name: bucket + type: STRING + documentation: The name of the S3/GCS/Azure Blob bucket you'll use for mlflow logs. This must be globally unique. + - name: hostname + type: STRING + documentation: The DNS name you'll host mlflow under. + - name: region + type: STRING + documentation: The cloud provider region you're going to use to deploy cloud resources.