diff --git a/automation/terraform/install_kubeflow/.gitignore b/automation/terraform/install_kubeflow/.gitignore deleted file mode 100644 index 5f104da..0000000 --- a/automation/terraform/install_kubeflow/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -kubeflow -.terraform -env.tfvars -*.cfg -*.tfstate -*.tfstate.backup \ No newline at end of file diff --git a/automation/terraform/install_kubeflow/README.md b/automation/terraform/install_kubeflow/README.md deleted file mode 100644 index df8b81e..0000000 --- a/automation/terraform/install_kubeflow/README.md +++ /dev/null @@ -1,3 +0,0 @@ -How to run? - -
./run.sh
\ No newline at end of file diff --git a/automation/terraform/install_kubeflow/main.tf b/automation/terraform/install_kubeflow/main.tf deleted file mode 100644 index 3c93ff5..0000000 --- a/automation/terraform/install_kubeflow/main.tf +++ /dev/null @@ -1,127 +0,0 @@ -provider "nutanix" { - username = var.prism_central_username - password = var.prism_central_password - endpoint = var.prism_central_endpoint - insecure = var.insecure - port = var.prism_central_port - wait_timeout = var.wait_timeout -} - -resource "null_resource" "now" { - triggers = { - always_run = timestamp() - } -} - -# Download kubeconfig for input cluster -module "karbon_kube_config" { - source = "../karbon_kube_config" - - prism_central_username = var.prism_central_username - prism_central_password = var.prism_central_password - prism_central_endpoint = var.prism_central_endpoint - insecure = var.insecure - prism_central_port = var.prism_central_port - wait_timeout = var.wait_timeout - karbon_cluster_name = var.karbon_cluster_name - kubeconfig_filename = var.kubeconfig_filename -} - -# provider "kubernetes" { -# config_path = var.kubeconfig_filename -# } - -# # Get all k8s namespaces -# data "kubernetes_all_namespaces" "allns" {} - -# output "all-ns" { -# value = data.kubernetes_all_namespaces.allns.namespaces -# } - -# output "kubeflow-ns-present" { -# value = contains(data.kubernetes_all_namespaces.allns.namespaces, "kubeflow") -# } - -# Configure the GitHub Provider -# provider "github" {} - -# data "github_repository" "kubeflow_manifests" { -# full_name = "kubeflow/manifests" -# } - -# locals { -# http_clone_url = data.github_repository.kubeflow_manifests.http_clone_url -# } - -resource "null_resource" "download_manifest_repo" { - - triggers = { - always_run = timestamp() - kubeconfig_filename = var.kubeconfig_filename - kubeflow_version = var.kubeflow_version - } - provisioner "local-exec" { - # command = "wget https://github.com/kubeflow/manifests/archive/refs/heads/master.zip && unzip -n ./master.zip" - command = "wget https://github.com/kubeflow/manifests/archive/refs/tags/v${self.triggers.kubeflow_version}.zip && mkdir -p kubeflow && unzip -n ./v${self.triggers.kubeflow_version}.zip -d ./kubeflow" - } - - # provisioner "local-exec" { - # when = destroy - # # command = "rm ./master.zip && rm -rf manifests-master" - # command = "rm ./v${self.triggers.kubeflow_version}.zip && rm -rf kubeflow" - # } - depends_on = [ - null_resource.now, - module.karbon_kube_config - ] -} - -# TODO Enable following block after error once following error is resolved---------- -# │ Error: Invalid for_each argument -# │ -# │ on main.tf line 72, in resource "kustomization_resource" "install_kubeflow": -# │ 72: for_each = tolist(data.kustomization_build.kubeflow_manifests.ids) -# │ ├──────────────── -# │ │ data.kustomization_build.kubeflow_manifests.ids is a set of string, known only after apply -# │ -# │ The "for_each" value depends on resource attributes that cannot be determined until apply, so Terraform cannot predict how many instances will be created. To work around this, use the -target argument to -# │ first apply only the resources that the for_each depends on. - -# provider "kustomization" { -# kubeconfig_path = var.kubeconfig_filename -# } - -# data "kustomization_build" "kubeflow_manifests" { -# path = "manifests-master/example" - -# depends_on = [ -# null_resource.download_manifest_repo -# ] -# } - -# resource "kustomization_resource" "install_kubeflow_kustomize" { -# for_each = tolist(data.kustomization_build.kubeflow_manifests.ids) - -# manifest = data.kustomization_build.kubeflow_manifests.manifests[each.value] -# } -# TODO ---------- - -resource "null_resource" "install_kubeflow_local_exec" { - triggers = { - always_run = timestamp() - kubeconfig_filename = var.kubeconfig_filename - kubeflow_version = var.kubeflow_version - } - provisioner "local-exec" { - command = "while ! kustomize build kubeflow/manifests-${self.triggers.kubeflow_version}/example | kubectl --kubeconfig=${self.triggers.kubeconfig_filename} apply -f -; do echo 'Retrying to apply resources'; sleep 10; done" - } - - # provisioner "local-exec" { - # when = destroy - # command = "while ! kustomize build kubeflow/manifests-${self.triggers.kubeflow_version}/example | kubectl --kubeconfig=${self.triggers.kubeconfig_filename} delete -f -; do echo 'Retrying to delete resources'; sleep 10; done" - # } - - depends_on = [ - null_resource.download_manifest_repo - ] -} diff --git a/automation/terraform/install_kubeflow/monitoring.env.yaml b/automation/terraform/install_kubeflow/monitoring.env.yaml deleted file mode 100644 index e21d010..0000000 --- a/automation/terraform/install_kubeflow/monitoring.env.yaml +++ /dev/null @@ -1,113 +0,0 @@ -namespaceOverride: "kubeflow-monitoring" - -kubeProxy: - enabled: false -nodeExporter: - enabled: false -kubeStateMetrics: - enabled: false - -prometheusOperator: - serviceMonitorSelectorNilUsesHelmValues: false - namespaces: - releaseNamespace: true - additional: - - kubeflow - -prometheus: - prometheusSpec: - additionalScrapeConfigs: - # Custom monitoring: monitoring ml-pipeline (the Kubeflow Pipelines API server) - # TODO: remove when fixed: https://github.com/kubeflow/manifests/issues/2011 - - job_name: 'kubeflow-ml-pipeline' - scrape_interval: 15s - static_configs: - - targets: ['ml-pipeline.kubeflow:8888'] - - # Scan for all services in "kubeflow..." namespaces with prometheus.io annotations (scrape=true, port, address). - # Both HTTP and HTTPS - - job_name: 'kubeflow-services' - scrape_interval: 15s - authorization: - credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token - tls_config: - insecure_skip_verify: true - kubernetes_sd_configs: - - role: service - relabel_configs: - # Only "kubeflow..." namespaces - - source_labels: [__meta_kubernetes_namespace] - action: keep - regex: kubeflow(.*) - # Only with prometheus.io/scrape = true - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - action: keep - regex: true - # Use scheme - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] - action: replace - target_label: __scheme__ - regex: (https?) - # Use path (/metrics) - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - # Use address & port - - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] - action: replace - target_label: __address__ - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - # Remove prefix from labels - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - # Save namespace label - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: kubernetes_namespace - # Save service name label - - source_labels: [__meta_kubernetes_service_name] - action: replace - target_label: service_name - - # Scan for all pods in "kubeflow..." namespaces with prometheus.io annotations (scrape=true, port, address). - # Both HTTP and HTTPS - - job_name: 'kubeflow-pods' - scrape_interval: 15s - authorization: - credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token - tls_config: - insecure_skip_verify: true - kubernetes_sd_configs: - - role: pod - relabel_configs: - - source_labels: [__meta_kubernetes_namespace] - action: keep - regex: kubeflow(.*) - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_pod_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: kubernetes_namespace - - source_labels: [__meta_kubernetes_service_name] - action: replace - target_label: service_name - - source_labels: [__meta_kubernetes_pod_node_name] - action: replace - target_label: hostname - -grafana: - namespaceOverride: "kubeflow-monitoring" diff --git a/automation/terraform/install_kubeflow/monitoring.tf b/automation/terraform/install_kubeflow/monitoring.tf deleted file mode 100644 index d5c133c..0000000 --- a/automation/terraform/install_kubeflow/monitoring.tf +++ /dev/null @@ -1,54 +0,0 @@ -resource "null_resource" "install_monitoring" { - depends_on = [ - null_resource.now, - null_resource.install_prometheus_helm - ] -} - -resource "null_resource" "helm_setup" { - count = var.install_prometheus ? 1 : 0 - - provisioner "local-exec" { - command = <