# Katib Helm chart: upgrade from v0.11.1 to v0.16.0-rc.1.
#
# What this patch does:
#   * Replaces the three generated CRD files with experiment.yaml,
#     suggestion.yaml and trial.yaml (same fields, reordered).
#   * Configures the controller through a KatibConfig file mounted from the
#     chart's "-config" ConfigMap instead of command-line flags, and moves the
#     probes to /healthz and /readyz on a dedicated probe port.
#   * Replaces the wildcard-verb controller ClusterRole with explicit verbs.
#   * Moves both webhook configurations to admissionReviewVersions v1 and
#     removes "failurePolicy: Ignore" from every webhook.
#   * Switches db-manager to native gRPC probes and lets it target MySQL or
#     PostgreSQL via dbManager.config.database.type.
#   * Grants the web app SubjectAccessReview / pod / pod-log access and adds
#     Kubeflow aggregated admin, edit and view ClusterRoles.
#
# Review amendments relative to the originally submitted patch:
#   * Chart.yaml: chart "version" bumped 0.1.20 -> 0.1.21, because templates,
#     CRDs and appVersion all change in this patch.
#   * controller/mutatingwebhookconfiguration.yaml: the pod mutator pointed at
#     a "-controller-controller" service while the other webhooks use
#     "-controller"; corrected. With "failurePolicy: Ignore" removed, an
#     unreachable webhook service is no longer silently tolerated (assuming
#     the admissionregistration.k8s.io/v1 default of Fail).
#   * db-manager/deployment.yaml: DB_NAME is quoted, and an unsupported
#     database type now aborts rendering instead of producing a Deployment
#     with no database connection settings.
#   * controller/configmap.yaml: reworded the trial-template comment.
#   * The "index" lines are carried over unchanged; regenerate the patch to
#     refresh the blob ids of the three amended files.

diff --git a/kubeflow/helm/katib/Chart.yaml b/kubeflow/helm/katib/Chart.yaml
index 5059491a7..0077185da 100644
--- a/kubeflow/helm/katib/Chart.yaml
+++ b/kubeflow/helm/katib/Chart.yaml
@@ -3,4 +3,4 @@ name: katib
 description: A Helm chart for Kubernetes
 type: application
-version: 0.1.20
-appVersion: "0.11.1"
+version: 0.1.21
+appVersion: "v0.16.0-rc.1"
diff --git a/kubeflow/helm/katib/crds/apiextensions.k8s.io_v1_customresourcedefinition_experiments.kubeflow.org.yaml b/kubeflow/helm/katib/crds/apiextensions.k8s.io_v1_customresourcedefinition_experiments.kubeflow.org.yaml
deleted file mode 100644
index 449222127..000000000
--- a/kubeflow/helm/katib/crds/apiextensions.k8s.io_v1_customresourcedefinition_experiments.kubeflow.org.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
-  name: experiments.kubeflow.org
-spec:
-  group: kubeflow.org
-  names:
-    categories:
-    - all
-    - kubeflow
-    - katib
-    kind: Experiment
-    plural: experiments
-    singular: experiment
-  scope: Namespaced
-  versions:
-  - additionalPrinterColumns:
-    - jsonPath: .status.conditions[-1:].type
-      name: Type
-      type: string
-    - jsonPath: .status.conditions[-1:].status
-      name: Status
-      type: string
-    - jsonPath: .metadata.creationTimestamp
-      name: Age
-      type: date
-    name: v1beta1
-    schema:
-      openAPIV3Schema:
-        type: object
-        x-kubernetes-preserve-unknown-fields: true
-    served: true
-    storage: true
-    subresources:
-      status: {}
diff --git a/kubeflow/helm/katib/crds/apiextensions.k8s.io_v1_customresourcedefinition_suggestions.kubeflow.org.yaml b/kubeflow/helm/katib/crds/apiextensions.k8s.io_v1_customresourcedefinition_suggestions.kubeflow.org.yaml
deleted file mode 100644
index 99a858209..000000000
--- a/kubeflow/helm/katib/crds/apiextensions.k8s.io_v1_customresourcedefinition_suggestions.kubeflow.org.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
-  name: suggestions.kubeflow.org
-spec:
-  group: kubeflow.org
-  names:
-    categories:
-    - all
-    - kubeflow
-    - katib
-    kind: Suggestion
-    plural: suggestions
-    singular: suggestion
-  scope: Namespaced
-  versions:
-  - additionalPrinterColumns:
-    - jsonPath: .status.conditions[-1:].type
-      name: Type
-      type: string
-    - jsonPath: .status.conditions[-1:].status
-      name: Status
-      type: string
-    - jsonPath: .spec.requests
-      name: Requested
-      type: string
-    - jsonPath: .status.suggestionCount
-      name: Assigned
-      type: string
-    - jsonPath: .metadata.creationTimestamp
-      name: Age
-      type: date
-    name: v1beta1
-    schema:
-      openAPIV3Schema:
-        type: object
-        x-kubernetes-preserve-unknown-fields: true
-    served: true
-    storage: true
-    subresources:
-      status: {}
diff --git a/kubeflow/helm/katib/crds/apiextensions.k8s.io_v1_customresourcedefinition_trials.kubeflow.org.yaml b/kubeflow/helm/katib/crds/apiextensions.k8s.io_v1_customresourcedefinition_trials.kubeflow.org.yaml
deleted file mode 100644
index e1ff82afe..000000000
--- a/kubeflow/helm/katib/crds/apiextensions.k8s.io_v1_customresourcedefinition_trials.kubeflow.org.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
-  name: trials.kubeflow.org
-spec:
-  group: kubeflow.org
-  names:
-    categories:
-    - all
-    - kubeflow
-    - katib
-    kind: Trial
-    plural: trials
-    singular: trial
-  scope: Namespaced
-  versions:
-  - additionalPrinterColumns:
-    - jsonPath: .status.conditions[-1:].type
-      name: Type
-      type: string
-    - jsonPath: .status.conditions[-1:].status
-      name: Status
-      type: string
-    - jsonPath: .metadata.creationTimestamp
-      name: Age
-      type: date
-    name: v1beta1
-    schema:
-      openAPIV3Schema:
-        type: object
-        x-kubernetes-preserve-unknown-fields: true
-    served: true
-    storage: true
-    subresources:
-      status: {}
diff --git a/kubeflow/helm/katib/crds/experiment.yaml b/kubeflow/helm/katib/crds/experiment.yaml
new file mode 100644
index 000000000..8b07270c3
--- /dev/null
+++ b/kubeflow/helm/katib/crds/experiment.yaml
@@ -0,0 +1,36 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  name: experiments.kubeflow.org
+spec:
+  group: kubeflow.org
+  scope: Namespaced
+  versions:
+    - name: v1beta1
+      served: true
+      storage: true
+      additionalPrinterColumns:
+        - name: Type
+          type: string
+          jsonPath: .status.conditions[-1:].type
+        - name: Status
+          type: string
+          jsonPath: .status.conditions[-1:].status
+        - name: Age
+          type: date
+          jsonPath: .metadata.creationTimestamp
+      subresources:
+        status: {}
+      schema:
+        openAPIV3Schema:
+          type: object
+          x-kubernetes-preserve-unknown-fields: true
+  names:
+    kind: Experiment
+    singular: experiment
+    plural: experiments
+    categories:
+      - all
+      - kubeflow
+      - katib
diff --git a/kubeflow/helm/katib/crds/suggestion.yaml b/kubeflow/helm/katib/crds/suggestion.yaml
new file mode 100644
index 000000000..b6eaa3fd4
--- /dev/null
+++ b/kubeflow/helm/katib/crds/suggestion.yaml
@@ -0,0 +1,42 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  name: suggestions.kubeflow.org
+spec:
+  group: kubeflow.org
+  scope: Namespaced
+  versions:
+    - name: v1beta1
+      served: true
+      storage: true
+      additionalPrinterColumns:
+        - name: Type
+          type: string
+          jsonPath: .status.conditions[-1:].type
+        - name: Status
+          type: string
+          jsonPath: .status.conditions[-1:].status
+        - name: Requested
+          type: string
+          jsonPath: .spec.requests
+        - name: Assigned
+          type: string
+          jsonPath: .status.suggestionCount
+        - name: Age
+          type: date
+          jsonPath: .metadata.creationTimestamp
+      subresources:
+        status: {}
+      schema:
+        openAPIV3Schema:
+          type: object
+          x-kubernetes-preserve-unknown-fields: true
+  names:
+    kind: Suggestion
+    singular: suggestion
+    plural: suggestions
+    categories:
+      - all
+      - kubeflow
+      - katib
diff --git a/kubeflow/helm/katib/crds/trial.yaml b/kubeflow/helm/katib/crds/trial.yaml
new file mode 100644
index 000000000..765314b3f
--- /dev/null
+++ b/kubeflow/helm/katib/crds/trial.yaml
@@ -0,0 +1,36 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  name: trials.kubeflow.org
+spec:
+  group: kubeflow.org
+  scope: Namespaced
+  versions:
+    - name: v1beta1
+      served: true
+      storage: true
+      additionalPrinterColumns:
+        - name: Type
+          type: string
+          jsonPath: .status.conditions[-1:].type
+        - name: Status
+          type: string
+          jsonPath: .status.conditions[-1:].status
+        - name: Age
+          type: date
+          jsonPath: .metadata.creationTimestamp
+      subresources:
+        status: {}
+      schema:
+        openAPIV3Schema:
+          type: object
+          x-kubernetes-preserve-unknown-fields: true
+  names:
+    kind: Trial
+    singular: trial
+    plural: trials
+    categories:
+      - all
+      - kubeflow
+      - katib
diff --git a/kubeflow/helm/katib/templates/controller/cluster-role.yaml b/kubeflow/helm/katib/templates/controller/cluster-role.yaml
new file mode 100644
index 000000000..4d0d73448
--- /dev/null
+++ b/kubeflow/helm/katib/templates/controller/cluster-role.yaml
@@ -0,0 +1,129 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels: {{- include "katib.labels" . | nindent 4 }}
+  name: {{ include "katib.fullname" . }}-controller-cluster-role
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - services
+    verbs:
+      - "get"
+      - "list"
+      - "watch"
+      - "create"
+      - "delete"
+  - apiGroups:
+      - ""
+    resources:
+      - events
+    verbs:
+      - "create"
+      - "patch"
+      - "update"
+  - apiGroups:
+      - ""
+    resources:
+      - serviceaccounts
+      - persistentvolumes
+      - persistentvolumeclaims
+    verbs:
+      - "get"
+      - "list"
+      - "watch"
+      - "create"
+  - apiGroups:
+      - ""
+    resources:
+      - namespaces
+      - configmaps
+    verbs:
+      - "get"
+      - "list"
+      - "watch"
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+      - pods/status
+    verbs:
+      - "get"
+  - apiGroups:
+      - ""
+    resources:
+      - secrets
+    verbs:
+      - "get"
+      - "list"
+      - "watch"
+      - "patch"
+  - apiGroups:
+      - apps
+    resources:
+      - deployments
+    verbs:
+      - "get"
+      - "list"
+      - "watch"
+      - "create"
+      - "delete"
+  - apiGroups:
+      - rbac.authorization.k8s.io
+    resources:
+      - roles
+      - rolebindings
+    verbs:
+      - "get"
+      - "create"
+      - "list"
+      - "watch"
+  - apiGroups:
+      - batch
+    resources:
+      - jobs
+      - cronjobs
+    verbs:
+      - "get"
+      - "list"
+      - "watch"
+      - "create"
+      - "delete"
+  - apiGroups:
+      - kubeflow.org
+    resources:
+      - tfjobs
+      - pytorchjobs
+      - mpijobs
+      - xgboostjobs
+      - mxjobs
+    verbs:
+      - "get"
+      - "list"
+      - "watch"
+      - "create"
+      - "delete"
+  - apiGroups:
+      - kubeflow.org
+    resources:
+      - experiments
+      - experiments/status
+      - experiments/finalizers
+      - trials
+      - trials/status
+      - trials/finalizers
+      - suggestions
+      - suggestions/status
+      - suggestions/finalizers
+    verbs:
+      - "*"
+  - apiGroups:
+      - admissionregistration.k8s.io
+    resources:
+      - validatingwebhookconfigurations
+      - mutatingwebhookconfigurations
+    verbs:
+      - "get"
+      - "watch"
+      - "list"
+      - "patch"
diff --git a/kubeflow/helm/katib/templates/controller/clusterrole.yaml b/kubeflow/helm/katib/templates/controller/clusterrole.yaml
deleted file mode 100644
index a23e1c540..000000000
--- a/kubeflow/helm/katib/templates/controller/clusterrole.yaml
+++ /dev/null
@@ -1,65 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  labels: {{- include "katib.labels" . | nindent 4 }}
-  name: {{ include "katib.fullname" . }}-controller-cluster-role
-rules:
-  - apiGroups:
-      - ""
-    resources:
-      - configmaps
-      - serviceaccounts
-      - services
-      - events
-      - namespaces
-      - persistentvolumes
-      - persistentvolumeclaims
-      - pods
-      - pods/log
-      - pods/status
-    verbs:
-      - '*'
-  - apiGroups:
-      - apps
-    resources:
-      - deployments
-    verbs:
-      - '*'
-  - apiGroups:
-      - rbac.authorization.k8s.io
-    resources:
-      - roles
-      - rolebindings
-    verbs:
-      - '*'
-  - apiGroups:
-      - batch
-    resources:
-      - jobs
-      - cronjobs
-    verbs:
-      - '*'
-  - apiGroups:
-      - kubeflow.org
-    resources:
-      - experiments
-      - experiments/status
-      - experiments/finalizers
-      - trials
-      - trials/status
-      - trials/finalizers
-      - suggestions
-      - suggestions/status
-      - suggestions/finalizers
-      - tfjobs
-      - pytorchjobs
-      - mpijobs
-    verbs:
-      - '*'
-  - apiGroups:
-      - tekton.dev
-    resources:
-      - pipelineruns
-      - taskruns
-    verbs:
-      - '*'
diff --git a/kubeflow/helm/katib/templates/controller/configmap.yaml b/kubeflow/helm/katib/templates/controller/configmap.yaml
index 084e66af5..a245420c6 100644
--- a/kubeflow/helm/katib/templates/controller/configmap.yaml
+++ b/kubeflow/helm/katib/templates/controller/configmap.yaml
@@ -1,66 +1,76 @@
 apiVersion: v1
-data:
-  early-stopping: |-
-    {
-      "medianstop": {
-        "image": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.11.1"
-      }
-    }
-  metrics-collector-sidecar: |-
-    {
-      "StdOut": {
-        "image": "docker.io/kubeflowkatib/file-metrics-collector:v0.11.1"
-      },
-      "File": {
-        "image": "docker.io/kubeflowkatib/file-metrics-collector:v0.11.1"
-      },
-      "TensorFlowEvent": {
-        "image": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.11.1",
-        "resources": {
-          "limits": {
-            "memory": "1Gi"
-          }
-        }
-      }
-    }
-  suggestion: |-
-    {
-      "random": {
-        "image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.11.1"
-      },
-      "tpe": {
-        "image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.11.1"
-      },
-      "grid": {
-        "image": "docker.io/kubeflowkatib/suggestion-chocolate:v0.11.1"
-      },
-      "hyperband": {
-        "image": "docker.io/kubeflowkatib/suggestion-hyperband:v0.11.1"
-      },
-      "bayesianoptimization": {
-        "image": "docker.io/kubeflowkatib/suggestion-skopt:v0.11.1"
-      },
-      "cmaes": {
-        "image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.11.1"
-      },
-      "enas": {
-        "image": "docker.io/kubeflowkatib/suggestion-enas:v0.11.1",
-        "resources": {
-          "limits": {
-            "memory": "200Mi"
-          }
-        }
-      },
-      "darts": {
-        "image": "docker.io/kubeflowkatib/suggestion-darts:v0.11.1"
-      }
-    }
 kind: ConfigMap
 metadata:
-  name: katib-config
+  name: {{ include "katib.fullname" . }}-config
   labels: {{- include "katib.labels" . | nindent 4 }}
+data:
+  katib-config.yaml: |-
+    apiVersion: config.kubeflow.org/v1beta1
+    kind: KatibConfig
+    init:
+      controller:
+        webhookPort: 8443
+        trialResources:
+          - Job.v1.batch
+          - TFJob.v1.kubeflow.org
+          - PyTorchJob.v1.kubeflow.org
+          - MPIJob.v1.kubeflow.org
+          - XGBoostJob.v1.kubeflow.org
+          - MXJob.v1.kubeflow.org
+    runtime:
+      metricsCollectors:
+        - kind: StdOut
+          image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0-rc.1
+        - kind: File
+          image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0-rc.1
+        - kind: TensorFlowEvent
+          image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0-rc.1
+          resources:
+            limits:
+              memory: 1Gi
+      suggestions:
+        - algorithmName: random
+          image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0-rc.1
+        - algorithmName: tpe
+          image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0-rc.1
+        - algorithmName: grid
+          image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0-rc.1
+        - algorithmName: hyperband
+          image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0-rc.1
+        - algorithmName: bayesianoptimization
+          image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0-rc.1
+        - algorithmName: cmaes
+          image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0-rc.1
+        - algorithmName: sobol
+          image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0-rc.1
+        - algorithmName: multivariate-tpe
+          image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0-rc.1
+        - algorithmName: enas
+          image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0-rc.1
+          resources:
+            limits:
+              memory: 200Mi
+        - algorithmName: darts
+          image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0-rc.1
+        - algorithmName: pbt
+          image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0-rc.1
+          persistentVolumeClaimSpec:
+            accessModes:
+              - ReadWriteMany
+            resources:
+              requests:
+                storage: 5Gi
+      earlyStoppings:
+        - algorithmName: medianstop
+          image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0-rc.1
 ---
 apiVersion: v1
+kind: ConfigMap
+metadata:
+  labels:
+    katib.kubeflow.org/component: trial-templates
+    {{- include "katib.labels" . | nindent 4 }}
+  name: {{ include "katib.fullname" . }}-trial-template
 data:
   defaultTrialTemplate.yaml: |-
     apiVersion: batch/v1
@@ -70,7 +80,7 @@ data:
         spec:
           containers:
             - name: training-container
-              image: docker.io/kubeflowkatib/mxnet-mnist:v1beta1-45c5727
+              image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0-rc.1
               command:
                 - "python3"
                 - "/opt/mxnet-mnist/mnist.py"
@@ -79,6 +89,7 @@ data:
                 - "--num-layers=${trialParameters.numberLayers}"
                 - "--optimizer=${trialParameters.optimizer}"
           restartPolicy: Never
+  # In ConfigMap templates, commands must use double quotes so that JSON parameters in the Trial Template (e.g. nn_config, architecture) are parsed correctly.
   enasCPUTemplate: |-
     apiVersion: batch/v1
     kind: Job
@@ -87,7 +98,7 @@ data:
         spec:
           containers:
             - name: training-container
-              image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v1beta1-45c5727
+              image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.16.0-rc.1
               command:
                 - python3
                 - -u
@@ -97,7 +108,7 @@ data:
                 - "--nn_config=\"${trialParameters.neuralNetworkConfig}\""
           restartPolicy: Never
   pytorchJobTemplate: |-
-    apiVersion: "kubeflow.org/v1"
+    apiVersion: kubeflow.org/v1
     kind: PyTorchJob
     spec:
       pytorchReplicaSpecs:
@@ -108,8 +119,7 @@ data:
             spec:
               containers:
                 - name: pytorch
-                  image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
-                  imagePullPolicy: Always
+                  image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0-rc.1
                   command:
                     - "python3"
                     - "/opt/pytorch-mnist/mnist.py"
@@ -123,16 +133,10 @@ data:
             spec:
               containers:
                 - name: pytorch
-                  image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
-                  imagePullPolicy: Always
+                  image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0-rc.1
                   command:
                     - "python3"
                     - "/opt/pytorch-mnist/mnist.py"
                     - "--epochs=1"
                     - "--lr=${trialParameters.learningRate}"
                     - "--momentum=${trialParameters.momentum}"
-kind: ConfigMap
-metadata:
-  labels:
-    app: katib-trial-templates
-  name: trial-template
diff --git a/kubeflow/helm/katib/templates/controller/deployment.yaml b/kubeflow/helm/katib/templates/controller/deployment.yaml
index 5e802e1d2..14ea32383 100644
--- a/kubeflow/helm/katib/templates/controller/deployment.yaml
+++ b/kubeflow/helm/katib/templates/controller/deployment.yaml
@@ -34,12 +34,7 @@ spec:
           image: "{{ .Values.controller.image.repository }}:{{ .Values.controller.image.tag | default .Chart.AppVersion }}"
           imagePullPolicy: {{ .Values.controller.image.pullPolicy }}
           args:
-            - --webhook-port=8443
-            - --trial-resources=Job.v1.batch
-            - --trial-resources=TFJob.v1.kubeflow.org
-            - --trial-resources=PyTorchJob.v1.kubeflow.org
-            - --trial-resources=MPIJob.v1.kubeflow.org
-            - --trial-resources=PipelineRun.v1beta1.tekton.dev
+            - --katib-config=/katib-config.yaml
           command:
             - ./katib-controller
           ports:
@@ -49,25 +44,40 @@ spec:
             - name: metrics
               containerPort: 8080
               protocol: TCP
+            - containerPort: 18080
+              name: probe
+              protocol: TCP
+          env:
+            - name: KATIB_CORE_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
           livenessProbe:
             httpGet:
-              path: /metrics
-              port: metrics
+              path: /healthz
+              port: probe
           readinessProbe:
             httpGet:
-              path: /metrics
-              port: metrics
+              path: /readyz
+              port: probe
           resources:
             {{- toYaml .Values.controller.resources | nindent 12 }}
           volumeMounts:
             - mountPath: /tmp/cert
               name: cert
               readOnly: true
+            - mountPath: /katib-config.yaml
+              name: katib-config
+              subPath: katib-config.yaml
+              readOnly: true
       volumes:
         - name: cert
           secret:
             defaultMode: 420
             secretName: {{ include "katib.fullname" . }}-controller-certs
+        - name: katib-config
+          configMap:
+            name: {{ include "katib.fullname" . }}-config
       {{- with .Values.controller.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
diff --git a/kubeflow/helm/katib/templates/controller/mutatingwebhookconfiguration.yaml b/kubeflow/helm/katib/templates/controller/mutatingwebhookconfiguration.yaml
index 4dcfa10d8..a16fdfb78 100644
--- a/kubeflow/helm/katib/templates/controller/mutatingwebhookconfiguration.yaml
+++ b/kubeflow/helm/katib/templates/controller/mutatingwebhookconfiguration.yaml
@@ -6,16 +6,16 @@ metadata:
   name: katib.kubeflow.org
   labels: {{- include "katib.labels" . | nindent 4 }}
 webhooks:
-  - admissionReviewVersions:
-      - v1beta1
+  - name: defaulter.experiment.katib.kubeflow.org
+    sideEffects: None
+    admissionReviewVersions:
+      - v1
     clientConfig:
       caBundle: Cg==
       service:
         name: {{ include "katib.fullname" . }}-controller
         namespace: {{ .Release.Namespace }}
         path: /mutate-experiment
-    failurePolicy: Ignore
-    name: defaulter.experiment.katib.kubeflow.org
     rules:
       - apiGroups:
           - kubeflow.org
@@ -26,20 +26,29 @@ webhooks:
           - UPDATE
         resources:
           - experiments
+  - name: mutator.pod.katib.kubeflow.org
     sideEffects: None
-  - admissionReviewVersions:
-      - v1beta1
+    admissionReviewVersions:
+      - v1
     clientConfig:
       caBundle: Cg==
       service:
-        name: {{ include "katib.fullname" . }}-controller-controller
+        name: {{ include "katib.fullname" . }}-controller
         namespace: {{ .Release.Namespace }}
         path: /mutate-pod
-    failurePolicy: Ignore
-    name: mutator.pod.katib.kubeflow.org
     namespaceSelector:
       matchLabels:
-        katib-metricscollector-injection: enabled
+        katib.kubeflow.org/metrics-collector-injection: enabled
+    # Once the AdmissionWebhookMatchConditions feature gate is enabled by default, we should switch to control based on userInfo.
+    # REF:
+    # - AdmissionWebhookMatchConditions: https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchconditions
+    # - Tracking issue: https://github.com/kubeflow/katib/issues/2206
+    objectSelector:
+      matchExpressions:
+        - key: katib.kubeflow.org/metrics-collector-injection
+          operator: NotIn
+          values:
+            - disabled
     rules:
       - apiGroups:
           - ""
@@ -49,4 +58,3 @@ webhooks:
           - CREATE
         resources:
           - pods
-    sideEffects: None
diff --git a/kubeflow/helm/katib/templates/controller/validatingwebhookconfiguration.yaml b/kubeflow/helm/katib/templates/controller/validatingwebhookconfiguration.yaml
index 94058c221..7f707e2ee 100644
--- a/kubeflow/helm/katib/templates/controller/validatingwebhookconfiguration.yaml
+++ b/kubeflow/helm/katib/templates/controller/validatingwebhookconfiguration.yaml
@@ -6,16 +6,16 @@ metadata:
   name: katib.kubeflow.org
   labels: {{- include "katib.labels" . | nindent 4 }}
 webhooks:
-  - admissionReviewVersions:
-      - v1beta1
+  - name: validator.experiment.katib.kubeflow.org
+    sideEffects: None
+    admissionReviewVersions:
+      - v1
     clientConfig:
       caBundle: Cg==
       service:
         name: {{ include "katib.fullname" . }}-controller
         namespace: {{ .Release.Namespace }}
         path: /validate-experiment
-    failurePolicy: Ignore
-    name: validator.experiment.katib.kubeflow.org
     rules:
       - apiGroups:
           - kubeflow.org
@@ -26,4 +26,3 @@ webhooks:
           - UPDATE
         resources:
           - experiments
-    sideEffects: None
diff --git a/kubeflow/helm/katib/templates/db-manager/deployment.yaml b/kubeflow/helm/katib/templates/db-manager/deployment.yaml
index 0358f4f21..8a7e0731e 100644
--- a/kubeflow/helm/katib/templates/db-manager/deployment.yaml
+++ b/kubeflow/helm/katib/templates/db-manager/deployment.yaml
@@ -40,24 +40,22 @@ spec:
               containerPort: 6789
               protocol: TCP
           livenessProbe:
-            exec:
-              command:
-                - /bin/grpc_health_probe
-                - -addr=:6789
-            failureThreshold: 5
+            grpc:
+              port: 6789
             initialDelaySeconds: 10
             periodSeconds: 60
+            failureThreshold: 5
           readinessProbe:
-            exec:
-              command:
-                - /bin/grpc_health_probe
-                - -addr=:6789
-            initialDelaySeconds: 5
+            grpc:
+              port: 6789
+            initialDelaySeconds: 10
+            periodSeconds: 60
+            failureThreshold: 5
           resources:
             {{- toYaml .Values.dbManager.resources | nindent 12 }}
           env:
             - name: DB_NAME
-              value: mysql
+              value: {{ .Values.dbManager.config.database.type | quote }}
             - name: DB_USER
               valueFrom:
                 secretKeyRef:
@@ -68,12 +66,23 @@ spec:
                 secretKeyRef:
                   key: {{ .Values.dbManager.config.secret.passwordKey }}
                   name: {{ .Values.dbManager.config.secret.name }}
+            {{- if eq .Values.dbManager.config.database.type "mysql" }}
             - name: KATIB_MYSQL_DB_DATABASE
               value: {{ .Values.dbManager.config.database.name }}
             - name: KATIB_MYSQL_DB_HOST
               value: {{ .Values.dbManager.config.database.host }}
             - name: KATIB_MYSQL_DB_PORT
               value: {{ .Values.dbManager.config.database.port | quote }}
+            {{- else if eq .Values.dbManager.config.database.type "postgres" }}
+            - name: KATIB_POSTGRESQL_DB_DATABASE
+              value: {{ .Values.dbManager.config.database.name }}
+            - name: KATIB_POSTGRESQL_DB_HOST
+              value: {{ .Values.dbManager.config.database.host }}
+            - name: KATIB_POSTGRESQL_DB_PORT
+              value: {{ .Values.dbManager.config.database.port | quote }}
+            {{- else }}
+            {{- fail "dbManager.config.database.type must be \"mysql\" or \"postgres\"" }}
+            {{- end }}
       {{- with .Values.dbManager.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
diff --git a/kubeflow/helm/katib/templates/web-app/clusterrole.yaml b/kubeflow/helm/katib/templates/web-app/cluster-role.yaml
similarity index 85%
rename from kubeflow/helm/katib/templates/web-app/clusterrole.yaml
rename to kubeflow/helm/katib/templates/web-app/cluster-role.yaml
index 6e92dc48b..1c22555eb 100644
--- a/kubeflow/helm/katib/templates/web-app/clusterrole.yaml
+++ b/kubeflow/helm/katib/templates/web-app/cluster-role.yaml
@@ -4,13 +4,19 @@ metadata:
   labels: {{- include "katib.labels" . | nindent 4 }}
   name: {{ include "katib.fullname" . }}-web-app-cluster-role
 rules:
+  - apiGroups:
+      - authorization.k8s.io
+    resources:
+      - subjectaccessreviews
+    verbs:
+      - create
   - apiGroups:
       - ""
     resources:
       - configmaps
       - namespaces
     verbs:
-      - '*'
+      - "*"
   - apiGroups:
       - kubeflow.org
     resources:
@@ -18,7 +24,19 @@ rules:
       - trials
       - suggestions
     verbs:
-      - '*'
+      - "*"
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - list
+  - apiGroups:
+      - ""
+    resources:
+      - pods/log
+    verbs:
+      - get
 ---
 aggregationRule:
   clusterRoleSelectors:
diff --git a/kubeflow/helm/katib/templates/web-app/deployment.yaml b/kubeflow/helm/katib/templates/web-app/deployment.yaml
index 9ab33e1c2..9b95471c7 100644
--- a/kubeflow/helm/katib/templates/web-app/deployment.yaml
+++ b/kubeflow/helm/katib/templates/web-app/deployment.yaml
@@ -41,6 +41,13 @@ spec:
             - name: website
               containerPort: 8080
               protocol: TCP
+          env:
+            - name: KATIB_CORE_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: APP_DISABLE_AUTH
+              value: "false" # TODO: should this be configurable?
           # livenessProbe:
           #   httpGet:
           #     path: / # TODO: check if this is correct
diff --git a/kubeflow/helm/katib/templates/web-app/kubeflow-cluster-roles.yaml b/kubeflow/helm/katib/templates/web-app/kubeflow-cluster-roles.yaml
new file mode 100644
index 000000000..9dc4baef0
--- /dev/null
+++ b/kubeflow/helm/katib/templates/web-app/kubeflow-cluster-roles.yaml
@@ -0,0 +1,65 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels: {{- include "katib.labels" . | nindent 4 }}
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true"
+  name: {{ include "katib.fullname" . }}-katib-admin
+aggregationRule:
+  clusterRoleSelectors:
+    - matchLabels:
+        rbac.authorization.kubeflow.org/aggregate-to-kubeflow-katib-admin: "true"
+rules: []
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels: {{- include "katib.labels" . | nindent 4 }}
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-katib-admin: "true"
+  name: {{ include "katib.fullname" . }}-katib-edit
+rules:
+  - apiGroups:
+      - kubeflow.org
+    resources:
+      - experiments
+      - trials
+      - suggestions
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - delete
+      - deletecollection
+      - patch
+      - update
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - list
+  - apiGroups:
+      - ""
+    resources:
+      - pods/log
+    verbs:
+      - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels: {{- include "katib.labels" . | nindent 4 }}
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
+  name: {{ include "katib.fullname" . }}-katib-view
+rules:
+  - apiGroups:
+      - kubeflow.org
+    resources:
+      - experiments
+      - trials
+      - suggestions
+    verbs:
+      - get
+      - list
+      - watch
diff --git a/kubeflow/helm/katib/values.yaml b/kubeflow/helm/katib/values.yaml
index 5fdb96d6a..f6c035389 100644
--- a/kubeflow/helm/katib/values.yaml
+++ b/kubeflow/helm/katib/values.yaml
@@ -30,10 +30,10 @@ serviceAccount:
 webApp:
   replicaCount: 1
   image:
-    repository: docker.io/kubeflowkatib/katib-new-ui
+    repository: docker.io/kubeflowkatib/katib-ui
     pullPolicy: IfNotPresent
     # Overrides the image tag whose default is the chart appVersion.
-    tag: v0.11.1
+    tag: v0.16.0-rc.1
   podAnnotations:
     cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
     sidecar.istio.io/inject: "true"
@@ -90,7 +90,7 @@ controller:
     repository: docker.io/kubeflowkatib/katib-controller
     pullPolicy: IfNotPresent
     # Overrides the image tag whose default is the chart appVersion.
-    tag: v0.11.1
+    tag: v0.16.0-rc.1
 
   replicaCount: 1
 
@@ -135,7 +135,7 @@ controller:
 
   service:
     webhook:
-      port: 8443
+      port: 443
     metrics:
       port: 8080
 
@@ -144,7 +144,7 @@ dbManager:
     repository: docker.io/kubeflowkatib/katib-db-manager
     pullPolicy: IfNotPresent
     # Overrides the image tag whose default is the chart appVersion.
-    tag: v0.11.1
+    tag: v0.16.0-rc.1
 
   replicaCount: 1
 
@@ -196,6 +196,7 @@ dbManager:
       userKey: USERNAME
       passwordKey: PASSWORD
     database:
+      type: mysql # mysql or postgres
       name: katib
       host: kubeflow-mysql-cluster-mysql-master.kubeflow.svc.cluster.local
       port: 3306