Merge pull request #1317 from porter-dev/main

Push latest changes to prod
porter-dev · Jun 20, 2024 · ccb8287 · ccb8287
2 parents cdcc993 + b5dd686
commit ccb8287
Show file tree

Hide file tree

Showing 67 changed files with 4,190 additions and 13 deletions.
diff --git a/Tiltfile b/Tiltfile
@@ -76,6 +76,8 @@ local_resource(
   helm cm-push addons/redis-managed local && \
   helm cm-push addons/deepgram local && \
   helm cm-push addons/hf-llm-models local && \
+  helm cm-push addons/keda-http-add-on local && \
+  helm cm-push addons/kube-image-keeper local && \
   helm repo update local
   ''',
   deps=[

diff --git a/addons/hf-llm-models/templates/deployment.yaml b/addons/hf-llm-models/templates/deployment.yaml
@@ -2,21 +2,21 @@ apiVersion: apps/v1
 kind: Deployment
 metadata:
   labels:
-    llm-model: {{ .Release.Name }}
+    llm-model: {{ .Release.Name }}-hf-llm
   annotations:
     porter.run/hf-llm-model-version: "{{ .Chart.Version }}"
-  name: {{ .Release.Name }}-workload
+  name: {{ .Release.Name }}-hf-llm
 spec:
   replicas: 1
   strategy:
     type: Recreate
   selector:
     matchLabels:
-      llm-model: {{ .Release.Name }}
+      llm-model: {{ .Release.Name }}-hf-llm
   template:
     metadata:
       labels:
-        llm-model: {{ .Release.Name }}
+        llm-model: {{ .Release.Name }}-hf-llm
     spec:
       tolerations:
         - key: "removable"
@@ -49,14 +49,24 @@ spec:
         - --max-model-len={{ .Values.maxModelLen }}
       {{- end }}
         image: {{ .Values.vllmImage }}
-        imagePullPolicy: Always
+        imagePullPolicy: IfNotPresent
         env:
         - name: HF_TOKEN
           value: {{ .Values.huggingFaceToken }}
         ports:
         - containerPort: 8000
           protocol: TCP
           name: https
+        readinessProbe:
+          failureThreshold: 3
+          httpGet:
+            path: /health
+            port: 8000
+            scheme: HTTP
+          initialDelaySeconds: 15
+          periodSeconds: 15
+          successThreshold: 1
+          timeoutSeconds: 2
         resources:
           requests:
               {{- if .Values.resources.requests.cpu }}
@@ -88,4 +98,4 @@ spec:
       volumes:
       - name: model-volume
         persistentVolumeClaim:
-          claimName: "{{ .Release.Name }}-model-pvc"
+          claimName: "{{ .Release.Name }}-hf-llm"
diff --git a/addons/hf-llm-models/templates/httpscaledobject.yaml b/addons/hf-llm-models/templates/httpscaledobject.yaml
@@ -0,0 +1,20 @@
+{{ if .Values.autoscaling.enabled }}
+kind: HTTPScaledObject  # rendered only when autoscaling.enabled is true
+apiVersion: http.keda.sh/v1alpha1
+metadata:
+    name: {{ .Release.Name }}-hf-llm
+spec:
+    hosts:
+    - {{ .Release.Name }}.porter.llm
+    scaleTargetRef:
+        deployment: {{ .Release.Name }}-hf-llm
+        service: {{ .Release.Name }}-hf-llm
+        port: 8000
+    replicas:
+        min: {{ .Values.autoscaling.minReplicas }}  # key name as defined in values.yaml
+        max: {{ .Values.autoscaling.maxReplicas }}  # key name as defined in values.yaml
+    scaledownPeriod: {{ .Values.autoscaling.scaledownPeriod }}
+    scalingMetric:
+        concurrency:
+            targetValue: {{ .Values.autoscaling.targetConcurrency }}
+{{- end }}
diff --git a/addons/hf-llm-models/templates/pvc.yaml b/addons/hf-llm-models/templates/pvc.yaml
@@ -1,11 +1,11 @@
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: {{ .Release.Name }}-model-pvc
+  name: {{ .Release.Name }}-hf-llm
 spec:
   accessModes:
     - ReadWriteMany
-  storageClassName: efs-{{ .Release.Name }}
+  storageClassName: efs-{{ .Release.Name }}-hf-llm
   resources:
     requests:
       storage: 20Gi
diff --git a/addons/hf-llm-models/templates/service.yaml b/addons/hf-llm-models/templates/service.yaml
@@ -2,12 +2,12 @@ apiVersion: v1
 kind: Service
 metadata:
   labels:
-    llm-model: {{ .Release.Name }}
-  name: {{ .Release.Name }}
+    llm-model: {{ .Release.Name }}-hf-llm
+  name: {{ .Release.Name }}-hf-llm
 spec:
   ports:
     - name: https
       port: 8000
       targetPort: https
   selector:
-    llm-model: {{ .Release.Name }}
+    llm-model: {{ .Release.Name }}-hf-llm
diff --git a/addons/hf-llm-models/templates/storageclass.yaml b/addons/hf-llm-models/templates/storageclass.yaml
@@ -1,7 +1,7 @@
 apiVersion: storage.k8s.io/v1
 kind: StorageClass
 metadata:
-  name: efs-{{ .Release.Name }}
+  name: efs-{{ .Release.Name }}-hf-llm
 provisioner: efs.csi.aws.com
 parameters:
   provisioningMode: efs-ap

diff --git a/addons/hf-llm-models/values.yaml b/addons/hf-llm-models/values.yaml
@@ -29,4 +29,11 @@ resources:
 tolerations:
   - key: "nvidia.com/gpu"
     operator: "Exists"
-    effect: "NoSchedule"
+    effect: "NoSchedule"
+
+autoscaling:
+  enabled: false 
+  minReplicas: 0 
+  maxReplicas: 10
+  scaledownPeriod: 300 # the time in seconds to wait before scaling down the deployment after the last request
+  targetConcurrency: 100 # the target concurrent connections per replica
diff --git a/addons/keda-http-add-on/.helmignore b/addons/keda-http-add-on/.helmignore
@@ -0,0 +1,24 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
+*.gotmpl
diff --git a/addons/keda-http-add-on/Chart.yaml b/addons/keda-http-add-on/Chart.yaml
@@ -0,0 +1,31 @@
+apiVersion: v2
+type: application
+name: keda-add-ons-http
+description: Event-based autoscaler for HTTP workloads on Kubernetes
+
+# Specify the Kubernetes version range that we support.
+# We allow pre-release versions for cloud-specific Kubernetes versions such as  v1.21.5-gke.1302 or v1.18.9-eks-d1db3c
+kubeVersion: ">=v1.23.0-0"
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version. This is incremented at chart release time and does not need
+# to be included in any PRs to main.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.8.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+appVersion: 0.8.0
+home: https://github.com/kedacore/http-add-on
+sources:
+  - https://github.com/kedacore/http-add-on
+maintainers:
+  - name: Ahmed ElSayed
+    email: [email protected]
+  - name: Jorge Turrado
+    email: [email protected]
+  - name: Tom Kerkhove
+    email: [email protected]
+  - name: Zbynek Roubalik
+    email: [email protected]