Skip to content

Commit

Permalink
Merge pull request #1317 from porter-dev/main
Browse files Browse the repository at this point in the history
Push latest changes to prod
  • Loading branch information
yosefmih authored Jun 20, 2024
2 parents cdcc993 + b5dd686 commit ccb8287
Show file tree
Hide file tree
Showing 67 changed files with 4,190 additions and 13 deletions.
2 changes: 2 additions & 0 deletions Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ local_resource(
helm cm-push addons/redis-managed local && \
helm cm-push addons/deepgram local && \
helm cm-push addons/hf-llm-models local && \
helm cm-push addons/keda-http-add-on local && \
helm cm-push addons/kube-image-keeper local && \
helm repo update local
''',
deps=[
Expand Down
22 changes: 16 additions & 6 deletions addons/hf-llm-models/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
llm-model: {{ .Release.Name }}
llm-model: {{ .Release.Name }}-hf-llm
annotations:
porter.run/hf-llm-model-version: "{{ .Chart.Version }}"
name: {{ .Release.Name }}-workload
name: {{ .Release.Name }}-hf-llm
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
llm-model: {{ .Release.Name }}
llm-model: {{ .Release.Name }}-hf-llm
template:
metadata:
labels:
llm-model: {{ .Release.Name }}
llm-model: {{ .Release.Name }}-hf-llm
spec:
tolerations:
- key: "removable"
Expand Down Expand Up @@ -49,14 +49,24 @@ spec:
- --max-model-len={{ .Values.maxModelLen }}
{{- end }}
image: {{ .Values.vllmImage }}
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
env:
- name: HF_TOKEN
value: {{ .Values.huggingFaceToken }}
ports:
- containerPort: 8000
protocol: TCP
name: https
readinessProbe:
failureThreshold: 3
httpGet:
path: /health
port: 8000
scheme: HTTP
initialDelaySeconds: 15
periodSeconds: 15
successThreshold: 1
timeoutSeconds: 2
resources:
requests:
{{- if .Values.resources.requests.cpu }}
Expand Down Expand Up @@ -88,4 +98,4 @@ spec:
volumes:
- name: model-volume
persistentVolumeClaim:
claimName: "{{ .Release.Name }}-model-pvc"
claimName: "{{ .Release.Name }}-hf-llm"
20 changes: 20 additions & 0 deletions addons/hf-llm-models/templates/httpscaledobject.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{{ if .Values.autoscaling.enabled }}
# HTTPScaledObject for the KEDA HTTP add-on. Rendered only when
# autoscaling.enabled is true in values.yaml.
kind: HTTPScaledObject
apiVersion: http.keda.sh/v1alpha1
metadata:
  name: {{ .Release.Name }}-hf-llm
spec:
  # Host the add-on matches incoming requests against for this release.
  hosts:
    - {{ .Release.Name }}.porter.llm
  # Must match the Deployment and Service names rendered by this chart
  # (both are "<release>-hf-llm") and the Service port.
  scaleTargetRef:
    deployment: {{ .Release.Name }}-hf-llm
    service: {{ .Release.Name }}-hf-llm
    port: 8000
  # values.yaml names these keys minReplicas / maxReplicas; reading
  # autoscaling.min / autoscaling.max rendered empty bounds.
  replicas:
    min: {{ .Values.autoscaling.minReplicas }}
    max: {{ .Values.autoscaling.maxReplicas }}
  # Seconds to wait after the last request before scaling down.
  scaledownPeriod: {{ .Values.autoscaling.scaledownPeriod }}
  scalingMetric:
    concurrency:
      # Target concurrent connections per replica.
      targetValue: {{ .Values.autoscaling.targetConcurrency }}
{{- end }}
4 changes: 2 additions & 2 deletions addons/hf-llm-models/templates/pvc.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Release.Name }}-model-pvc
name: {{ .Release.Name }}-hf-llm
spec:
accessModes:
- ReadWriteMany
storageClassName: efs-{{ .Release.Name }}
storageClassName: efs-{{ .Release.Name }}-hf-llm
resources:
requests:
storage: 20Gi
6 changes: 3 additions & 3 deletions addons/hf-llm-models/templates/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ apiVersion: v1
kind: Service
metadata:
labels:
llm-model: {{ .Release.Name }}
name: {{ .Release.Name }}
llm-model: {{ .Release.Name }}-hf-llm
name: {{ .Release.Name }}-hf-llm
spec:
ports:
- name: https
port: 8000
targetPort: https
selector:
llm-model: {{ .Release.Name }}
llm-model: {{ .Release.Name }}-hf-llm
2 changes: 1 addition & 1 deletion addons/hf-llm-models/templates/storageclass.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: efs-{{ .Release.Name }}
name: efs-{{ .Release.Name }}-hf-llm
provisioner: efs.csi.aws.com
parameters:
provisioningMode: efs-ap
Expand Down
9 changes: 8 additions & 1 deletion addons/hf-llm-models/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,11 @@ resources:
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
effect: "NoSchedule"
effect: "NoSchedule"

# HTTP-based autoscaling settings for the model deployment.
# NOTE(review): presumably consumed by templates/httpscaledobject.yaml, which
# is gated on `enabled` — confirm the key names here match what it reads.
autoscaling:
# master switch; the HTTPScaledObject is only rendered when this is true
enabled: false
# lower bound on replicas — presumably 0 permits scale-to-zero; confirm
minReplicas: 0
# upper bound on replicas
maxReplicas: 10
scaledownPeriod: 300 # the time in seconds to wait before scaling down the deployment after the last request
targetConcurrency: 100 # the target concurrent connections per replica
24 changes: 24 additions & 0 deletions addons/keda-http-add-on/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
# NOTE(review): presumably Go-template sources used to generate chart files,
# which should not ship in the packaged chart — confirm.
*.gotmpl
31 changes: 31 additions & 0 deletions addons/keda-http-add-on/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
apiVersion: v2
type: application
name: keda-add-ons-http
description: Event-based autoscaler for HTTP workloads on Kubernetes

# Specify the Kubernetes version range that we support.
# We allow pre-release versions for cloud-specific Kubernetes versions such as v1.21.5-gke.1302 or v1.18.9-eks-d1db3c
kubeVersion: ">=v1.23.0-0"

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version. This is incremented at chart release time and does not need
# to be included in any PRs to main.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.8.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
appVersion: 0.8.0
home: https://github.com/kedacore/http-add-on
sources:
  - https://github.com/kedacore/http-add-on
# NOTE(review): the email values below are placeholders left by the hosting
# page's address obfuscation; restore the real addresses from the upstream
# chart. They are quoted because an unquoted value starting with "[" is parsed
# as a YAML flow sequence, while Chart.yaml expects a string here.
maintainers:
  - name: Ahmed ElSayed
    email: "[email protected]"
  - name: Jorge Turrado
    email: "[email protected]"
  - name: Tom Kerkhove
    email: "[email protected]"
  - name: Zbynek Roubalik
    email: "[email protected]"
Loading

0 comments on commit ccb8287

Please sign in to comment.