diff --git a/README.md b/README.md
index 9692ecb..9760794 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,27 @@
 
 This repo is aimed at housing the scripts and settings to aid in collecting other repositories and managing a kubernetes cluster(s) on GCP
 
+## NOTICE: Work in progress (currently paused)
+This repository is partly complete. It deploys the Ambient app back-end services to GCP and includes a Kubernetes manifest and deployment instructions (below).
+
+In progress:
+- configuration directories (./config_stage/)
+- automated setup scripts geared toward GitHub Actions automation (run_*.sh)
+
+Next steps:
+- finish the test scripts (run_*.sh)
+- finish development and testing of deployment_docker
+- automate the infrastructure via GitHub Actions
+
+If you are onboarding from these notes, the sensible first step is to run the setup scripts in order:
+- run_0_rebuild_infra: check that the Docker image builds
+- run_1_run_infra: check that the Docker container runs; it should stay online
+- run_2_setup_infra: unfinished; should install the Google Cloud tools, configure secrets, and prepare for GCP deployment
+- run_3_redeploy_cluster: should redeploy the cluster and then shut down the container
+
+Although unfinished, inspecting and studying the repository should allow anyone to run the Ambient application. At present, however, there is no standard step-by-step guide for completing this process beyond the instructions below:
+
+
 ## Startup instructions
 
 install gcloud : https://cloud.google.com/sdk/docs/install-sdk
diff --git a/infra-manifest.yml b/config_stage/infra-manifest.yml
similarity index 100%
rename from infra-manifest.yml
rename to config_stage/infra-manifest.yml
diff --git a/deployment_docker/Dockerfile b/deployment_docker/Dockerfile
new file mode 100644
index 0000000..c53856f
--- /dev/null
+++ b/deployment_docker/Dockerfile
@@ -0,0 +1,38 @@
+# Use an official Debian runtime as a parent image
+FROM debian:bullseye-slim
+
+# Install prerequisites
+RUN apt-get update && \
+    apt-get install -y curl apt-transport-https lsb-release gnupg && \
+    apt-get clean
+
+# Install the Google Cloud SDK from the apt repository
+RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
+    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
+    apt-get update -y && apt-get install -y google-cloud-sdk
+
+# Install kubectl
+RUN apt-get install -y kubectl
+
+
+RUN curl -O "https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-453.0.0-linux-x86_64.tar.gz"
+
+RUN tar zxvf "google-cloud-sdk-453.0.0-linux-x86_64.tar.gz"
+
+RUN ./google-cloud-sdk/install.sh --quiet
+
+COPY ./config_stage/* ./
+
+# Set up the entrypoint
+#COPY entrypoint.sh /entrypoint.sh
+#RUN chmod +x /entrypoint.sh
+
+# Set the working directory
+#WORKDIR /
+
+# The entrypoint script should handle populating the secret files and the gcloud and kubectl configuration.
+# Ensure you create an `entrypoint.sh` script that configures everything as needed.
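+# The entrypoint.sh added alongside this Dockerfile (deployment_docker/entrypoint.sh) appears to be
+# the script referred to above; once it is finished, the commented COPY/chmod lines and the
+# ENTRYPOINT line below can be uncommented.
+# Layout note (an observation, not yet verified in automation): the Cloud SDK is installed twice,
+# from apt (`gcloud` on PATH) and from the 453.0.0 tarball under /google-cloud-sdk, which is the
+# copy the run_*.sh scripts invoke as ./google-cloud-sdk/bin/gcloud. The config_stage/* files land
+# in the image root, so infra-manifest.yml is available to `kubectl apply` inside the container.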
+#ENTRYPOINT ["/entrypoint.sh"] + +# Keep the container running +CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/deployment_docker/entrypoint.sh b/deployment_docker/entrypoint.sh new file mode 100644 index 0000000..2e662b1 --- /dev/null +++ b/deployment_docker/entrypoint.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Populate the env secrets-.yml files using the templates +# (You need to define how you're going to replace the placeholders in your templates) +# For example: +# envsubst < secrets-template.yml > secrets-analytic-tools.yml + +# Then follow these commands to get going: +# Note that you will need to provide your own configurations for `gcloud auth login` as it requires interactive user input. +gcloud components install kubectl -q +gcloud auth login --no-launch-browser +gcloud config set project YOUR_PROJECT_ID +gcloud container clusters create my-cluster --zone us-central1-a +gcloud container clusters list +gcloud container clusters get-credentials autopilot-cluster-1 --zone us-central1 + +# Add secrets +kubectl apply -f secrets-analytic-tools.yml +kubectl apply -f secrets-chat.yml +kubectl apply -f secrets-gcgo-candles.yml + +# Execute anything else passed to the docker run command +exec "$@" diff --git a/manifest-staging.yml b/manifest-staging.yml deleted file mode 100644 index 8d98cb0..0000000 --- a/manifest-staging.yml +++ /dev/null @@ -1,494 +0,0 @@ -# Google managed SSL certificate for the domain ---- -apiVersion: networking.gke.io/v1 -kind: ManagedCertificate -metadata: - name: managed-cert-prod - labels: - app: graphcache -spec: - domains: - - ambindexer.net - -# Public facing gateway that ultimately routes to graphcache servers ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: graphcache-ingress - labels: - app: graphcache - annotations: - kubernetes.io/ingress.global-static-ip-name: ambient-backend-prod-ip - networking.gke.io/managed-certificates: managed-cert-prod - kubernetes.io/ingress.class: "gce" -spec: - rules: - - host: ambindexer.net - http: - paths: - - pathType: Prefix - path: "/" - backend: - service: - name: graphcache-service - port: - number: 80 - - path: "/chat/*" - pathType: ImplementationSpecific - backend: - service: - name: chat-service - port: - number: 80 - -# Session affinity necessary to support web sockets and user intention position -# hinting ---- -apiVersion: cloud.google.com/v1 -kind: BackendConfig -metadata: - name: stick-service-backend-cfg -spec: - sessionAffinity: - affinityType: "CLIENT_IP" - -# Load balancer to any graphcache servers in deployment that are fully synced ---- -apiVersion: v1 -kind: Service -metadata: - name: graphcache-service - labels: - app: graphcache - annotations: - networking.gke.io/load-balancer-type: "Internal" - cloud.google.com/backend-config: '{"default": "stick-service-backend-cfg"}' -spec: - type: LoadBalancer - selector: - tier: servers - ports: - - name: http - port: 80 - targetPort: 5000 - -# Internal load balancer for rpc-cache service ---- -apiVersion: v1 -kind: Service -metadata: - name: rpc-cache-service - labels: - app: graphcache - annotations: - networking.gke.io/load-balancer-type: "Internal" -spec: - type: LoadBalancer - selector: - tier: rpc-servers - ports: - - name: http - port: 80 - targetPort: 5000 - -# Internal load balancer for chat-server ---- -apiVersion: v1 -kind: Service -metadata: - name: chat-service - labels: - app: graphcache - annotations: - networking.gke.io/load-balancer-type: "Internal" - cloud.google.com/backend-config: '{"default": 
"stick-service-backend-cfg"}' -spec: - type: LoadBalancer - selector: - tier: chat-servers - ports: - - name: http - port: 80 - targetPort: 5000 - -# Internal load balancer for analytics-server ---- -apiVersion: v1 -kind: Service -metadata: - name: analytics-tools-service - labels: - app: graphcache - annotations: - networking.gke.io/load-balancer-type: "Internal" - cloud.google.com/backend-config: '{"default": "stick-service-backend-cfg"}' -spec: - type: LoadBalancer - selector: - tier: analytics-tools - ports: - - name: http - port: 8080 - targetPort: 5000 - -# Deployes a replica set of independently synced graphcache servers ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: server-deployment - labels: - app: graphcache -spec: - replicas: 3 - selector: - matchLabels: - tier: servers - template: - metadata: - labels: - tier: servers - spec: - containers: - - # Each replicate runs an instance of the graphcache server and an attached redis - # instance for caching - - name: graphcache - image: us-central1-docker.pkg.dev/ambient-app-384603/graphcache/graphcache:latest - args: - - '--skip-initial-candles' - - '--skip-initial-pool-stats' - - '--skip-initial-states' - - '--skip-pruning' - - '--suppress-output' - - '--redis-host' - - 'localhost' - - '--db-location' - - '/data' - ports: - - containerPort: 5001 - - containerPort: 5002 - - containerPort: 5003 - - containerPort: 5004 - - containerPort: 5005 - resources: - requests: - cpu: "2000m" - memory: "14Gi" - - volumeMounts: - - name: google-cloud-key - mountPath: /var/secrets/google - - name: graphcache-data-vol - mountPath: /data/ - - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /var/secrets/google/gcloud-creds.json - - name: GRAPHCACHE_CLOUD_ENV - value: gcloud - - name: GRAPHCACHE_RPC_CACHE_URL - value: http://rpc-cache-service - - # Uses the latest_block endpoint on the graphcache server as a liveness and startup - # probe. If this endpoint changes or is no longer indicative of server readiness - # this probe needs to be updated - livenessProbe: - httpGet: - path: '/latest_block?chainId=0x1' - port: 5001 - initialDelaySeconds: 3 - periodSeconds: 3 - timeoutSeconds: 5 - failureThreshold: 3 - startupProbe: - httpGet: - path: '/latest_block?chainId=0x1' - port: 5001 - failureThreshold: 30 # 30 minute deadline to complete startup sync - periodSeconds: 60 - - - name: nginx-proxy - image: us-central1-docker.pkg.dev/ambient-app-384603/graphcache/nginx-proxy:latest - ports: - - containerPort: 5000 - resources: - requests: - cpu: "250m" - memory: "2Gi" - - - name: redis - image: us-central1-docker.pkg.dev/ambient-app-384603/graphcache/redis:latest - ports: - - containerPort: 6379 - resources: - requests: - cpu: "600m" - memory: "14Gi" - - # Expects a secret set for a GCP service account with the necessary permissions - # for the graphcache workload (i.e. read/write access to the snapshot GCP bucket) - volumes: - - name: google-cloud-key - secret: - secretName: gcloud-creds - - # Use ephemeral volume claim, because GKE autopilot only supports 10GB of storage - # natively in the cluster - - name: graphcache-data-vol - ephemeral: - volumeClaimTemplate: - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 100Gi - - -# Very similar to to the deployment pattern, but difference is instead of starting -# a public facing server, runs the graphcache in snapshot mode. 
Point of this cronjob -# is to periodically update a recent snapshot in the GCS bucket, so new starting -# graphcache instances have short sync time ---- -apiVersion: batch/v1 -kind: CronJob -metadata: - name: sqlite-snapshot - labels: - app: graphcache - -spec: - # Syncs every 4 hours - schedule: "0 */4 * * *" - concurrencyPolicy: Forbid - jobTemplate: - metadata: - labels: - tier: workers - spec: - completions: 1 - template: - - spec: - restartPolicy: Never - containers: - - name: graphcache - image: us-central1-docker.pkg.dev/ambient-app-384603/graphcache/graphcache:latest - - # Unlike a graphcache server, this doesn't include any skip flags, because the - # goal is to produce a full database file - args: - - '--snapshot-run' - - '--suppress-output' - - '--redis-host' - - 'localhost' - - '--db-location' - - '/data' - ports: - - containerPort: 5001 - - # Provision significantly less CPU, because this isn't serving requests - resources: - requests: - cpu: "1400m" - memory: "12Gi" - limits: - memory: "16Gi" - - volumeMounts: - - name: google-cloud-key - mountPath: /var/secrets/google - - name: graphcache-data-vol - mountPath: /data/ - - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /var/secrets/google/gcloud-creds.json - - name: GRAPHCACHE_CLOUD_ENV - value: gcloud - - name: GRAPHCACHE_RPC_CACHE_URL - value: http://rpc-cache-service - - - name: redis - image: us-central1-docker.pkg.dev/ambient-app-384603/graphcache/redis:latest - ports: - - containerPort: 6379 - resources: - requests: - cpu: "400m" - memory: "16Gi" - limits: - memory: "16Gi" - - # Expects a secret set for a GCP service account with the necessary permissions - # for the graphcache workload (i.e. read/write access to the snapshot GCP bucket) - volumes: - - name: google-cloud-key - secret: - secretName: gcloud-creds - - # Use ephemeral volume claim, because GKE autopilot only supports 10GB of storage - # natively in the cluster - - name: graphcache-data-vol - ephemeral: - volumeClaimTemplate: - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 100Gi - -# Deployes a replica set of rpc-cache servers ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: rpc-cache-deployment - labels: - app: graphcache -spec: - replicas: 2 - selector: - matchLabels: - tier: rpc-servers - template: - metadata: - labels: - tier: rpc-servers - spec: - containers: - - # Each replicate runs an instance of the rpc-cache server and an attached redis - # instance for caching - - name: graphcache - image: us-central1-docker.pkg.dev/ambient-app-384603/graphcache/graphcache:latest - command: ["python3", "-u", "/app/webserver/rpcserver.py", "--redis-host", "localhost"] - ports: - - containerPort: 4999 - resources: - requests: - cpu: "4000m" - memory: "2Gi" - limits: - memory: "2Gi" - - # Uses the latest_block endpoint on the graphcache server as a liveness and startup - # probe. 
If this endpoint changes or is no longer indicative of server readiness - # this probe needs to be updated - livenessProbe: - httpGet: - path: '/ping' - port: 5001 - initialDelaySeconds: 3 - periodSeconds: 3 - failureThreshold: 3 - - - name: nginx-proxy - image: us-central1-docker.pkg.dev/ambient-app-384603/graphcache/nginx-proxy:latest - ports: - - containerPort: 5000 - resources: - requests: - cpu: "250m" - memory: "500Mi" - limits: - memory: "2Gi" - - - name: redis - image: us-central1-docker.pkg.dev/ambient-app-384603/graphcache/redis:latest - ports: - - containerPort: 6379 - resources: - requests: - cpu: "600m" - memory: "12Gi" - limits: - memory: "16Gi" - -# Deployes a replica set of rpc-cache servers ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chat-server-deployment - labels: - app: graphcache -spec: - # The websocket library of the chat server does requires session afinity (which the - # current ingress doesn't support). Therefore we can only one 1 replica at a time, - # any more will break client. For current chat workloads this should be sufficient. - replicas: 1 - selector: - matchLabels: - tier: chat-servers - template: - metadata: - labels: - tier: chat-servers - spec: - containers: - - name: chat-server - image: us-central1-docker.pkg.dev/ambient-app-384603/graphcache/chat-server:latest - ports: - - containerPort: 5000 - - resources: - requests: - cpu: "4000m" - memory: "2Gi" - limits: - memory: "2Gi" - - env: - - name: PROTOCOL - value: "http" - - name: PORT - value: "5000" - - name: MONGO_USER - value: "chat-server" - - name: MONGO_SERVER - value: "serverlessinstance0.swuv52l.mongodb.net" - - name: MONGO_PWD - valueFrom: - secretKeyRef: - name: mongo-creds - key: mongo-pwd - - livenessProbe: - httpGet: - path: '/chat/api/status' - port: 5000 - initialDelaySeconds: 3 - periodSeconds: 3 - failureThreshold: 3 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: analytics-tools - labels: - app: analytics-tools -spec: - replicas: 1 - selector: - matchLabels: - app: analytics-tools - template: - metadata: - labels: - app: analytics-tools - spec: - containers: - - name: crocswap-analytics-tools-container - image: us-central1-docker.pkg.dev/crocswap/crocswap-audit-tools-master/crocswap_audit_tools:latest # Make sure this is the correct image name and tag - command: ["python3", "run_server.py"] # Will start the actual server inside - ports: - - containerPort: 8080 # Adjust the port as needed - - resources: - requests: - cpu: "4000m" - memory: "2Gi" - limits: - memory: "2Gi" \ No newline at end of file diff --git a/run_0_rebuild_infra.sh b/run_0_rebuild_infra.sh new file mode 100755 index 0000000..0dc3f0f --- /dev/null +++ b/run_0_rebuild_infra.sh @@ -0,0 +1,5 @@ +docker stop ambient-infa-container +docker stop $(docker ps -a -q --filter ancestor=ambient-infra-image) +docker rm $(docker ps -a -q --filter ancestor=ambient-infra-image) +docker rmi ambient-infra-image +docker build -t ambient-infra-image deployment_docker/. 
\ No newline at end of file
diff --git a/run_1_run_infra.sh b/run_1_run_infra.sh
new file mode 100755
index 0000000..2028f71
--- /dev/null
+++ b/run_1_run_infra.sh
@@ -0,0 +1 @@
+docker run --name ambient-infa-container -it ambient-infra-image /bin/bash
\ No newline at end of file
diff --git a/run_2_setup_infra.sh b/run_2_setup_infra.sh
new file mode 100755
index 0000000..73b7802
--- /dev/null
+++ b/run_2_setup_infra.sh
@@ -0,0 +1,3 @@
+docker exec -it ambient-infa-container ./google-cloud-sdk/bin/gcloud init # REQUIRES INTERACTIVE STEPS
+docker exec ambient-infa-container ./google-cloud-sdk/bin/gcloud components update --quiet
+docker exec ambient-infa-container ./google-cloud-sdk/bin/gcloud components install kubectl --quiet
\ No newline at end of file
diff --git a/run_3_redeploy_cluster.sh b/run_3_redeploy_cluster.sh
new file mode 100755
index 0000000..1b7894c
--- /dev/null
+++ b/run_3_redeploy_cluster.sh
@@ -0,0 +1,13 @@
+docker exec ambient-infa-container gcloud config set project crocswap
+docker exec ambient-infa-container gcloud container clusters delete gcgo-test-cluster --zone us-central1-a
+docker exec ambient-infa-container gcloud container clusters create gcgo-test-cluster --zone us-central1-a
+
+
+# gcloud config set project crocswap
+# apt-get install google-cloud-sdk-gke-gcloud-auth-plugin
+# gcloud container clusters get-credentials gcgo-test-cluster --zone us-central1-a
+# gcloud container clusters create gcgo-test-cluster --zone us-central1-a # Expected to fail on re-run
+
+# kubectl delete all --all # -n your-namespace if not default
+# kubectl delete deployment deployment-name
+# kubectl apply -f infra-manifest.yml
\ No newline at end of file
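
Taken together, the run_*.sh scripts above sketch the onboarding flow described in the README notice. The sequence below is a minimal usage sketch, not a verified procedure: it assumes Docker is installed locally, that run_1_run_infra.sh is left running in its own terminal (its interactive shell is what keeps the container online), and that the interactive `gcloud init` prompts in run_2_setup_infra.sh are answered by hand. The project and cluster names are the ones hard-coded in the scripts.

./run_0_rebuild_infra.sh     # rebuild the ambient-infra-image Docker image
./run_1_run_infra.sh         # start ambient-infa-container; leave this shell open
# From a second terminal:
./run_2_setup_infra.sh       # initialize gcloud inside the container and install kubectl (interactive)
./run_3_redeploy_cluster.sh  # delete and recreate the gcgo-test-cluster GKE cluster
# After fetching cluster credentials (see the commented get-credentials line in run_3_redeploy_cluster.sh):
docker exec ambient-infa-container kubectl apply -f infra-manifest.yml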