From 0224c72bdd48bfbb12996095e29180cb61c9cac4 Mon Sep 17 00:00:00 2001 From: Gerhard Lazu Date: Fri, 16 Aug 2024 08:30:13 +0100 Subject: [PATCH] Initial commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We explain the purpose of this repository, the history behind it, where it's going & how can others help. We also captured all contributors to date. We also captured the Varnish + Fly.io config as it existed for: - Let's build a CDN - Part 1 - https://www.youtube.com/watch?v=8bDgWvyglno - Kaizen 15 - NOT a pipe dream - https://changelog.com/friends/50 There is a lot more context here: https://github.com/thechangelog/changelog.com/pull/518 The most interesting part is the `run` script. To run it, you will need to: 1. Have a Fly.io account 2. Have a back-end app deployed 💡 https://fly.io/speedrun/ 3. Change the name of the backend app (i.e. `changelog-2024-01-12`) 4. Launch the app in this repository Signed-off-by: Gerhard Lazu --- Dockerfile | 4 + LICENSE | 21 ++++ README.md | 41 +++++++ default.vcl | 117 +++++++++++++++++++ fly.toml | 43 +++++++ regions.txt | 36 ++++++ run | 323 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 585 insertions(+) create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 README.md create mode 100644 default.vcl create mode 100644 fly.toml create mode 100644 regions.txt create mode 100755 run diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..67c9a3b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,4 @@ +# https://hub.docker.com/_/varnish +FROM varnish:7.4.3 +ENV VARNISH_HTTP_PORT 9000 +COPY default.vcl /etc/varnish/default.vcl diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..19e0bf2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +Content and Design Copyright (c) Changelog Media LLC. All rights reserved. 
+ +Code Copyright (c) Changelog Media LLC and licensed under the following conditions: + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0c8dce4 --- /dev/null +++ b/README.md @@ -0,0 +1,41 @@ +# The Pipe Dream™️ + +A single-purpose, single-tenant CDN for [changelog.com](https://changelog.com). +Runs [Varnish Cache](https://varnish-cache.org/releases/index.html) (open +source) on [Fly.io](https://fly.io/changelog) + +This repository exists for a single reason: build the simplest CDN on Fly.io + +## How it started + +> I like the idea of having like this 20-line Varnish config that we deploy +> around the world, and it’s like “Look at our CDN, guys.” +> +> It’s so simple, and it can do exactly what we want it to do, and nothing +> more. 
+> +> But I understand that that’s a pipe dream, because that +> Varnish config will be slightly longer than 20 lines, and we’d run into all +> sorts of issues that we end up sinking all kinds of time into. +> +> Jerod Santo - March 29, 2024 - Changelog & Friends #38 + +## How is it going + +- [x] Static backend, 1 day stale, stale on error, x-headers - `46` lines of VCL +- [ ] Dynamic backend, cache-status header - `55` lines of VCL + +## How can you help + +If you have any ideas on how to improve this, please open an issue or go +straight for a pull request. We make this as easy as possible: +- All commits emphasize [good commit messages](https://cbea.ms/git-commit/) (more text for humans than code for machines) +- This repository is kept small & simple (the only purpose is to build the simplest CDN on Fly.io) +- We are taking a slow & thoughtful approach - join our journey via [audio with transcripts](https://changelog.com/topic/kaizen) or [written](https://github.com/thechangelog/changelog.com/discussions/categories/kaizen) + +Hope to see you in our Slack: 👋 + +## Contributors + +- [James A Rosen](https://www.jamesarosen.com/), Staff Engineer +- [Matt Johnson](https://github.com/mttjohnson), Sr Site Reliability Engineer diff --git a/default.vcl b/default.vcl new file mode 100644 index 0000000..2981a4e --- /dev/null +++ b/default.vcl @@ -0,0 +1,117 @@ +# https://varnish-cache.org/docs/7.4/reference/vcl.html#versioning +vcl 4.1; + +import std; + +# Thanks Matt Johnson! 
👋 +# - https://github.com/magento/magento2/blob/03621bbcd75cbac4ffa8266a51aa2606980f4830/app/code/Magento/PageCache/etc/varnish6.vcl +# - https://abhishekjakhotiya.medium.com/magento-internals-cache-purging-and-cache-tags-bf7772e60797 + +backend default { + .host = "top1.nearest.of.changelog-2024-01-12.internal"; + .host_header = "changelog-2024-01-12.fly.dev"; + .port = "4000"; + .first_byte_timeout = 5s; + .probe = { + .url = "/health"; + .timeout = 2s; + .interval = 5s; + .window = 10; + .threshold = 5; + } +} + +# https://varnish-cache.org/docs/7.4/users-guide/vcl-grace.html +# https://docs.varnish-software.com/tutorials/object-lifetime/ +# https://www.varnish-software.com/developers/tutorials/http-caching-basics/ +# https://blog.markvincze.com/how-to-gracefully-fall-back-to-cache-on-5xx-responses-with-varnish/ +sub vcl_backend_response { + # Objects within ttl are considered fresh. + set beresp.ttl = 60s; + + # Objects within grace are considered stale. + # Serve stale content while refreshing in the background. + # 🤔 QUESTION: should we vary this based on backend health? + set beresp.grace = 24h; + + if (beresp.status >= 500) { + # Don't cache a 5xx response + set beresp.uncacheable = true; + + # If is_bgfetch is true, it means that we've found and returned the cached + # object to the client, and triggered an asynchronous background update. In + # that case, since backend returned a 5xx, we have to abandon, otherwise + # the previously cached object would be erased from the cache (even if we + # set uncacheable to true). + if (bereq.is_bgfetch) { + return (abandon); + } + } + + # 🤔 QUESTION: Should we configure beresp.keep? +} + +# NOTE: vcl_recv is called at the beginning of a request, after the complete +# request has been received and parsed. Its purpose is to decide whether or not +# to serve the request, how to do it, and, if applicable, which backend to use. 
+sub vcl_recv { + # https://varnish-cache.org/docs/7.4/users-guide/purging.html + if (req.method == "PURGE") { + return (purge); + } + + # Implement a Varnish health-check + if (req.method == "GET" && req.url == "/varnish_status") { + return(synth(204)); + } +} + +# https://gist.github.com/leotsem/1246511/824cb9027a0a65d717c83e678850021dad84688d#file-default-vcl-pl +# https://varnish-cache.org/docs/7.4/reference/vcl-var.html#obj +sub vcl_deliver { + # What is the remaining TTL for this object? + set resp.http.x-ttl = obj.ttl; + # What is the max object staleness permitted? + set resp.http.x-grace = obj.grace; + + # Did the response come from Varnish or from the backend? + if (obj.hits > 0) { + set resp.http.x-cache = "HIT"; + } else { + set resp.http.x-cache = "MISS"; + } + + # Is this object stale? + if (obj.ttl < std.duration(integer=0)) { + set resp.http.x-cache = "STALE"; + } + + # How many times has this response been served from Varnish? + set resp.http.x-cache-hits = obj.hits; +} + +# TODOS: +# - ✅ Run in debug mode (locally) +# - ✅ Connect directly to app - not Fly.io Proxy 🤦 +# - ✅ Serve stale content + background refresh +# - QUESTION: Should the app control this via Surrogate-Control? Should we remove this header? +# - EXPLORE: varnishstat +# - EXPLORE: varnishtop +# - EXPLORE: varnishncsa -c -F '%m %U %H %{x-cache}o %{x-cache-hits}o' +# - ✅ Serve stale content on backend error +# - https://varnish-cache.org/docs/7.4/users-guide/vcl-grace.html#misbehaving-servers +# - If the backend gets restarted (e.g. new deploy), backend remains sick in Varnish +# - https://info.varnish-software.com/blog/two-minute-tech-tuesdays-backend-health +# - EXPLORE: varnishlog -g raw -i backend_health +# - Implement If-Modified-Since? keep +# - Expose FLY_REGION=sjc env var as a custom header +# - https://varnish-cache.org/lists/pipermail/varnish-misc/2019-September/026656.html +# - Add Feeds backend: /feed -> https://feeds.changelog.place/feed.xml +# - Store cache on disk? 
A pre-requisite for static backend +# - https://varnish-cache.org/docs/trunk/users-guide/storage-backends.html#file +# - Add Static backend: cdn.changelog.com requests +# +# FOLLOW-UPs: +# - Run varnishncsa as a separate process (will need a supervisor + log drain) +# - https://info.varnish-software.com/blog/varnish-and-json-logging +# - How to cache purge across all varnish instances? diff --git a/fly.toml b/fly.toml new file mode 100644 index 0000000..ab2e25e --- /dev/null +++ b/fly.toml @@ -0,0 +1,43 @@ +# Full app config reference: https://fly.io/docs/reference/configuration/ +app = "cdn-2024-01-26" +# Closest to James +primary_region = "sjc" +# Secondary region will be "lhr", closest to Gerhard + +kill_signal = "SIGTERM" +kill_timeout = 30 + +[env] +VARNISH_SIZE="500M" + +[[vm]] +size = "shared-cpu-1x" +memory = "256MB" + +[deploy] +strategy = "bluegreen" + +[[services]] +internal_port = 9000 +protocol = "tcp" + +[[services.http_checks]] +grace_period = "5s" +interval = "5s" +method = "get" +path = "/varnish_status" +protocol = "http" +timeout = "4s" + +[[services.ports]] +handlers = ["tls", "http"] +port = 443 + +[[services.ports]] +handlers = ["http"] +port = "80" + +[services.concurrency] +hard_limit = 2500 +soft_limit = 2000 +type = "connections" diff --git a/regions.txt b/regions.txt new file mode 100644 index 0000000..6342c96 --- /dev/null +++ b/regions.txt @@ -0,0 +1,36 @@ +NAME CODE GATEWAY LAUNCH PLAN + ONLY GPUS +Amsterdam, Netherlands ams ✓ ✓ +Ashburn, Virginia (US) iad ✓ ✓ +Atlanta, Georgia (US) atl +Bogotá, Colombia bog +Boston, Massachusetts (US) bos +Bucharest, Romania otp +Chicago, Illinois (US) ord ✓ +Dallas, Texas (US) dfw ✓ +Denver, Colorado (US) den +Ezeiza, Argentina eze +Frankfurt, Germany fra ✓ ✓ +Guadalajara, Mexico gdl +Hong Kong, Hong Kong hkg ✓ +Johannesburg, South Africa jnb +London, United Kingdom lhr ✓ +Los Angeles, California (US) lax ✓ +Madrid, Spain mad +Miami, Florida (US) mia +Montreal, Canada yul +Mumbai, India bom ✓ +Paris, 
France cdg ✓ +Phoenix, Arizona (US) phx +Querétaro, Mexico qro ✓ +Rio de Janeiro, Brazil gig +San Jose, California (US) sjc ✓ ✓ +Santiago, Chile scl ✓ +Sao Paulo, Brazil gru +Seattle, Washington (US) sea ✓ +Secaucus, NJ (US) ewr ✓ +Singapore, Singapore sin ✓ +Stockholm, Sweden arn +Sydney, Australia syd ✓ ✓ +Tokyo, Japan nrt ✓ +Toronto, Canada yyz ✓ +Warsaw, Poland waw diff --git a/run b/run new file mode 100755 index 0000000..bf4bc96 --- /dev/null +++ b/run @@ -0,0 +1,323 @@ +#!/usr/bin/env bash + +# https://linux.101hacks.com/ps1-examples/prompt-color-using-tput/ +BOLD="$(tput bold)" +RESET="$(tput sgr0)" +# BLACK="$(tput bold)$(tput setaf 0)" +RED="$(tput bold)$(tput setaf 1)" +# GREEN="$(tput bold)$(tput setaf 2)" +YELLOW="$(tput bold)$(tput setaf 3)" +# BLUE="$(tput bold)$(tput setaf 4)" +# MAGENTA="$(tput bold)$(tput setaf 5)" +CYAN="$(tput bold)$(tput setaf 6)" +# WHITE="$(tput bold)$(tput setaf 7)" +# BLACKB="$(tput bold)$(tput setab 0)" +# REDB="$(tput setab 1)$(tput setaf 0)" +GREENB="$(tput setab 2)$(tput setaf 0)" +YELLOWB="$(tput setab 3)$(tput setaf 0)" +BLUEB="$(tput setab 4)$(tput setaf 0)" +MAGENTAB="$(tput setab 5)$(tput setaf 0)" +CYANB="$(tput setab 6)$(tput setaf 0)" +WHITEB="$(tput setab 7)$(tput setaf 0)" + +APP="${PWD##*/}" +TS="$(date +'%F.%H-%M-%S')" +IMAGE="registry.fly.io/$APP:$TS" + +main() { + if [ -z "$1" ] + then + echo "${RED}First argument must be one of the following${RESET}" + echo "${BOLD}deploy${RESET} → deploys to Fly.io" + echo "${BOLD}world-scale${RESET} → makes it World Scale™" + echo "${BOLD}small-scale${RESET} → makes it Small Scale™" + echo "${BOLD}http-detailed${RESET} → shows detailed http response" + echo "${BOLD}http-measure${RESET} → measures http response times" + echo "${BOLD}http-profile${RESET} → profiles http responses" + echo "${CYAN}demo-2024-01-26${RESET} → runs through the first demo" + echo "${CYAN}demo-2024-06-21${RESET} → runs through the second demo" + echo "" + echo "${YELLOW}💡 All following arguments 
are passed to the command${RESET}" + else + clear + "$@" + fi +} + +deploy() { + _run_interactive \ + "1/3. BUILD LOCAL VARNISH CONTAINER IMAGE" \ + "docker buildx build . --tag $IMAGE" + + clear + + _run_interactive \ + "2/3. DEPLOY TO Fly.io" \ + "flyctl auth docker && docker push $IMAGE && flyctl deploy --ha=false --image $IMAGE" + + _run \ + "3/3. SHOW MACHINES LOCATION" \ + "flyctl machines list" +} + +world-scale() { + _run \ + "Run across many regions to show what World Scale™ feels like" \ + "flyctl scale count 16 --max-per-region 1 --region sjc,dfw,ord,iad,yyz,scl,gru,lhr,cdg,fra,ams,jnb,bom,sin,nrt,syd" + + _run_interactive \ + "Show machines location" \ + "flyctl machines list" + + clear + + _run_interactive \ + "${GREENB}How many USDs do you think this costs PER MONTH?${RESET} 🤔" \ + ":" + + _run \ + "16 x shared-cpu-1x instances with 256MB memory ${GREENB} 16 * \$1.94 = \$31.04 ${RESET}" \ + ": https://fly.io/docs/about/pricing/#compute" +} + +small-scale() { + _run \ + "Run in a remote region only to show what low latency feels like" \ + "flyctl scale count 1 --max-per-region 1 --region syd" + + _run \ + "Scale down all other locations" \ + "flyctl scale count 0 --region sjc,dfw,ord,iad,yyz,scl,gru,lhr,cdg,fra,ams,jnb,bom,sin,nrt" + + _run \ + "Show machines location" \ + "flyctl machines list" +} + +close-to-gerhard() { + _run \ + "Run close to Gerhard" \ + "flyctl scale count 1 --max-per-region 1 --region lhr" + + _run \ + "Scale down all other locations" \ + "flyctl scale count 0 --region sjc,dfw,ord,iad,yyz,scl,gru,cdg,fra,ams,jnb,bom,sin,nrt,syd" + + _run \ + "Show machines location" \ + "flyctl machines list" +} + +close-to-jerod() { + _run \ + "Run close to Jerod" \ + "flyctl scale count 1 --max-per-region 1 --region ord" + + _run \ + "Scale down all other locations" \ + "flyctl scale count 0 --region sjc,dfw,iad,yyz,scl,gru,lhr,cdg,fra,ams,jnb,bom,sin,nrt,syd" + + _run \ + "Show machines location" \ + "flyctl machines list" +} + 
+close-to-adam() { + _run \ + "Run close to Adam" \ + "flyctl scale count 1 --max-per-region 1 --region dfw" + + _run \ + "Scale down all other locations" \ + "flyctl scale count 0 --region sjc,ord,iad,yyz,scl,gru,lhr,cdg,fra,ams,jnb,bom,sin,nrt,syd" + + _run \ + "Show machines location" \ + "flyctl machines list" +} + +http-detailed() { + local url="${1:-https://pipedream.changelog.com/}" + shift # remove first argument + local command="httpstat $url $*" + + if ! which httpstat >/dev/null + then + brew install httpstat + fi + + $command + _play "$command" +} + +http-measure() { + local url="${1:-https://pipedream.changelog.com/}" + shift # remove first argument + local command="oha -c 1 -n 30 -q 1 $* $url" + + if ! which oha >/dev/null + then + brew install oha + fi + + $command + _play "$command" +} + +http-profile() { + local url="${1:-https://pipedream.changelog.com/}" + shift # remove first argument + + while sleep 1 + do + # https://blog.cloudflare.com/a-question-of-timing + curl -sL -o /dev/null \ + --write-out "%{url} http:%{http_version} status:%{http_code} ${WHITEB}ip:%{remote_ip}${RESET} ${CYANB}dns:%{time_namelookup}s${RESET} ${YELLOWB}tcp:%{time_connect}s${RESET} ${MAGENTAB}tls:%{time_appconnect}s${RESET} ${GREENB}wait:%{time_starttransfer}s${RESET} ${BLUEB}total:%{time_total}s${RESET}\n" \ + "$url" + done +} + +how-many-lines() { + _run_interactive \ + "${GREENB}How many lines of Varnish config?${RESET} 🧐" \ + ":" + + clear + + _run_interactive \ + "Total lines of Varnish config" \ + "bat default.vcl && wc -l default.vcl" + + clear + + _run_interactive \ + "${GREENB}How many lines of Varnish config without comments!${RESET} 🧐" \ + ":" + + clear + + _run_interactive \ + "Total lines of Varnish config without comments or empty lines" \ + "rg -v '^.*#|^\$' default.vcl | bat && rg -vc '^.*#|^\$' default.vcl" +} + +demo-2024-01-26() { + _run_interactive \ + "1/7. 
EXPLORE Varnish container image" \ + "docker history --no-trunc --format=json $(_varnish_version) | jq .CreatedBy | lvim -" + + clear + + _run_interactive \ + "2/7. EXPLORE Varnish container image command" \ + "true https://github.com/varnish/docker-varnish/blob/master/fresh/debian/scripts/docker-varnish-entrypoint" + + clear + + _run_interactive \ + "3/7. BUILD LOCAL Varnish container image" \ + "docker buildx build . --tag $IMAGE" + + clear + + _run_interactive \ + "4/7. EXPLORE Varnish command-line options" \ + "docker run --rm $IMAGE varnishd -? | lvim -" + + clear + + _run_interactive \ + "5/7. EXPLORE Varnish parameters" \ + "docker run --rm $IMAGE varnishd -x parameter | lvim -" + + clear + + _run_interactive \ + "6/7. EXPERIMENT with Varnish locally" \ + "docker run --name $APP --volume $PWD/default.vcl:/etc/varnish/default.vcl --rm -itp 9000:9000 $IMAGE" + + clear + + _run_interactive \ + "7/7. Deploy to Fly.io" \ + "flyctl auth docker && docker push $IMAGE && flyctl deploy --ha=false --image $IMAGE" +} + +demo-2024-06-21() { + _run_interactive \ + "${CYANB}1/7. WHAT COMMANDS ARE AVAILABLE?${RESET}" \ + "main" + + clear + + _run_interactive \ + "${CYANB}2/7. WHAT DOES THE https://pipedream.changelog.com RESPONSE LOOK LIKE?${RESET}" \ + "http-detailed https://pipedream.changelog.com/" + + clear + + _run_interactive \ + "${CYANB}3/7. MAKE IT WORLD SCALE™${RESET}" \ + "world-scale" + + clear + + _run_interactive \ + "${CYANB}4/7. WHAT DOES THE https://pipedream.changelog.com RESPONSE LOOK LIKE NOW?${RESET}" \ + "http-detailed https://pipedream.changelog.com/" + + clear + + _run_interactive \ + "${CYANB}5/7. LET'S MEASURE THE https://pipedream.changelog.com RESPONSE LATENCY${RESET}" \ + "http-measure https://pipedream.changelog.com/" + + clear + + _run_interactive \ + "${CYANB}6/7. HOW DOES https://changelog.com COMPARE?${RESET}" \ + "http-measure https://changelog.com/" + + clear + + _run \ + "${CYANB}7/7. 
HOW MANY LINES?${RESET}" \ + "how-many-lines" +} + + + +## PRIVATE FUNCTIONS +# + +_run() { + local name="${1:?first argument must be command description}" + local command="${2:?second argument must be command to run}" + + printf "\n${MAGENTAB}$name${RESET}\n${BOLD}$command${RESET}\n\n" + + eval "$command" +} + +_run_interactive() { + _run "$1" "$2" + _next +} + +_play() { + local command="${1:?first argument must be command to run}" + + printf "\n${CYANB}PLAY WITH IT${RESET} %s\n" "$command" +} + +_next() { + echo + read -rp "${WHITEB}PRESS ANY KEY TO CONTINUE${RESET} " -n 1 +} + +_varnish_version() { + rg FROM Dockerfile \ + | awk '{ print $2 }' +} + +main "$@"