From 58eed8b73455fe0293ece062879714c7e13fbc57 Mon Sep 17 00:00:00 2001 From: Tyler Hendrickson <1851017+TylerHendrickson@users.noreply.github.com> Date: Fri, 13 Dec 2024 13:13:08 -0600 Subject: [PATCH] Add ECS health checks for API tasks (#3632) * Refactor GET /api/health endpoint handler * Add healthcheck script * Configure ECS healthcheck for API tasks * Update tests for healthcheck endpoint * Use request logger * Bump query execution timeout to 1s --- packages/server/__tests__/api/health.test.js | 2 +- packages/server/src/routes/health.js | 24 +++++++++++++++----- packages/server/src/scripts/healthcheck.js | 20 ++++++++++++++++ terraform/modules/gost_api/task.tf | 12 ++++++++++ 4 files changed, 51 insertions(+), 7 deletions(-) create mode 100644 packages/server/src/scripts/healthcheck.js diff --git a/packages/server/__tests__/api/health.test.js b/packages/server/__tests__/api/health.test.js index 2f5d364e7..4af37462f 100644 --- a/packages/server/__tests__/api/health.test.js +++ b/packages/server/__tests__/api/health.test.js @@ -15,6 +15,6 @@ describe('/api/health', () => { await server .get('/api/health') .expect(200) - .expect({ success: true, db: 'OK' }); + .expect({ success: true }); }); }); diff --git a/packages/server/src/routes/health.js b/packages/server/src/routes/health.js index e771d15f9..3f2f05c83 100644 --- a/packages/server/src/routes/health.js +++ b/packages/server/src/routes/health.js @@ -4,12 +4,24 @@ const router = express.Router(); const knex = require('../db/connection'); router.get('/', async (req, res) => { - // if DB call fails, this will throw and health route will 500 - const dbResult = await knex - .raw('SELECT \'OK\' AS ok') - .timeout(500, { cancel: true }); - - res.json({ success: true, db: dbResult.rows[0].ok }); + const logger = req.log.child({ ip: req.ip, healthcheck: true }); + logger.debug('starting healthcheck'); + let success = false; + try { + // if DB call fails, this will throw and health route will 500 + const dbHealth = await knex + .raw(`SELECT 'ok' AS healthcheck_result`) + .timeout(1000, { cancel: true }); + success = true; + logger.debug( + { success, db_result: dbHealth.rows[0] }, + 'received healthcheck result from database', + ); + } catch (err) { + logger.error({ success: false, err }, 'healthcheck failed'); + throw err; + } + res.json({ success }); }); module.exports = router; diff --git a/packages/server/src/scripts/healthcheck.js b/packages/server/src/scripts/healthcheck.js new file mode 100644 index 000000000..89c81238c --- /dev/null +++ b/packages/server/src/scripts/healthcheck.js @@ -0,0 +1,20 @@ +// Usage: node healthcheck.js [url] [timeout] +// Exits with error (status code 1) when HTTP request errors or responds with non-200 status. +const got = require('got'); + +const url = process.argv[2] || 'http://localhost:3000/api/health'; +const timeout = Number.parseInt(process.argv[3], 10) || 5000; + +console.log(url); + +got(url, { timeout }).then((res) => { + const { statusCode } = res; + console.log(res.statusCode); + if (statusCode !== 200) { + process.exit(1); + } + process.exit(0); +}).catch((e) => { + console.error(e); + process.exit(1); +}); diff --git a/terraform/modules/gost_api/task.tf b/terraform/modules/gost_api/task.tf index ef4c9cd9d..35fec8da5 100644 --- a/terraform/modules/gost_api/task.tf +++ b/terraform/modules/gost_api/task.tf @@ -29,6 +29,18 @@ module "api_container_definition" { condition = "START" }] + healthcheck = { + command = ["CMD-SHELL", join(" ", [ + "node ./src/scripts/healthcheck.js", + "http://localhost:${local.api_container_port}/api/health 5000", + "|| exit 1" + ])] + startPeriod = 10 + interval = 30 + timeout = 10 + retries = 2 + } + linux_parameters = { capabilities = { add = []