Skip to content

Commit

Permalink
CLDSRV-574 implement KMS health check
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolas2bert committed Nov 9, 2024
1 parent c5ad444 commit 07179dd
Show file tree
Hide file tree
Showing 5 changed files with 314 additions and 20 deletions.
56 changes: 56 additions & 0 deletions lib/kms/Cache.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
class Cache {
constructor() {
this.lastChecked = null;
this.result = null;
}

/**
* Retrieves the cached result.
* @returns {object|null} The cached result or null if not set.
*/
getResult() {
return this.result;
}

/**
* Retrieves the last checked timestamp.
* @returns {number|null} The timestamp of the last check or null if never checked.
*/
getLastChecked() {
return this.lastChecked;
}

/**
* Updates the cache with a new result and timestamp.
* @param {object} result - The result to cache.
* @returns {undefined}
*/
setResult(result) {
this.lastChecked = Date.now();
this.result = result;
}

/**
* Determines whether the cache should be refreshed based on the last checked time.
* @param {number} duration - Duration in milliseconds for cache validity.
* @returns {boolean} true if the cache should be refreshed, else false.
*/
shouldRefresh(duration = 1 * 60 * 60 * 1000) { // Default: 1 hours
if (!this.lastChecked) return true;
const now = Date.now();
const elapsed = now - this.lastChecked;
const jitter = Math.floor(Math.random() * 15 * 60 * 1000); // Up to 15 minutes
return elapsed > (duration - jitter);
}

/**
* Clears the cache.
* @returns {undefined}
*/
clear() {
this.lastChecked = null;
this.result = null;
}
}

module.exports = Cache;
55 changes: 40 additions & 15 deletions lib/kms/wrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ const KMIPClient = require('arsenal').network.kmipClient;
const { KmsAWSClient } = require('arsenal').network;
const Common = require('./common');
const vault = require('../auth/vault');
const Cache = require('./Cache');
const cache = new Cache();
let scalityKMS;
let scalityKMSImpl;
try {
Expand Down Expand Up @@ -355,26 +357,49 @@ class KMS {
}

static checkHealth(log, cb) {
logger.debug('current KMS cache state', { lastChecked: cache.getLastChecked(), result: cache.getResult() });
if (!client.healthcheck) {
return cb(null, {
[implName]: { code: 200, message: 'OK' },
});
}
return client.healthcheck(log, err => {
const respBody = {};
if (err) {
respBody[implName] = {
error: err.description,
code: err.code,
};
} else {
respBody[implName] = {
code: 200,
message: 'OK',
};
}
return cb(null, respBody);
});

const shouldRefreshCache = cache.shouldRefresh();

// healthcheck only if cache needs refreshing
if (shouldRefreshCache) {
logger.debug('health check for KMS backend');
return client.healthcheck(log, (err) => {
const respBody = {};
if (err) {
// Returning HTTP status code 200 allows CloudServer to start up and listen on port 8000
// when KMS is unavailable, and ensures that the S3 frontend deep healthcheck
// still allow requests to this CloudServer instance. This way, an unavailable KMIP
// host or a connection issue with KMIP will not prevent the normal operation of CloudServer.
respBody[implName] = {
code: 200,
message: 'KMS health check failed',
description: err.description,
};
logger.warn('KMS health check failed', { errorCode: err.code, error: err.description });
} else {
respBody[implName] = {
code: 200,
message: 'OK',
};
logger.info('KMS health check succeeded', { respBody });
}
// Only update the cache on success
cache.setResult(respBody);
logger.debug('updated KMS cache:', { lastChecked: cache.getLastChecked(), result: cache.getResult() });
return cb(null, respBody);
});
}

// Use cached healthcheck result if within a 1 hour window
const result = cache.getResult();
logger.debug('using cached KMS health check', { cachedResult: result });
return cb(null, result);
}
}

Expand Down
7 changes: 2 additions & 5 deletions lib/utilities/healthcheckHandler.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const { data } = require('../data/wrapper');
const vault = require('../auth/vault');
const metadata = require('../metadata/wrapper');
const async = require('async');
const kms = require('../kms/wrapper');

// current function utility is minimal, but will be expanded
function isHealthy() {
Expand Down Expand Up @@ -40,15 +41,11 @@ function writeResponse(res, error, log, results, cb) {
* @return {undefined}
*/
function clientCheck(flightCheckOnStartUp, log, cb) {
// FIXME S3C-4833 KMS healthchecks have been disabled:
// - they should be reworked to avoid blocking all requests,
// including unencrypted requests
// - they should not prevent Cloudserver from starting up
const clients = [
data,
metadata,
vault,
// kms,
kms,
];
const clientTasks = [];
clients.forEach(client => {
Expand Down
91 changes: 91 additions & 0 deletions tests/unit/encryption/checkHealth.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
const assert = require('assert');
const sinon = require('sinon');
const { errors } = require('arsenal');

const Cache = require('../../../lib/kms/Cache');
const { DummyRequestLogger } = require('../helpers');
const memBackend = require('../../../lib/kms/in_memory/backend');
const kms = require('../../../lib/kms/wrapper');

const log = new DummyRequestLogger();

describe('KMS.checkHealth', () => {
let getResultStub;
let setResultStub;
let shouldRefreshStub;

beforeEach(() => {
getResultStub = sinon.stub(Cache.prototype, 'getResult').returns(null);
setResultStub = sinon.stub(Cache.prototype, 'setResult').returns();
shouldRefreshStub = sinon.stub(Cache.prototype, 'shouldRefresh').returns(true);

delete memBackend.healthcheck;
});

afterEach(() => {
sinon.restore();
});

it('should return OK when kms backend does not have healthcheck method', (done) => {
kms.checkHealth(log, (err, result) => {
assert.strictEqual(err, null);
assert.deepStrictEqual(result, {
['memoryKms']: { code: 200, message: 'OK' },
});

done();
});
});

it('should return OK when healthcheck succeeds', (done) => {
memBackend.backend.healthcheck = sinon.stub().callsFake((log, cb) => cb(null));

kms.checkHealth(log, (err, result) => {
assert.strictEqual(err, null);
assert.deepStrictEqual(result, {
['memoryKms']: { code: 200, message: 'OK' },
});

// Verify that cache.setResult was called with the correct result
assert(setResultStub.calledOnceWithExactly(result));

done();
});
});

it('should return failure message when healthcheck fails', (done) => {
memBackend.backend.healthcheck = sinon.stub().callsFake((log, cb) => cb(errors.InternalError));

kms.checkHealth(log, (err, result) => {
assert.strictEqual(err, null);
assert.deepStrictEqual(result, {
['memoryKms']: {
code: 200,
message: 'KMS health check failed',
description: 'We encountered an internal error. Please try again.',
},
});

assert(setResultStub.calledOnceWithExactly(result));

done();
});
});

it('should use cached result when not refreshing', (done) => {
memBackend.backend.healthcheck = sinon.stub().callsFake((log, cb) => cb(null));
shouldRefreshStub.returns(false);

const cachedResult = {
['memoryKms']: { code: 200, message: 'Cached OK' },
};
getResultStub.returns(cachedResult);

kms.checkHealth(log, (err, result) => {
assert.strictEqual(err, null);
assert.deepStrictEqual(result, cachedResult);

done();
});
});
});
125 changes: 125 additions & 0 deletions tests/unit/encryption/healthCheckCache.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
const assert = require('assert');
const Cache = require('../../../lib/kms/Cache');

describe('Cache Class', () => {
let cache;
let originalDateNow;
let originalMathRandom;

beforeEach(() => {
cache = new Cache();
originalDateNow = Date.now;
originalMathRandom = Math.random;
});

afterEach(() => {
Date.now = originalDateNow;
Math.random = originalMathRandom;
});

describe('getResult()', () => {
it('should return null when no result is set', () => {
assert.strictEqual(cache.getResult(), null);
});

it('should return the cached result after setResult is called', () => {
const result = { data: 'test' };
cache.setResult(result);
assert.deepStrictEqual(cache.getResult(), result);
});
});

describe('getLastChecked()', () => {
it('should return null when cache has never been set', () => {
assert.strictEqual(cache.getLastChecked(), null);
});

it('should return the timestamp after setResult is called', () => {
const fakeTimestamp = 1625077800000; // Example timestamp
Date.now = () => fakeTimestamp; // Mock Date.now
cache.setResult({ data: 'test' });
assert.strictEqual(cache.getLastChecked(), fakeTimestamp);
});
});

describe('setResult()', () => {
it('should set the result and update lastChecked', () => {
const fakeTimestamp = 1625077800000;
Date.now = () => fakeTimestamp; // Mock Date.now
const result = { data: 'test' };
cache.setResult(result);
assert.deepStrictEqual(cache.getResult(), result);
assert.strictEqual(cache.getLastChecked(), fakeTimestamp);
});
});

describe('shouldRefresh()', () => {
it('should return true if cache has never been set', () => {
assert.strictEqual(cache.shouldRefresh(), true);
});

it('should return false if elapsed time is less than duration minus maximum jitter', () => {
const fakeNow = 1625077800000;
// Elapsed time = 1 hour - 15 minutes = 45 minutes
const fakeLastChecked = fakeNow - (45 * 60 * 1000); // 45 minutes ago
Date.now = () => fakeNow;
cache.lastChecked = fakeLastChecked;

// Mock Math.random to return 0 (jitter = 0)
Math.random = () => 0;

// elapsed = 45 minutes, duration - jitter = 60 minutes
// 45 < 60 => shouldRefresh = false
assert.strictEqual(cache.shouldRefresh(), false);
});

it('should return true if elapsed time is greater than duration minus maximum jitter', () => {
const fakeNow = 1625077800000;
// Elapsed time = 1 hour + 1 minute = 61 minutes
const fakeLastChecked = fakeNow - (61 * 60 * 1000); // 61 minutes ago
Date.now = () => fakeNow;
cache.lastChecked = fakeLastChecked;

// Mock Math.random to return 0 (jitter = 0)
Math.random = () => 0;

// elapsed = 61 minutes, duration - jitter = 60 minutes
// 61 > 60 => shouldRefresh = true
assert.strictEqual(cache.shouldRefresh(), true);
});

it('should use custom duration if provided', () => {
const customDuration = 6 * 60 * 60 * 1000; // 6 hours in milliseconds
const fakeNow = 1625077800000;
Date.now = () => fakeNow;

// Elapsed time = 5 hours
const fakeLastChecked1 = fakeNow - (5 * 60 * 60 * 1000);
cache.lastChecked = fakeLastChecked1;

// Mock Math.random to return 0 (jitter = 0)
Math.random = () => 0;

// 5 hours < 6 hours => shouldRefresh = false
assert.strictEqual(cache.shouldRefresh(customDuration), false,
'Cache should not refresh within custom duration');

// Elapsed time = 7 hours
const fakeLastChecked2 = fakeNow - (7 * 60 * 60 * 1000);
cache.lastChecked = fakeLastChecked2;

// 7 hours > 6 hours => shouldRefresh = true
assert.strictEqual(cache.shouldRefresh(customDuration), true,
'Cache should refresh after custom duration');
});
});

describe('clear()', () => {
it('should reset lastChecked and result to null', () => {
cache.setResult({ data: 'test' });
cache.clear();
assert.strictEqual(cache.getResult(), null);
assert.strictEqual(cache.getLastChecked(), null);
});
});
});

0 comments on commit 07179dd

Please sign in to comment.