Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDSRV-574 implement KMS health check #5697

Merged
merged 1 commit into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions lib/kms/Cache.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
class Cache {
constructor() {
this.lastChecked = null;
this.result = null;
}

/**
* Retrieves the cached result with the last checked timestamp.
* @returns {object|null} An object containing the result and lastChecked, or null if not set.
*/
getResult() {
if (!this.result) {
return null;
}

return Object.assign({}, this.result, {
lastChecked: this.lastChecked ? new Date(this.lastChecked).toISOString() : null,
});
}

/**
* Retrieves the last checked timestamp.
* @returns {number|null} The timestamp of the last check or null if never checked.
*/
getLastChecked() {
return this.lastChecked;
}

/**
* Updates the cache with a new result and timestamp.
* @param {object} result - The result to cache.
* @returns {undefined}
*/
setResult(result) {
this.lastChecked = Date.now();
this.result = result;
}

/**
* Determines if the cache should be refreshed based on the last checked time.
* @param {number} duration - Duration in milliseconds for cache validity.
* @returns {boolean} true if the cache should be refreshed, else false.
*/
shouldRefresh(duration = 1 * 60 * 60 * 1000) { // Default: 1 hour
if (!this.lastChecked) {
return true;
}

const now = Date.now();
const elapsed = now - this.lastChecked;
const jitter = Math.floor(Math.random() * 15 * 60 * 1000); // Up to 15 minutes
return elapsed > (duration - jitter);
}

/**
* Clears the cache.
* @returns {undefined}
*/
clear() {
this.lastChecked = null;
this.result = null;
}
}

module.exports = Cache;
58 changes: 43 additions & 15 deletions lib/kms/wrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ const inMemory = require('./in_memory/backend').backend;
const file = require('./file/backend');
const KMIPClient = require('arsenal').network.kmipClient;
const Common = require('./common');
const Cache = require('./Cache');
const cache = new Cache();
let scalityKMS;
let scalityKMSImpl;
try {
Expand Down Expand Up @@ -306,21 +308,47 @@ class KMS {
[implName]: { code: 200, message: 'OK' },
});
}
return client.healthcheck(log, err => {
const respBody = {};
if (err) {
respBody[implName] = {
error: err.description,
code: err.code,
};
} else {
respBody[implName] = {
code: 200,
message: 'OK',
};
}
return cb(null, respBody);
});

const cachedResult = cache.getResult();
logger.debug('current KMS cache state', { result: cachedResult });

const shouldRefreshCache = cache.shouldRefresh();

if (shouldRefreshCache) {
logger.debug('health check for KMS backend');
return client.healthcheck(log, err => {
let res;
if (err) {
res = {
// The following response makes sure that if KMS is down,
// cloudserver health check is still healthy.
// Simply including an error code in the response won't cause the health check to fail.
// Instead, the healthCheck logic detects errors by checking for the "error" field.
code: err.code,
message: 'KMS health check failed',
description: err.description,
};
logger.warn('KMS health check failed', { errorCode: err.code, error: err.description });
} else {
res = {
code: 200,
message: 'OK',
};
logger.info('KMS health check succeeded', { res });
}

cache.setResult(res);
const updatedResult = cache.getResult();
logger.debug('updated KMS cache:', { result: updatedResult });

const respBody = { [implName]: updatedResult };
return cb(null, respBody);
});
}

// Use cached healthcheck result if within a 1-hour window
logger.debug('using cached KMS health check', { cachedResult });
return cb(null, { [implName]: cachedResult });
}
}

Expand Down
7 changes: 2 additions & 5 deletions lib/utilities/healthcheckHandler.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const { data } = require('../data/wrapper');
const vault = require('../auth/vault');
const metadata = require('../metadata/wrapper');
const async = require('async');
const kms = require('../kms/wrapper');

// current function utility is minimal, but will be expanded
function isHealthy() {
Expand Down Expand Up @@ -40,15 +41,11 @@ function writeResponse(res, error, log, results, cb) {
* @return {undefined}
*/
function clientCheck(flightCheckOnStartUp, log, cb) {
// FIXME S3C-4833 KMS healthchecks have been disabled:
// - they should be reworked to avoid blocking all requests,
// including unencrypted requests
// - they should not prevent Cloudserver from starting up
const clients = [
data,
metadata,
vault,
// kms,
kms,
];
const clientTasks = [];
clients.forEach(client => {
Expand Down
132 changes: 132 additions & 0 deletions tests/unit/encryption/checkHealth.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
const assert = require('assert');
const sinon = require('sinon');
const { errors } = require('arsenal');

const Cache = require('../../../lib/kms/Cache');
const { DummyRequestLogger } = require('../helpers');
const memBackend = require('../../../lib/kms/in_memory/backend');
const kms = require('../../../lib/kms/wrapper');

const log = new DummyRequestLogger();

describe('KMS.checkHealth', () => {
let setResultSpy;
let shouldRefreshStub;
let clock;

beforeEach(() => {
clock = sinon.useFakeTimers({
now: 1625077800000,
toFake: ['Date'],
});

setResultSpy = sinon.spy(Cache.prototype, 'setResult');
shouldRefreshStub = sinon.stub(Cache.prototype, 'shouldRefresh').returns(true);

delete memBackend.backend.healthcheck;
});

afterEach(() => {
sinon.restore();
if (clock) {
clock.restore();
}
});

it('should return OK when kms backend does not have healthcheck method', done => {
kms.checkHealth(log, (err, result) => {
assert.ifError(err);
assert.deepStrictEqual(result, {
memoryKms: { code: 200, message: 'OK' },
});

assert(shouldRefreshStub.notCalled, 'shouldRefresh should not be called');
assert(setResultSpy.notCalled, 'setResult should not be called');

done();
});
});

it('should return OK when healthcheck succeeds', done => {
memBackend.backend.healthcheck = sinon.stub().callsFake((log, cb) => cb(null));

kms.checkHealth(log, (err, result) => {
assert.ifError(err);

const expectedLastChecked = new Date(clock.now).toISOString();

assert.deepStrictEqual(result, {
memoryKms: { code: 200, message: 'OK', lastChecked: expectedLastChecked },
});

assert(shouldRefreshStub.calledOnce, 'shouldRefresh should be called once');

assert(setResultSpy.calledOnceWithExactly({
code: 200,
message: 'OK',
}));

done();
});
});

it('should return failure message when healthcheck fails', done => {
memBackend.backend.healthcheck = sinon.stub().callsFake((log, cb) => cb(errors.InternalError));

kms.checkHealth(log, (err, result) => {
assert.ifError(err);

const expectedLastChecked = new Date(clock.now).toISOString();

assert.deepStrictEqual(result, {
memoryKms: {
code: 500,
message: 'KMS health check failed',
description: 'We encountered an internal error. Please try again.',
lastChecked: expectedLastChecked,
},
});

assert(shouldRefreshStub.calledOnce, 'shouldRefresh should be called once');

assert(setResultSpy.calledOnceWithExactly({
code: 500,
message: 'KMS health check failed',
description: 'We encountered an internal error. Please try again.',
}));

done();
});
});

it('should use cached result when not refreshing', done => {
memBackend.backend.healthcheck = sinon.stub().callsFake((log, cb) => cb(null));
// first call to populate the cache
kms.checkHealth(log, err => {
assert.ifError(err);
shouldRefreshStub.returns(false);

// second call should use the cached result
kms.checkHealth(log, (err, result) => {
assert.ifError(err);

const expectedLastChecked = new Date(clock.now).toISOString();
assert.deepStrictEqual(result, {
memoryKms: {
code: 200,
message: 'OK',
lastChecked: expectedLastChecked,
},
});

// once each call
assert.strictEqual(shouldRefreshStub.callCount, 2, 'shouldRefresh should be called twice');

// only the first call
assert.strictEqual(setResultSpy.callCount, 1, 'setResult should be called once');

done();
});
});
});
});
Loading
Loading