Skip to content

Commit

Permalink
[OGUI-1389] Let gRPC do the reconnection on its own (#2155)
Browse files Browse the repository at this point in the history
* Removes the creation of a new gRPC client and the shutdown of the existing one when AliECS hangs.
* Instead, it leaves gRPC to attempt the reconnection on its own.
* Increases the timeouts to give AliECS more time to reply when it is frozen.
  • Loading branch information
graduta authored Oct 6, 2023
1 parent 689e974 commit 111cd8a
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 25 deletions.
21 changes: 7 additions & 14 deletions Control/lib/control-core/ControlService.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
const assert = require('assert');
const path = require('path');
const {WebSocketMessage, Log} = require('@aliceo2/web-ui');
const GrpcProxy = require('./GrpcProxy.js');
const log = new Log(`${process.env.npm_config_log_label ?? 'cog'}/controlservice`);
const {errorHandler, errorLogger} = require('./../utils.js');
const CoreUtils = require('./CoreUtils.js');
Expand Down Expand Up @@ -55,22 +54,16 @@ class ControlService {
* @returns {Interval}
*/
initiateHeartBeat() {
let wasInError = false;
return setInterval(async () => {
try {
await this.ctrlProx['GetEnvironments']({}, {deadline: Date.now() + 3500});
wasInError = false;
await this.ctrlProx['GetEnvironments']({}, {deadline: Date.now() + 9000});
} catch (err) {
if (!wasInError) {
log.errorMessage('Unable to reach AliECS, attempting reconnection in silence', {
level: 20,
system: 'GUI',
facility: 'cog/controlservice'
});
}
wasInError = true;
this.ctrlProx.client.close()
this.ctrlProx = new GrpcProxy(this.coreConfig, this.O2_CONTROL_PROTO_PATH, wasInError);
const stateCode = this.ctrlProx.client.getChannel().getConnectivityState();
log.errorMessage(`Unable to reach AliECS (state: ${stateCode}), attempting reconnection`, {
level: 20,
system: 'GUI',
facility: 'cog/controlservice'
});
}
}, 10000);
}
Expand Down
2 changes: 1 addition & 1 deletion Control/lib/control-core/EnvCache.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class EnvCache {
constructor(ctrlService) {
this.ctrlService = ctrlService;
this.cache = {};
this.timeout = 5000;
this.timeout = 9000;
this.cacheEvictionTimeout = 5 * 60 * 1000;
this.cacheEvictionLast = new Date();
this.refreshInterval = setInterval(() => this.refresh(), this.timeout);
Expand Down
18 changes: 8 additions & 10 deletions Control/lib/control-core/GrpcProxy.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,8 @@ class GrpcProxy {
* https://grpc.io/grpc/node/grpc.Client.html
* @param {Object} config - Contains configuration fields for gRPC client
* @param {string} path - path to protofile location
* @param {boolean} wasInError - parameter to define if the connection is done following a failed attempt, thus logs are silent
*/
constructor(config, path, wasInError = false) {
constructor(config, path) {
if (this._isConfigurationValid(config, path)) {
const packageDefinition = protoLoader.loadSync(path, {longs: String, keepCase: false, arrays: true});
const octlProto = grpcLibrary.loadPackageDefinition(packageDefinition);
Expand All @@ -42,8 +41,10 @@ class GrpcProxy {
const options = {'grpc.max_receive_message_length': 1024 * 1024 * this._maxMessageLength}; // MB

this.client = new protoService(address, credentials, options);
this.client.waitForReady(Date.now() + this._connectionTimeout,
(error) => this._logConnectionResponse(error, wasInError, address));
this.client.waitForReady(
Date.now() + this._connectionTimeout,
(error) => this._logConnectionResponse(error, address)
);

// set all the available gRPC methods in object and build a separate array with names only
this.methods = Object.keys(protoService.prototype)
Expand Down Expand Up @@ -135,15 +136,12 @@ class GrpcProxy {
/**
*
* @param {Error} error - error following attempt to connect to gRPC server
* @param {boolean} wasInError - flag to know if connection is attempted again after a failure; If yes, logging is disabled
* @param {string} address - address on which connection was attempted
*/
_logConnectionResponse(error, wasInError, address) {
_logConnectionResponse(error, address) {
if (error) {
if (!wasInError) {
log.error(`Connection to ${this._label} server (${address}) timeout`);
log.error(error.message);
}
log.error(`Connection to ${this._label} server (${address}) timeout`);
log.error(error.message);

this.connectionError = error;
this.isConnectionReady = false;
Expand Down

0 comments on commit 111cd8a

Please sign in to comment.