Skip to content

Commit

Permalink
(new) CookResource will retry failed connections before returning (#609)
Browse files Browse the repository at this point in the history
  • Loading branch information
EMaslowskiQ authored Aug 7, 2024
1 parent c95c0ec commit bd8da54
Showing 1 changed file with 40 additions and 23 deletions.
63 changes: 40 additions & 23 deletions server/job/impl/Cook/CookResource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,33 +44,50 @@ const supportedJobTypes: string[] = ['inspect','scene_generation','generate_down
/**
 * Queries a Cook resource's `/machine` endpoint and reports its current job counts.
 *
 * The request is attempted up to three times. A retry only happens when the failure
 * message contains 'getaddrinfo EAI_AGAIN'; any other failure is returned immediately.
 * This function never throws: every failure is reported through the returned state.
 *
 * @param address base address of the resource (e.g. http://si-3dcook02.us.sinet.si.edu)
 * @param port port the resource is listening on (e.g. 8000)
 * @returns a state with `success: true` and the created/waiting/running job counts
 *          (weight is set to -1 here), or `success: false` with an `error` message.
 */
const getCookResourceStatus = async (address: string, port: number): Promise<CookResourceState> => {
    // example: http://si-3dcook02.us.sinet.si.edu:8000/machine
    const endpoint = address+':'+port+'/machine';
    const maxRetries: number = 3;
    const requestTimeoutMs: number = 5000;
    const retryDelayMs: number = 1000;

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
        try {
            LOG.info(`getCookResources getting status for resource: ${endpoint}.`,LOG.LS.eSYS);

            // make our query to the resource and timeout after 5 seconds to minimize delays
            const response: AxiosResponse | null = await axios.get(endpoint, { timeout: requestTimeoutMs });

            // defensive guard: a missing response or a non-2xx status is reported as a
            // failure without retrying
            if (!response || response.status<200 || response.status>299) {
                return {
                    success: false,
                    error: `${response?.status}: ${response?.statusText}`,
                    address };
            }

            // a payload without the expected `jobs` object throws here and is reported
            // through the catch below as a (non-retried) failure
            const data = response.data;
            const result: CookResourceState = {
                success: true,
                address,
                weight: -1,
                jobsCreated: data.jobs.created,
                jobsWaiting: data.jobs.waiting,
                jobsRunning: data.jobs.running,
            };
            return result;

        } catch (error: unknown) {
            // normalize whatever was thrown into a string. JSON.stringify can return
            // undefined (e.g. for `undefined` itself), so fall back to String().
            const errorMessage: string = (error instanceof Error && error.message)
                ? error.message
                : (JSON.stringify(error) ?? String(error));
            LOG.error(`getCookResources ${address} attempt ${attempt} failed with error: ${errorMessage}`, LOG.LS.eSYS);

            // give up if we are out of attempts or the failure is not the
            // 'getaddrinfo EAI_AGAIN' lookup error we retry on
            if (attempt === maxRetries || !errorMessage.includes('getaddrinfo EAI_AGAIN')) {
                return { success: false, error: errorMessage, address };
            }

            // Wait for a short delay before retrying
            await new Promise(resolve => setTimeout(resolve, retryDelayMs));
        }
    }

    // if all retries failed, return failed state
    // (every loop iteration returns on its last attempt, so this is only reached if maxRetries < 1)
    LOG.error(`getCookResources ${address} maximum retries reached.`, LOG.LS.eSYS);
    return { success: false, error: 'Max retries reached', address };
};
const verifyCookResourceCapability = (job: string, resource: DBAPI.CookResource): number => {
// check if the resource supports the given job type
Expand Down

0 comments on commit bd8da54

Please sign in to comment.