diff --git a/common/constants.ts b/common/constants.ts index e1e87c0c0..a4eb66f03 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -175,6 +175,7 @@ export enum eVocabularyID { eWorkflowTypeCookJob, eWorkflowTypeIngestion, eWorkflowTypeUpload, + eWorkflowTypeVerifier, // ADDED: EM (2023-01-28) eWorkflowStepTypeStart, eWorkflowEventIngestionUploadAssetVersion, eWorkflowEventIngestionIngestObject, diff --git a/server/cache/VocabularyCache.ts b/server/cache/VocabularyCache.ts index 255c5c378..89aed3ab3 100644 --- a/server/cache/VocabularyCache.ts +++ b/server/cache/VocabularyCache.ts @@ -306,6 +306,7 @@ export class VocabularyCache { case 'Cook Job': eVocabEnum = COMMON.eVocabularyID.eWorkflowTypeCookJob; break; case 'Ingestion': eVocabEnum = COMMON.eVocabularyID.eWorkflowTypeIngestion; break; case 'Upload': eVocabEnum = COMMON.eVocabularyID.eWorkflowTypeUpload; break; + case 'Verifier': eVocabEnum = COMMON.eVocabularyID.eWorkflowTypeVerifier; break; } } break; diff --git a/server/db/api/WorkflowReport.ts b/server/db/api/WorkflowReport.ts index 6dc093952..07a4a5b23 100644 --- a/server/db/api/WorkflowReport.ts +++ b/server/db/api/WorkflowReport.ts @@ -8,6 +8,7 @@ export class WorkflowReport extends DBC.DBObject implements idWorkflow!: number; MimeType!: string; Data!: string; + Name!: string | null; constructor(input: WorkflowReportBase) { super(input); @@ -18,7 +19,7 @@ export class WorkflowReport extends DBC.DBObject implements protected async createWorker(): Promise { try { - const { idWorkflow, MimeType, Data } = this; + const { idWorkflow, MimeType, Data, Name } = this; ({ idWorkflowReport: this.idWorkflowReport, idWorkflow: this.idWorkflow, MimeType: this.MimeType, Data: this.Data } = await DBC.DBConnection.prisma.workflowReport.create({ @@ -26,6 +27,7 @@ export class WorkflowReport extends DBC.DBObject implements Workflow: { connect: { idWorkflow }, }, MimeType, Data, + Name, }, })); return true; @@ -36,13 +38,14 @@ export class WorkflowReport 
extends DBC.DBObject implements protected async updateWorker(): Promise { try { - const { idWorkflowReport, idWorkflow, MimeType, Data } = this; + const { idWorkflowReport, idWorkflow, MimeType, Data, Name } = this; return await DBC.DBConnection.prisma.workflowReport.update({ where: { idWorkflowReport, }, data: { Workflow: { connect: { idWorkflow }, }, MimeType, Data, + Name, }, }) ? true : /* istanbul ignore next */ false; } catch (error) /* istanbul ignore next */ { diff --git a/server/db/prisma/schema.prisma b/server/db/prisma/schema.prisma index 5551f4a13..1be1a6bed 100644 --- a/server/db/prisma/schema.prisma +++ b/server/db/prisma/schema.prisma @@ -875,6 +875,7 @@ model WorkflowReport { idWorkflow Int MimeType String @mariasql.VarChar(256) Data String @mariasql.LongText + Name String? @mariasql.VarChar(255) Workflow Workflow @relation(fields: [idWorkflow], references: [idWorkflow], onDelete: NoAction, onUpdate: NoAction, map: "fk_workflowreport_workflow1") @@index([idWorkflow], map: "fk_workflowreport_workflow1") diff --git a/server/db/sql/models/Packrat.mwb b/server/db/sql/models/Packrat.mwb index b605adab4..38111f9ec 100644 Binary files a/server/db/sql/models/Packrat.mwb and b/server/db/sql/models/Packrat.mwb differ diff --git a/server/db/sql/scripts/Packrat.ALTER.sql b/server/db/sql/scripts/Packrat.ALTER.sql index 2f0cc8cd1..609781379 100644 --- a/server/db/sql/scripts/Packrat.ALTER.sql +++ b/server/db/sql/scripts/Packrat.ALTER.sql @@ -483,4 +483,8 @@ UPDATE Unit SET ARKPrefix = 'uj5' WHERE Abbreviation = 'OCIO'; -- 2022-11-11 Jon ALTER TABLE ModelSceneXref MODIFY COLUMN `NAME` varchar(512) DEFAULT NULL; --- 2022-11-11 Deployed to Staging and Production \ No newline at end of file +-- 2022-11-11 Deployed to Staging and Production + +-- 2023-01-29 Eric +INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (22, 4, 'Verifier'); +ALTER TABLE WorkflowRecord ADD 'Name' varchar(255); \ No newline at end of file diff --git 
a/server/db/sql/scripts/Packrat.DATA.sql b/server/db/sql/scripts/Packrat.DATA.sql index 7f26bc1f9..f79c9ff22 100644 --- a/server/db/sql/scripts/Packrat.DATA.sql +++ b/server/db/sql/scripts/Packrat.DATA.sql @@ -190,6 +190,7 @@ INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (23, 1, 'Ingest INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (23, 2, 'Ingestion: Ingest Object'); INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (22, 2, 'Ingestion'); INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (22, 3, 'Upload'); +INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (22, 4, 'Verifier'); INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (18, 2, 'Image'); INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (24, 1, 'mm'); INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (24, 2, 'cm'); diff --git a/server/db/sql/scripts/Packrat.SCHEMA.sql b/server/db/sql/scripts/Packrat.SCHEMA.sql index 990d48c09..84d4b6ce3 100644 --- a/server/db/sql/scripts/Packrat.SCHEMA.sql +++ b/server/db/sql/scripts/Packrat.SCHEMA.sql @@ -626,6 +626,7 @@ CREATE TABLE IF NOT EXISTS `WorkflowReport` ( `idWorkflow` int(11) NOT NULL, `MimeType` varchar(256) NOT NULL, `Data` longtext NOT NULL, + `Name` varchar(256) DEFAULT NULL, PRIMARY KEY (`idWorkflowReport`) ) ENGINE=InnoDB DEFAULT CHARSET=UTF8MB4; diff --git a/server/http/index.ts b/server/http/index.ts index 3a127ac78..0b2927dfd 100644 --- a/server/http/index.ts +++ b/server/http/index.ts @@ -14,6 +14,8 @@ import { Downloader, download } from './routes/download'; import { errorhandler } from './routes/errorhandler'; import { WebDAVServer } from './routes/WebDAVServer'; +import * as VERIFIERS from './routes/verifiers'; + import express, { Request, Express, RequestHandler } from 'express'; import cors from 'cors'; import { ApolloServer } from 'apollo-server-express'; @@ -87,6 +89,10 @@ export class HttpServer { 
this.app.get(`${Downloader.httpRoute}*`, HttpServer.idRequestMiddleware2); this.app.get(`${Downloader.httpRoute}*`, download); + // endpoints for verifiers. + this.app.get('/verifier',VERIFIERS.routeRequest); // catch in case of misuse + this.app.get('/verifier/:id', VERIFIERS.routeRequest); + const WDSV: WebDAVServer | null = await WebDAVServer.server(); if (WDSV) { this.app.use(WebDAVServer.httpRoute, HttpServer.idRequestMiddleware2); diff --git a/server/http/routes/dataQueries.ts b/server/http/routes/dataQueries.ts new file mode 100644 index 000000000..6ce7e6a42 --- /dev/null +++ b/server/http/routes/dataQueries.ts @@ -0,0 +1,100 @@ +/* eslint-disable @typescript-eslint/no-unused-vars */ + +import { Request, Response } from 'express'; +import * as H from '../../utils/helpers'; +import * as LOG from '../../utils/logger'; +import * as DBAPI from '../../db'; + +import GraphQLApi from '../../graphql'; +import { GetSystemObjectDetailsInput, GetSystemObjectDetailsResult } from '../../types/graphql'; + +export async function routeRequest(request: Request, response: Response): Promise { + + const detailsToReturn = request.params.id; + console.warn(detailsToReturn+'|'+JSON.stringify(request.params)); + + // if nothing then complain + if(detailsToReturn===undefined) { + LOG.error('HTTP request: incorrect usage of endpoint', LOG.LS.eHTTP); + response.send('Request failed. Incorrect use of endpoint. Be sure to include what you are looking for'); + return; + } + + // handle the proper type + switch(detailsToReturn){ + case 'systemObject': { + return await getSystemObjectDetails(request,response); + } break; + + default: { + LOG.error(`HTTP request: unsupported request (${detailsToReturn})`, LOG.LS.eHTTP); + response.send(`Request failed. Unsupported request/path (${detailsToReturn})`); + } + } +} + +// convenience routine routine for getting system object details to be used with routes. 
+// NOTE: not connected as it should not be 'live' until an API is created and protected +async function getSystemObjectDetails(req: Request, response: Response): Promise { + // TODO: update to use direct call to getSystemObjectDetails (db/api/schema/systemobject/resolvers/queries/...) + + // grab our config options from query params + const subjectLimit: number = (req.query.limit)?parseInt(req.query.limit as string):10000; + const systemObjectId: number = (req.query.objectId)?parseInt(req.query.objectId as string):-1; + + // fetch all subjects from Packrat DB to get list of IDs + const systemObjects: DBAPI.SystemObject[] | null = await DBAPI.SystemObject.fetchAll(); /* istanbul ignore if */ + if (!systemObjects) { + sendResponseMessage(response,false,'could not get system objects from DB'); + return; + } + if(systemObjects.length<=0) { + sendResponseMessage(response,false,'no system objects found in DB'); + return; + } + LOG.info(`Getting SystemObject Details processing ${systemObjects.length} ids`,LOG.LS.eGQL); + + // loop through subjects, extract name, query from EDAN + const output: string[] = []; + for(let i=0; i=subjectLimit) break; + + const idSystemObject: number = systemObjects[i].fetchID(); + if(systemObjectId>0 && idSystemObject!=systemObjectId) continue; + + const input: GetSystemObjectDetailsInput = { + idSystemObject + }; + const graphQLApi = new GraphQLApi(true); + const results: GetSystemObjectDetailsResult = await graphQLApi.getSystemObjectDetails(input); + + // TODO: get asset details and inject into above results on 'asset' field + // getAssetDetailsForSystemObject() + + // store our results + output.push(H.Helpers.JSONStringify(results)); + + // break; + } + + // if we return the file then do so, overwriting any message + if(output.length>0) { + const name = 'SystemObjectDetails_'+new Date().toISOString().split('T')[0]; + response.setHeader('Content-disposition', `attachment; filename=${name}.json`); + response.set('Content-Type', 'text/json'); 
+ response.statusMessage = 'Gathering system object details SUCCEEDED!'; + response.status(200).send(output.join('\n')); + return; + } + + const message = 'Getting system object details succeeded, but nothing to return.'; + LOG.info(message,LOG.LS.eGQL); + sendResponseMessage(response,true,message); + return; +} + +function sendResponseMessage(response: Response, success: boolean, message: string) { + LOG.error(`Getting data from database ${(success)?'SUCCEEDED':'FAILED'}: ${message}`, LOG.LS.eGQL); + response.send(`Getting data from database ${(success)?'SUCCEEDED':'FAILED'}: ${message}`); +} \ No newline at end of file diff --git a/server/http/routes/download.ts b/server/http/routes/download.ts index 28bf06bd6..c269e94cc 100644 --- a/server/http/routes/download.ts +++ b/server/http/routes/download.ts @@ -198,7 +198,19 @@ export class Downloader { const mimeType: string = WFReports[0].MimeType; const idWorkflowReport: number = WFReports[0].idWorkflowReport; - this.response.setHeader('Content-disposition', `inline; filename=WorkflowReport.${idWorkflowReport}.htm`); + // get/set our filename depending on if it's present or not + let filename: string = `WorkflowReport.${idWorkflowReport}`; + if(WFReports[0].Name) filename = WFReports[0].Name; + + // add our extension based on mimeType + switch(mimeType) { + case 'text/html': filename += '.htm'; break; + case 'text/csv': filename += '.csv'; break; + default: filename += '.htm'; break; + } + + // set our properties and configure the response + this.response.setHeader('Content-disposition', `inline; filename=${filename}`); if (mimeType) this.response.setHeader('Content-type', mimeType); let first: boolean = true; diff --git a/server/http/routes/verifiers.ts b/server/http/routes/verifiers.ts new file mode 100644 index 000000000..dd35fd065 --- /dev/null +++ b/server/http/routes/verifiers.ts @@ -0,0 +1,186 @@ +/* eslint-disable @typescript-eslint/no-unused-vars */ + +import { Request, Response } from 'express'; +import * 
as COL from '../../collections/interface/'; +import * as WF from '../../workflow/interface'; +import * as REP from '../../report/interface'; +import * as COMMON from '@dpo-packrat/common'; +import * as H from '../../utils/helpers'; +import * as LOG from '../../utils/logger'; +import * as WFV from '../../workflow/impl/Packrat/WorkflowVerifier'; +import * as DBAPI from '../../db'; +import { ASL, LocalStore } from '../../utils/localStore'; +import { RouteBuilder, eHrefMode } from '../../http/routes/routeBuilder'; + +export async function routeRequest(request: Request, response: Response): Promise { + + const verifierToRun = request.params.id; + + // if nothing then complain + if(verifierToRun===undefined) { + LOG.error('HTTP request: incorrect usage of endpoint', LOG.LS.eHTTP); + response.send('Request failed. Incorrect use of endpoint. Be sure to include which verifier to use.'); + return; + } + + // handle the proper type + switch(verifierToRun){ + case 'edan': { + return await verifyEdanWorkflow(request,response); + } break; + + default: { + LOG.error(`HTTP request: unsupported verify type (${verifierToRun})`, LOG.LS.eHTTP); + response.send(`Request failed. 
Unsupported verify type/path (${verifierToRun})`); + } break; + } +} + +// TODO: progressively build up report so that if requested before done it returns partial results +// requires changing verifier to append after each subject, connecting tightly to workflow logic +// TODO: fork verifier(s) so it does not use the same event loop as the main server improving performance +// https://nodejs.org/api/child_process.html +// TODO: support server side events (SSE) to provide notifications to client on progress +async function verifyEdanWorkflow(req: Request, response: Response): Promise { + LOG.info('(Workflows) Verifying EDAN Records from endpoint...', LOG.LS.eGQL); + + const workflowEngine: WF.IWorkflowEngine | null = await WF.WorkflowFactory.getInstance(); + if (!workflowEngine) { + const error: string = 'verifiers createWorkflow could not load WorkflowEngine'; + sendResponseMessage(response,false,error); + return; + } + + // grab our local store and user ID + const LS: LocalStore | undefined = ASL.getStore(); + const idUser: number | undefined | null = LS?.idUser; + + // create our workflow (but don't start) and add it to the DB + const wfParams: WF.WorkflowParameters = { + eWorkflowType: COMMON.eVocabularyID.eWorkflowTypeVerifier, + //idSystemObject: undefined, // not operating on SystemObjects + //idProject: TODO: populate with idProject + idUserInitiator: idUser ?? 
undefined, // not getting user at this point (but should when behind wall) + autoStart: false // don't start the workflow because we need to configure it + }; + const workflow: WF.IWorkflow | null = await workflowEngine.create(wfParams); + if (!workflow) { + const error: string = `unable to create EDAN Verifier workflow: ${H.Helpers.JSONStringify(wfParams)}`; + sendResponseMessage(response,false,error); + return; + } + + // grab our config options from query params + const returnFile: boolean = req.query.returnFile==='true'?true:false; + const detailedLogs: boolean = req.query.details==='true'?true:false; + const subjectLimit: number = (req.query.limit)?parseInt(req.query.limit as string):10000; + const systemObjectId: number = (req.query.objectId)?parseInt(req.query.objectId as string):-1; + + // cast it to our verifier type (TODO: catch fails) and configure + const verifierWorkflow = workflow as WFV.WorkflowVerifier; + verifierWorkflow.config = { + collection: COL.CollectionFactory.getInstance(), + detailedLogs, + subjectLimit, + systemObjectId + }; + + // start our workflow + // TODO: check during execution for it timing out + const workflowResult: H.IOResults = await verifierWorkflow.start(); + if(!workflowResult || workflowResult.success===false) { + const error: string = 'EDAN Verifier workflow failed to start. '+workflowResult?.error; + sendResponseMessage(response,false,error); + return; + } + + // get/create our report + const iReport: REP.IReport | null = await REP.ReportFactory.getReport(); + if(!iReport) { + const error: string = 'EDAN Verifier workflow failed to get report.'; + LOG.error(error, LOG.LS.eGQL); + response.send('Verifing EDAN records FAILED!\n'+error); + return; + } + + // get the local store, our current report ID and fetch it + // WHY: can the IReport higher up expose the id for fetch or grabbing the ID? 
+ // const LS: LocalStore = await ASL.getOrCreateStore(); + const idWorkflowReport: number | undefined = LS?.getWorkflowReportID(); + if (!idWorkflowReport) { + const error: string = 'could not get workflow report ID'; + sendResponseMessage(response,false,error); + return; + } + + // get our report from the DB and configure + const workflowReport = await DBAPI.WorkflowReport.fetch(idWorkflowReport); + if (!workflowReport) { + sendResponseMessage(response,false,`unable to fetch report with id ${idWorkflowReport}`); + return; + } + + // if we have CSV output add it to our report + if(verifierWorkflow.result && verifierWorkflow.result.csvOutput) { + + // our desired filename + const now: string = new Date().toISOString().split('T')[0]; + + workflowReport.MimeType = 'text/csv'; + workflowReport.Data = verifierWorkflow.result.csvOutput; + workflowReport.Name = 'EDANVerifier_Results_'+now; + workflowReport.update(); + } else { + const error: string = 'Error with verifier result'; + sendResponseMessage(response,false,`unable to fetch report with id ${idWorkflowReport}`); + workflowReport.Data = error; + workflowReport.update(); + return; + } + + // if we return the file then do so, overwriting any message + if(returnFile && verifierWorkflow.result.csvOutput!=undefined) { + response.setHeader('Content-disposition', `attachment; filename=${workflowReport.Name}.csv`); + response.set('Content-Type', 'text/csv'); + response.statusMessage = 'Verifying EDAN records SUCCEEDED!'; + response.status(200).send(verifierWorkflow.result.csvOutput); + return; + } + + // create our download URL for future use. (NOTE: using HTTP so localhost works) + const workflowReportURL: string = RouteBuilder.DownloadWorkflowReport(idWorkflowReport,eHrefMode.ePrependServerURL); //`http://localhost:4000/download?idWorkflowReport=${idWorkflowReport}`; + LOG.info(`EDAN verifier SUCCEEDED!! 
(${workflowReportURL})`,LOG.LS.eGQL); + sendResponseMessage(response,true,getResponseMarkup(true,'Download Report',workflowReportURL));//`DOWNLOAD`); + + return; +} + +function sendResponseMessage(response: Response, success: boolean, message: string) { + if(success) { + LOG.info(`EDAN Verifier SUCCEEDED: ${message}`, LOG.LS.eGQL); + response.send(message); + } else { + LOG.error(`EDAN Verifier FAILED: ${message}`, LOG.LS.eGQL); + response.send(`Verifing EDAN records FAILED: ${message}`); + } +} + +function getResponseMarkup(success: boolean, message?: string, url?: string): string { + let output = ''; + output += '
'; + output += '
'; + output += ''; + + output += '
'; + if(success) { + output += `${message}`; + } else { + output += 'ERROR'; + output += `

${message}

`; + } + output += '
'; + + output += '
'; + output += '
'; + return output; +} \ No newline at end of file diff --git a/server/report/interface/ReportFactory.ts b/server/report/interface/ReportFactory.ts index 82bbdb4ee..9936e35e1 100644 --- a/server/report/interface/ReportFactory.ts +++ b/server/report/interface/ReportFactory.ts @@ -28,7 +28,8 @@ export class ReportFactory { idWorkflow, MimeType: 'text/html', Data: '', - idWorkflowReport: 0 + idWorkflowReport: 0, + Name: null }); if (!await workflowReport.create()) { LOG.error(`ReportFactory.getReport() unable to create WorkflowReport for workflow with ID ${JSON.stringify(idWorkflow)}`, LOG.LS.eRPT); diff --git a/server/tests/cache/VocabularyCache.test.ts b/server/tests/cache/VocabularyCache.test.ts index ebee7a091..4a7be612d 100644 --- a/server/tests/cache/VocabularyCache.test.ts +++ b/server/tests/cache/VocabularyCache.test.ts @@ -237,6 +237,7 @@ function vocabularyCacheTestWorker(eMode: eCacheTestMode): void { case COMMON.eVocabularyID.eWorkflowTypeCookJob: testVocabulary(vocabulary, 'Cook Job'); break; case COMMON.eVocabularyID.eWorkflowTypeIngestion: testVocabulary(vocabulary, 'Ingestion'); break; case COMMON.eVocabularyID.eWorkflowTypeUpload: testVocabulary(vocabulary, 'Upload'); break; + case COMMON.eVocabularyID.eWorkflowTypeVerifier: testVocabulary(vocabulary, 'Verifier'); break; case COMMON.eVocabularyID.eWorkflowStepTypeStart: testVocabulary(vocabulary, 'Start'); break; @@ -795,6 +796,7 @@ function vocabularyCacheTestWorker(eMode: eCacheTestMode): void { expect(await VocabularyCache.isVocabularyInSet(COMMON.eVocabularyID.eWorkflowTypeCookJob, COMMON.eVocabularySetID.eWorkflowType)).toBeTruthy(); expect(await VocabularyCache.isVocabularyInSet(COMMON.eVocabularyID.eWorkflowTypeIngestion, COMMON.eVocabularySetID.eWorkflowType)).toBeTruthy(); expect(await VocabularyCache.isVocabularyInSet(COMMON.eVocabularyID.eWorkflowTypeUpload, COMMON.eVocabularySetID.eWorkflowType)).toBeTruthy(); + expect(await 
VocabularyCache.isVocabularyInSet(COMMON.eVocabularyID.eWorkflowTypeVerifier, COMMON.eVocabularySetID.eWorkflowType)).toBeTruthy(); expect(await VocabularyCache.isVocabularyInSet(COMMON.eVocabularyID.eWorkflowStepTypeStart, COMMON.eVocabularySetID.eWorkflowStepWorkflowStepType)).toBeTruthy(); expect(await VocabularyCache.isVocabularyInSet(COMMON.eVocabularyID.eWorkflowEventIngestionUploadAssetVersion, COMMON.eVocabularySetID.eWorkflowEvent)).toBeTruthy(); diff --git a/server/tests/collections/EdanCollection.test.ts b/server/tests/collections/EdanCollection.test.ts index 982a73cde..311ef61d8 100644 --- a/server/tests/collections/EdanCollection.test.ts +++ b/server/tests/collections/EdanCollection.test.ts @@ -4,6 +4,7 @@ import * as COL from '../../collections/interface/'; import * as LOG from '../../utils/logger'; import * as H from '../../utils/helpers'; import * as L from 'lodash'; +import * as V from '../../utils/verifiers/EdanVerifier'; // import { join } from 'path'; afterAll(async done => { @@ -31,6 +32,7 @@ enum eTestType { eScrapeEDANListsIDs, eScrapeEDAN, eOneOff, + eEDANVerifier, } const eTYPE: eTestType = +eTestType.eRegressionSuite; // + needed here so that compiler stops thinking eTYPE has a type of eTestType.eRegressionSuite! 
@@ -112,10 +114,13 @@ describe('Collections: EdanCollection', () => { await scrapeEdan(ICol, 'd:\\Work\\SI\\EdanScrape.EDAN.txt', 0); }); break; + + case eTestType.eEDANVerifier: + executeEdanVerifier(ICol); + break; } }); - function executeTestQuery(ICol: COL.ICollection, query: string, expectNoResults: boolean, searchCollections: boolean = true, edanRecordType: string = ''): void { test('Collections: EdanCollection.queryCollection ' + query, async () => { @@ -674,7 +679,6 @@ async function scrapeDPOEdanMDMWorker(ICol: COL.ICollection, IDLabelSet: Set, records: EdanResult[]): Promise { // #region vz_migration @@ -2443,6 +2447,7 @@ async function scrapeDPOEdanListIDsWorker(ICol: COL.ICollection, IDLabelSet: Set await handleResultsWithIDs(ICol, 'edanmdm:nmah_1289214', '907', IDLabelSet, records); await handleResultsWithIDs(ICol, 'edanmdm:nmah_1289216', '907', IDLabelSet, records); } +// #endregion // #region Handle Results async function handleResultsEdanLists(ICol: COL.ICollection, WS: NodeJS.WritableStream | null, query: string, id: string, unitFilter?: string | undefined): Promise { @@ -2550,4 +2555,33 @@ async function handle3DContentQuery(ICol: COL.ICollection, _WS: NodeJS.WritableS LOG.error(`Content Query ${id ? id : ''}${url ? 
url : ''} [${queryID}] failed`, LOG.LS.eTEST); return false; } +// #endregion + +//---------------------------------------------------------------- +// #region EDAN Verifier +function executeEdanVerifier(ICol: COL.ICollection) { + + jest.setTimeout(3000000); + // HACK: to simplify calling the verifier and satisfying JEST requirements + test.only('Collections: EdanCollection.fieldCompare', async () => { + const verifierConfig: V.EdanVerifierConfig = { + collection: ICol, + subjectLimit: 1, + // detailedLogs: true, + // writeToFile: '../../EDAN-Verifier_Output.csv' + }; + const verifier: V.EdanVerifier = new V.EdanVerifier(verifierConfig); + const result: V.EdanVerifierResult = await verifier.verify(); + expect(result).toBeTruthy(); + }); + + //return true; +} + +// #endregion + +//---------------------------------------------------------------- +// #region DB verifier + + // #endregion \ No newline at end of file diff --git a/server/tests/db/dbcreation.test.ts b/server/tests/db/dbcreation.test.ts index c5b22da1e..27e9eb8c0 100644 --- a/server/tests/db/dbcreation.test.ts +++ b/server/tests/db/dbcreation.test.ts @@ -1695,7 +1695,8 @@ describe('DB Creation Test Suite', () => { idWorkflow: workflow.idWorkflow, MimeType: 'test/mimetype', Data: 'WorkflowReport test', - idWorkflowReport: 0 + idWorkflowReport: 0, + Name: null }); expect(workflowReport).toBeTruthy(); }); diff --git a/server/utils/verifiers/EdanVerifier.ts b/server/utils/verifiers/EdanVerifier.ts new file mode 100644 index 000000000..23ca7797f --- /dev/null +++ b/server/utils/verifiers/EdanVerifier.ts @@ -0,0 +1,426 @@ +import * as COL from '../../collections/interface'; +import * as LOG from '../logger'; +import * as DBAPI from '../../db'; +import * as V from './VerifierBase'; + +export type EdanVerifierConfig = { + collection: COL.ICollection; + detailedLogs?: boolean | undefined; // do we want to output detailed debug logs + logPrefix?: string | undefined; // what should logs be prefixed with + fixErrors?: 
boolean | undefined; // do we try to fix errors (todo) + subjectLimit?: number | undefined; // total number of subjects to process + systemObjectId?: number | undefined; // limit execution to this specific SystemObject + writeToFile?: string | undefined; // should we dump the output to a specific path +}; +export type EdanVerifierResult = { + success: boolean; + csvOutput?: string | undefined; +}; + +//---------------------------------------------------------------- +// EDAN VERIFIER +//---------------------------------------------------------------- +// [DPO3DPKRT-698] +// TODO: ocassional failed fetch requests (ECONNRESET). make more robust +// TODO: verify EDAN scene UUID are correct +// TODO: verify EDAN 3d_package match the most recent published version +// TODO: hook to endpoint and support running on a schedule via node_scheduler* +// TODO: add to workflow reports* +// TODO: fix DPO subject errors +export class EdanVerifier extends V.VerifierBase { + + protected config: EdanVerifierConfig; + + constructor(verifierConfig: EdanVerifierConfig){ + super(); + + this.config = verifierConfig; + this.config.collection = verifierConfig.collection; + + if(this.config.detailedLogs === undefined) + this.config.detailedLogs = false; + if(this.config.logPrefix === undefined) + this.config.logPrefix = 'EDAN Verifier'; + if(this.config.fixErrors === undefined) + this.config.fixErrors = false; + if(this.config.subjectLimit === undefined) + this.config.subjectLimit = 10000; // total number of subjects to process + if(this.config.systemObjectId === undefined) + this.config.systemObjectId = -1; // limit execution to this specific SystemObject + } + + public async verify(): Promise { + // our structure and header for saved output to CSV + const output: string[] = []; + output.push('ID,MDM,URL,SUBJECT,STATUS,TEST,DESCRIPTION,PACKRAT,EDAN,NOTES'); + + // fetch all subjects from Packrat DB + const subjects: DBAPI.Subject[] | null = await DBAPI.Subject.fetchAll(); /* istanbul ignore 
if */ + if (!subjects) { + LOG.error(`${this.config.logPrefix} could not get subjects from DB`, LOG.LS.eSYS); + return { success: false }; + } + if(subjects.length<=0) { + LOG.error(`${this.config.logPrefix} no subjects found in DB`, LOG.LS.eSYS); + return { success: false }; + } + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} Subjects: ${subjects.length}`, LOG.LS.eSYS); + + // loop through subjects, extract name, query from EDAN + for(let i=0; i=this.config.subjectLimit) break; + + // create our stats object and helper variable + const subject = subjects[i]; + + // get our system object for the subject to help with logging and identification + const systemObject: DBAPI.SystemObject | null = await subject.fetchSystemObject(); + if(!systemObject){ + LOG.error(`could not get SystemObject from subject. (id: ${subject.idSubject} | subject: ${subject.Name})`, LOG.LS.eSYS); + continue; + } + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} SystemObject: ${JSON.stringify(systemObject)}`, LOG.LS.eSYS); + + // (debug) if not the desired subject id skip + if(this.config.systemObjectId && this.config.systemObjectId>0){ + if(systemObject.idSystemObject!==this.config.systemObjectId) { + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} Subject skipping. IDs don not match.`, LOG.LS.eSYS); + continue; + } + } + + LOG.info(`${this.config.logPrefix} processing subject: ${subject.Name}`, LOG.LS.eSYS); + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} Subject:\t ${JSON.stringify(subject,null,0)}`, LOG.LS.eSYS); + + // get our subject's unit from Packrat DB + const packratUnit: DBAPI.Unit | null = await this.getSubjectUnit(subject); + if(!packratUnit) { + LOG.error(`${this.config.logPrefix} could not find a unit for subject. skipping... 
(id: ${systemObject.idSystemObject} | subject: ${subject.Name})`, LOG.LS.eSYS); + continue; + } + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} Packrat Units: ${JSON.stringify(packratUnit,null,0)}`, LOG.LS.eSYS); + + // get our subject's identifiers, details, and SystemObject id + const packratIdentifiers: V.IdentifierList | null = await this.getSubjectIdentifiers(subject,systemObject); + if(!packratIdentifiers) { + LOG.error(`${this.config.logPrefix} could not get identifiers for subject. skipping... (id: ${systemObject.idSystemObject} | subject: ${subject.Name})`, LOG.LS.eSYS); + continue; + } + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} Packrat Identifiers: ${JSON.stringify(packratIdentifiers,null,0)}`, LOG.LS.eSYS); + + // if we have an EDAN (should 100% be true) then determine if from DPO or not + // defaults to all subjects coming from EDAN + let isDPOSubject: boolean = false; + if(packratIdentifiers.edan && packratIdentifiers.edan.identifier) { + isDPOSubject = this.isIdentifierFromDPO(packratIdentifiers.edan.identifier); + } else { + LOG.error(`${this.config.logPrefix} could not get EDAN ID for subject. source of subject is ambiguous. (id: ${systemObject.idSystemObject} | subject: ${subject.Name})`, LOG.LS.eSYS); + } + + // get our packrat name + const packratName: string = subject.Name; + + // if we have an EDAN id we use it + let query: string = ''; + if(packratIdentifiers.edan?.identifier) + query = packratIdentifiers.edan.identifier.IdentifierValue; + else if(packratIdentifiers.ark?.identifier) + query = packratIdentifiers.ark.identifier.IdentifierValue; + else if(packratIdentifiers.preferred?.identifier) + query = packratIdentifiers.preferred.identifier.IdentifierValue; + else { + LOG.error(`${this.config.logPrefix} no good options for querying EDAN for subject. skipping... 
(id: ${systemObject.idSystemObject} | subject: ${subject.Name})`, LOG.LS.eSYS); + continue; + } + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} Query: ${query}`, LOG.LS.eSYS); + + // query EDAN with our identifier and skip if nothing returned + const options: COL.CollectionQueryOptions | null = { + recordType: 'edanmdm', + // gatherRaw: true, + gatherIDMap: true, + }; + const results: COL.CollectionQueryResults | null = await this.config.collection.queryCollection(query, 10, 0, options); + if(!results || results.records.length<=0) { + LOG.error(`${this.config.logPrefix} did not receive records for subject and identifier. skipping... (id: ${systemObject.idSystemObject} | subject: ${subject.Name} | query: ${query})`, LOG.LS.eSYS); + continue; + } + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} EDAN Results: ${JSON.stringify(results,null,0)}`, LOG.LS.eSYS); + + // structure to hold our units/identifiers + let edanUnit: DBAPI.Unit | null = null; + let edanIdentifiers: V.IdentifierList | null = null; + let edanName: string = ''; + + // if we received multipled records from EDAN, complain but keep going (compares will place subject in output) + if(results.records.length==1) { + const record = results.records[0]; + + // get our EDAN units from this record + const unit: DBAPI.Unit | null = await this.getUnitFromEdanUnit(record.unit); + if(!unit) + LOG.error(`${this.config.logPrefix} no known units found for subject (id: ${systemObject.idSystemObject} | subject: ${subject.Name}) and EDAN record name (${record.name})`, LOG.LS.eSYS); + else + edanUnit = unit; + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} EDAN Units: ${JSON.stringify(unit,null,0)}`, LOG.LS.eSYS); + + // get our EDAN identifiers from this record + const identifiers: V.IdentifierList | null = await this.getEdanRecordIdentifiers(record); + if(!identifiers) + LOG.error(`${this.config.logPrefix} no EDAN identifiers found for subject (id: 
${systemObject.idSystemObject} | subject: ${subject.Name}) and EDAN record name (${record.name})`, LOG.LS.eSYS); + else + edanIdentifiers = identifiers; + if(this.config.detailedLogs) + LOG.info(`${this.config.logPrefix} EDAN Ids: ${JSON.stringify(identifiers,null,0)}`, LOG.LS.eSYS); + + // get our name + edanName = record.name; + + } else { + LOG.error(`${this.config.logPrefix} received multiple records from EDAN when expecting 1. skipping... (id: ${systemObject.idSystemObject} | subject: ${subject.Name} | query: ${query})`, LOG.LS.eSYS); + } + + // a structure to hold our output + const outputPrefix = `${systemObject.idSystemObject},${packratIdentifiers.edan?.identifier?.IdentifierValue},${this.getSystemObjectDetailsURL(systemObject)},${JSON.stringify(subject.Name)},`; + + // Compare: Name + if(packratName!=edanName) { + LOG.error(`${this.config.logPrefix} Subject name in Packrat and EDAN are not the same (id:${systemObject.idSystemObject} | Packrat:${packratName} | EDAN:${edanName})`, LOG.LS.eSYS); + let str = outputPrefix; + str += 'error,'; + str += 'name,'; + str += 'Subject name not the same,'; + str += '"' + packratName + '",'; + str += '"' + edanName + '",'; + str += ((isDPOSubject)?'DPO created subject. needs manual fix':'EDAN subject. 
needs manual fix.')+','; + output.push(str); + } else { + LOG.info(`${this.config.logPrefix} name compare succeeded!`, LOG.LS.eSYS); + let str = outputPrefix; + str += 'success,'; + str += 'name,'; + str += ','; + str += '"' + packratName + '",'; + str += '"' + edanName + '",'; + str += ','; + output.push(str); + } + + // Compare: Units + if(packratUnit && edanUnit) { + // if both present then we check the value match + let foundUnit: boolean = false; + if(edanUnit.idUnit===packratUnit.idUnit) { + foundUnit = true; + } + + // if we couldn't find the unit, add error otherwise success + if(!foundUnit) { + LOG.error(`${this.config.logPrefix} Packrat unit does not match EDAN units (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + let str = outputPrefix; + str += 'error,'; + str += 'units,'; + str += 'Packrat unit does not match EDAN units,'; + str += '"' + packratUnit.Abbreviation + ' - ' + packratUnit.Name + '",'; + str += '"' + edanUnit.Abbreviation + ' - ' + edanUnit.Name +'",'; + + // apply edan unit packrat subject + if(isDPOSubject) { + str += 'DPO subject. needs manual fix to update EDAN unit info'; + } else { + if(this.config.fixErrors) { + const replaceUnitResult: boolean = await this.replacePackratUnit(packratUnit,edanUnit); + if(!replaceUnitResult) { + LOG.error(`${this.config.logPrefix} failed to update Packrat unit to match EDAN (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + str += 'EDAN subject. failed to automatic update. check logs or do manually'; + } else { + LOG.info(`${this.config.logPrefix} successfully updated Packrat unit to match EDAN (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + str += 'EDAN subject. updated Packrat unit to match EDAN'; + } + } else { + str += 'EDAN subject. automatic updating disabled. 
rerun or manual fix required'; + } + } + + // store for output + output.push(str); + } else { + LOG.info(`${this.config.logPrefix} Unit compare succeeded! (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + let str = outputPrefix; + str += 'success,'; + str += 'units,'; + str += ','; + str += '"' + packratUnit.Abbreviation + ' - ' + packratUnit.Name + '",'; + str += '"' + edanUnit.Abbreviation + ' - ' + edanUnit.Name + '",'; + output.push(str); + } + } else if(packratUnit && !edanUnit) { + LOG.error(`${this.config.logPrefix} Packrat unit not found in EDAN record (id:${systemObject.idSystemObject} | subject:${subject.Name} | packrat:${JSON.stringify(packratUnit)})`, LOG.LS.eSYS); + let str = outputPrefix; + str += 'error,'; + str += 'units,'; + str += 'Packrat unit not found in EDAN,'; + str += '"' + packratUnit.Abbreviation + ' - ' + packratUnit.Name + '",'; + str += 'null,'; + str += ((isDPOSubject)?'DPO created subject. needs manual fix to apply to EDAN.':'EDAN subject. needs manual fix because EDAN should have unit assigned. (todo)')+','; + output.push(str); + } else if(!packratUnit && edanUnit) { + LOG.error(`${this.config.logPrefix} EDAN unit not found in Packrat (id:${systemObject.idSystemObject} | subject:${subject.Name} | EDAN:${edanUnit.Name})`, LOG.LS.eSYS); + let str = outputPrefix; + str += 'error,'; + str += 'units,'; + str += 'EDAN unit not found in Packrat,'; + str += 'null,'; + str += '"' + edanUnit.Abbreviation + ' - ' + edanUnit.Name +'",'; + + // apply edan unit to packrat unit (creating new unit in process) + if(isDPOSubject) { + str += 'DPO subject. 
needs manual fix as Packrat should have unit'; + } else { + if(this.config.fixErrors) { + const replaceUnitResult: boolean = await this.replacePackratUnit(packratUnit,edanUnit); + if(!replaceUnitResult) { + LOG.error(`${this.config.logPrefix} failed to update Packrat unit to match EDAN (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + str += 'EDAN subject. failed to automatic update. check logs or do manually'; + } else { + LOG.info(`${this.config.logPrefix} successfully updated Packrat unit to match EDAN (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + str += 'EDAN subject. updated Packrat unit to match EDAN'; + } + } else { + str += 'EDAN subject. automatic updating disabled. rerun or manual fix required'; + } + } + + // store for output + output.push(str); + + } else { + LOG.error(`${this.config.logPrefix} Packrat & EDAN units not found (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + let str = outputPrefix; + str += 'error,'; + str += 'units,'; + str += 'Packrat and EDAN units not,'; + str += 'null,'; + str += 'null,'; + str += 'needs manual fix as neither source has a unit,'; + output.push(str); + } + + // Compare: Identifiers + if(packratIdentifiers && edanIdentifiers) { + let idMismatch: boolean = false; + + // figure out how many identifiers edan has + const edanIdCount = edanIdentifiers.details.length; + + // build our list of identifiers + // TODO: clean strings for CSV output + const strPackratIds: string[] = (packratIdentifiers.details)?(packratIdentifiers.details?.map(id=>{ return id.identifier?.IdentifierValue+' ('+id.identifierType?.Term+')'; })):([]); + const strEdanIds: string[] = []; + for(const id of edanIdentifiers.details) { + strEdanIds.push(id.identifier?.IdentifierValue+' ('+id.identifierType?.Term+')'); + } + + // see if we have the same count for a quick catch of difference, otherwise do deeper compare + if(packratIdentifiers.details?.length != 
edanIdCount) { + idMismatch = true; + } else { + // cycle through all edan identifiers and look for match in packrat + let didFindId = false; + for(const id of edanIdentifiers.details) { + for(const packratId of packratIdentifiers.details) { + if(packratId.identifierTypeEnum===id.identifierTypeEnum && + packratId.identifier?.IdentifierValue===id.identifier?.IdentifierValue) { + didFindId = true; + break; + } + } + } + if(!didFindId) idMismatch = true; + } + + // if we have a mismatch then output + if(idMismatch) { + // log error and details + LOG.error(`${this.config.logPrefix} Packrat has different identifiers than EDAN (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + LOG.error(`${this.config.logPrefix} \tPackrat: `+strPackratIds.sort().join(','), LOG.LS.eSYS); + LOG.error(`${this.config.logPrefix} \t EDAN: `+strEdanIds, LOG.LS.eSYS); + + // if we're an EDAN subject then we need to fix the situation + if(!isDPOSubject) { + // TODO: wipe out Packrat identifiers and replace with EDAN modifiers + const replaceIdsResult: boolean = await this.replacePackratIdentifiers(packratIdentifiers,edanIdentifiers,systemObject); + if(!replaceIdsResult) + LOG.error(`${this.config.logPrefix} could not replace Packrat identifiers. function not finished.`, LOG.LS.eSYS); + } + + // build our output string + let str = outputPrefix; + str += 'error,'; + str += 'identifiers,'; + str += 'Packrat identifiers do not match EDAN,'; + str += '"'+strPackratIds.sort().join('\n')+'",'; + str += '"'+strEdanIds.sort().join('\n')+'",'; + + // apply edan unit to packrat unit (creating new unit in process) + if(isDPOSubject) { + str += 'DPO subject. 
needs manual fix'; + } else { + if(this.config.fixErrors) { + const replaceUnitResult: boolean = await this.replacePackratIdentifiers(packratIdentifiers,edanIdentifiers,systemObject); + if(!replaceUnitResult) { + LOG.error(`${this.config.logPrefix} failed to update Packrat identifiers to match EDAN (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + str += 'EDAN subject. failed to automatic update. check logs or do manually'; + } else { + LOG.info(`${this.config.logPrefix} successfully updated Packrat identifiers to match EDAN (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + str += 'EDAN subject. updated Packrat identifiers to match EDAN'; + } + } else { + str += 'EDAN subject. automatic updating disabled. rerun or manual fix required'; + } + } + + // store for output + output.push(str); + + } else { + LOG.info(`${this.config.logPrefix} Identifier compare succeeded! (id:${systemObject.idSystemObject} | subject:${subject.Name})`, LOG.LS.eSYS); + let str = outputPrefix; + str += 'success,'; + str += 'identifiers,'; + str += ','; + str += '"'+strPackratIds.sort().join('\n')+'",'; + str += '"'+strEdanIds.sort().join('\n')+'",'; + str += ','; + output.push(str); + } + } else if(packratIdentifiers && !edanIdentifiers) { + // TODO + } else if(!packratIdentifiers && edanIdentifiers) { + // TODO + } + } + + // HACK: dumping to local file until moved into Workflow reports. 
+        if(this.config.writeToFile !== undefined) {
+            require('fs').writeFile(this.config.writeToFile,output.join('\n'), err=>{
+                if(err) {
+                    LOG.error(`${this.config.logPrefix}: ${err}`, LOG.LS.eSYS);
+                }
+            });
+        }
+
+        return { success: true, csvOutput: output.join('\n') };
+    }
+}
\ No newline at end of file
diff --git a/server/utils/verifiers/VerifierBase.ts b/server/utils/verifiers/VerifierBase.ts
new file mode 100644
index 000000000..b21176c6f
--- /dev/null
+++ b/server/utils/verifiers/VerifierBase.ts
@@ -0,0 +1,332 @@
+import * as COL from '../../collections/interface';
+import * as LOG from '../logger';
+import * as DBAPI from '../../db';
+import * as CACHE from '../../cache';
+import * as COMMON from '@dpo-packrat/common';
+// import * as H from '../helpers';
+
+/** A single identifier together with its vocabulary type and the matching vocabulary enum. */
+export type IdentifierDetails = {
+    identifier: DBAPI.Identifier | null;
+    identifierType: DBAPI.Vocabulary | null;
+    identifierTypeEnum: COMMON.eVocabularyID | null;
+};
+
+/** Identifiers gathered for one subject or record, with shortcuts to the preferred, EDAN, and ARK entries. */
+export type IdentifierList = {
+    preferred: IdentifierDetails | null;
+    edan: IdentifierDetails | null;
+    ark: IdentifierDetails | null;
+    details: IdentifierDetails[]; // complete list of all identifiers
+};
+
+/** Shared lookup helpers for verifiers that compare Packrat subjects against EDAN records. */
+export class VerifierBase {
+
+    constructor() {}
+
+    /**
+     * Looks up the vocabulary entry and the vocabulary enum for an identifier's type.
+     * @param identifier identifier whose idVIdentifierType is resolved
+     * @returns the identifier with its type details, or null (after logging) if either lookup fails
+     */
+    protected async getIdentifierType(identifier: DBAPI.Identifier): Promise<IdentifierDetails | null> {
+
+        // grab the identifier type object from the Packrat DB
+        // TODO: use Vocabulary.CACHE to reduce DB hits
+        const identifierType: DBAPI.Vocabulary | null = await DBAPI.Vocabulary.fetch(identifier.idVIdentifierType);
+        if(!identifierType){
+            LOG.error(`could not find identifier type in DB (identifier: ${identifier.idVIdentifierType} )`, LOG.LS.eSYS);
+            return null;
+        }
+
+        // pull from enumeration from the CACHE (vocabulary id -> enum)
+        const identifierTypeEnum: COMMON.eVocabularyID | undefined = await CACHE.VocabularyCache.vocabularyIdToEnum(identifier.idVIdentifierType);
+        if(identifierTypeEnum===undefined){
+            LOG.error(`could not find enumerator for identifier type (${identifier.idVIdentifierType}) in Cache`, LOG.LS.eSYS);
+            return null;
+        }
+
+        return { identifier, identifierType, identifierTypeEnum };
+    }
+
+    /**
+     * Collects a subject's identifiers: the preferred identifier first, then every identifier
+     * attached to its SystemObject, skipping entries whose type and value are already listed.
+     * @param subject subject whose preferred identifier is fetched
+     * @param systemObject SystemObject whose identifiers are fetched
+     * @returns the identifier list, or null if the SystemObject's identifiers could not be fetched
+     */
+    protected async getSubjectIdentifiers(subject: DBAPI.Subject, systemObject: DBAPI.SystemObject): Promise<IdentifierList | null> {
+
+        // structure to hold our results
+        const result: IdentifierList = { preferred: null, edan: null, ark: null, details: [] };
+
+        // grab the preferred identifier, if nothing then leave null so calling can decide action
+        if(subject.idIdentifierPreferred) {
+            const preferredIdentifier: DBAPI.Identifier | null = await DBAPI.Identifier.fetch(subject.idIdentifierPreferred);
+            if(!preferredIdentifier){
+                LOG.error(`subject's preferredId not found in the DB (id: ${subject.idSubject} | subject: ${subject.Name})`, LOG.LS.eSYS);
+                // subjectStats[i].isValid = false;
+            } else {
+                // grab our identifier details (type) and store it
+                const preferredIdentifierDetails: IdentifierDetails | null = await this.getIdentifierType(preferredIdentifier);
+                if(preferredIdentifierDetails) {
+                    result.preferred = preferredIdentifierDetails;
+                    result.details.push(preferredIdentifierDetails);
+                }
+
+                // check for edan/ark and store as appropriate because the preferred id may be ARK too
+                switch(preferredIdentifierDetails?.identifierTypeEnum){
+                    case COMMON.eVocabularyID.eIdentifierIdentifierTypeEdanRecordID: {
+                        if(!result.edan) result.edan = preferredIdentifierDetails;
+                    } break;
+
+                    case COMMON.eVocabularyID.eIdentifierIdentifierTypeARK: {
+                        if(!result.ark) result.ark = preferredIdentifierDetails;
+                    } break;
+                }
+            }
+        }
+
+        // grab our list of identifiers from the SystemObject id
+        const identifiers: DBAPI.Identifier[] | null = await DBAPI.Identifier.fetchFromSystemObject(systemObject.idSystemObject);
+        if(!identifiers){
+            LOG.error(`could not get identifiers from subject (id: ${subject.idSubject} | subject: ${subject.Name})`, LOG.LS.eSYS);
+            return null;
+        }
+        if(identifiers.length<=0){
+            LOG.info(`(WARNING) no identifiers assigned to subject (id: ${subject.idSubject} | subject: ${subject.Name})`, LOG.LS.eSYS);
+            return result;
+        }
+
+        // cycle through all identifiers, find EDAN/ARK, and push to list
+        for(const identifier of identifiers) {
+
+            // get our details for this identifier, skip if error, store if valid
+            const details: IdentifierDetails | null = await this.getIdentifierType(identifier);
+            if(!details) {
+                LOG.error(`could not get identifier details from subject (identifier: ${identifier.IdentifierValue} | id: ${subject.idSubject} | subject: ${subject.Name})`, LOG.LS.eSYS);
+                continue;
+            }
+
+            // make sure it doesn't already exist before pushing it
+            // TODO: revisit test in case of 'undefines'
+            let idExists: boolean = false;
+            for(const id of result.details){
+                if(details.identifierTypeEnum===id.identifierTypeEnum &&
+                    details.identifier?.IdentifierValue==id.identifier?.IdentifierValue) {
+                    idExists = true;
+                    break;
+                }
+            }
+            if(idExists) continue;
+
+            // if not found push it and categorize it
+            result.details.push(details);
+
+            // check the enumeration type to see if it's an edan or ark type
+            switch(details.identifierTypeEnum){
+                case COMMON.eVocabularyID.eIdentifierIdentifierTypeEdanRecordID: {
+                    if(!result.edan) result.edan = details;
+                } break;
+
+                case COMMON.eVocabularyID.eIdentifierIdentifierTypeARK: {
+                    if(!result.ark) result.ark = details;
+                } break;
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Fetches the Packrat unit referenced by subject.idUnit.
+     * @returns the unit, or null (after logging) if the DB returned none
+     */
+    protected async getSubjectUnit(subject: DBAPI.Subject): Promise<DBAPI.Unit | null> {
+
+        const packratUnit = await DBAPI.Unit.fetch(subject.idUnit);
+        if(!packratUnit) {
+            LOG.error(`Packrat DB did not return a unit for subject. (id: ${subject.idSubject} | subject: ${subject.Name})`, LOG.LS.eSYS);
+            return null;
+        }
+
+        // Todo: any additional verification or handling?
+
+        return packratUnit;
+    }
+
+    /**
+     * Maps an EDAN unit string to a Packrat unit through the UnitEdan table.
+     * @param edanUnit unit value taken from an EDAN record; matched via UnitEdan.fetchFromAbbreviation
+     * @returns the mapped unit, or null (after logging) if no mapping or no unit was found
+     */
+    protected async getUnitFromEdanUnit(edanUnit: string): Promise<DBAPI.Unit | null> {
+
+        // TODO: relocate logic to central/shared location to benefit ingestion
+
+        // see if Packrat's UnitEdan table has a direct match for this unit.
+        const edanUnits: DBAPI.UnitEdan | null = await DBAPI.UnitEdan.fetchFromAbbreviation(edanUnit);
+        if(edanUnits && edanUnits.idUnit) { // && edanUnits.length==1 && edanUnits[0].idUnit) {
+            // await the fetch: an un-awaited Promise is always truthy, which skipped the error log below
+            const result = await DBAPI.Unit.fetch(edanUnits.idUnit);
+            if(result) return result;
+        }
+
+        LOG.error(`did not find EDAN unit in the UnitEdan DB. investigate adding it... (${edanUnit}) `, LOG.LS.eSYS);
+        return null;
+    }
+
+    /**
+     * Builds the identifier list for an EDAN record from its identifierCollection, identifierPublic,
+     * and identifierMap values, matching each against identifiers already stored in Packrat.
+     * @param record EDAN query result record
+     * @returns the identifier list; no code path in this implementation returns null
+     */
+    protected async getEdanRecordIdentifiers(record: COL.CollectionQueryResultRecord): Promise<IdentifierList | null> {
+
+        // structure to hold our results
+        const result: IdentifierList = { preferred: null, edan: null, ark: null, details: [] };
+
+        // see if we have an EDAN id stored
+        if(record.identifierCollection) {
+            // get the identifier if it exists
+            // HACK: prefixing identifier with 'edanmdm' to match Packrat's records
+            const edanIdentifiers: DBAPI.Identifier[] | null = await DBAPI.Identifier.fetchFromIdentifierValue('edanmdm:'+record.identifierCollection);
+            if(edanIdentifiers) {
+
+                // cycle through and get our type and details
+                for(const identifier of edanIdentifiers) {
+                    const details: IdentifierDetails | null = await this.getIdentifierType(identifier);
+                    if(!details) {
+                        LOG.error(`could not get details for EDAN identifier (type: ${identifier.idVIdentifierType} | value:${identifier.IdentifierValue})`, LOG.LS.eSYS);
+                        continue;
+                    }
+
+                    // if we have an identifier that is of the same type then store
+                    if(details.identifierTypeEnum===COMMON.eVocabularyID.eIdentifierIdentifierTypeEdanRecordID) {
+                        result.edan = details;
+                        result.details.push(details);
+                        break;
+                    }
+                }
+            } else {
+                // TODO: create new identifier, type, and of EDAN Record ID type
+            }
+        }
+
+        // see if we have an ARK id stored
+        if(record.identifierPublic) {
+            // get the identifier if it exists
+            const arkIdentifiers: DBAPI.Identifier[] | null = await DBAPI.Identifier.fetchFromIdentifierValue(record.identifierPublic);
+            if(arkIdentifiers) {
+                // cycle through and get our type and details
+                for(const identifier of arkIdentifiers) {
+                    const details: IdentifierDetails | null = await this.getIdentifierType(identifier);
+                    if(!details) {
+                        LOG.error(`could not get details for ARK identifier (type: ${identifier.idVIdentifierType} | value:${identifier.IdentifierValue})`, LOG.LS.eSYS);
+                        continue;
+                    }
+
+                    // if we have an identifier that is of the same type then store
+                    // todo: verify it's an actual ARK id
+                    if(details.identifierTypeEnum===COMMON.eVocabularyID.eIdentifierIdentifierTypeARK) {
+                        result.ark = details;
+                        result.details.push(details);
+                        break;
+                    }
+                }
+            } else {
+                // todo: create new identifier, type, and of ARK type
+            }
+        }
+
+        // handle identifiers by checking if any returned by EDAN
+        if(record.identifierMap) {
+            for (const [ label, content ] of record.identifierMap) {
+
+                // get our type for this identifier
+                const identifierType: DBAPI.Vocabulary | undefined = await CACHE.VocabularyCache.mapIdentifierType(label);
+                if (!identifierType) {
+                    LOG.error(`\tencountered unknown identifier type ${label} for EDAN record ${record.name}`, LOG.LS.eSYS);
+                    continue;
+                }
+
+                // pull enumeration from the CACHE (vocabulary id -> enum)
+                const identifierTypeEnum: COMMON.eVocabularyID | undefined = await CACHE.VocabularyCache.vocabularyIdToEnum(identifierType.idVocabulary);
+                if(identifierTypeEnum===undefined){
+                    LOG.error(`\tcould not find enumerator for identifier type (${identifierType.Term}) in Cache`, LOG.LS.eSYS);
+                    continue;
+                }
+
+                // if identifier exists in our database (value & type) then store it
+                const identifiers: DBAPI.Identifier[] | null = await DBAPI.Identifier.fetchFromIdentifierValue(content);
+                if(identifiers) {
+                    for(const identifier of identifiers) {
+                        const details: IdentifierDetails | null = await this.getIdentifierType(identifier);
+                        if(!details) {
+                            LOG.error(`could not get details for EDAN identifier (type: ${identifier.idVIdentifierType} | value:${identifier.IdentifierValue})`, LOG.LS.eSYS);
+                            continue;
+                        }
+
+                        // if we have an identifier that is of the same type then store
+                        if(details.identifierTypeEnum===identifierTypeEnum) {
+                            result.details.push(details);
+                            break;
+                        }
+                    }
+                } else {
+                    // didn't find the identifier in our database so create one
+                    // TODO: make DBAPI.Identifier object
+                    const details: IdentifierDetails = { identifier: null, identifierType, identifierTypeEnum };
+                    result.details.push(details);
+                }
+
+                // console.log('EDAN: '+label+'|'+content+'|'+JSON.stringify(vIdentifierType));
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Placeholder for pointing the subject at the EDAN unit; currently performs no update.
+     * @returns always true
+     */
+    protected async replacePackratUnit(_packratUnit: DBAPI.Unit | null, _edanUnit: DBAPI.Unit): Promise<boolean> {
+        // TODO: update the Subject record, and point it at the correct idUnit for the Edan Unit
+        return true;
+    }
+
+    /**
+     * Placeholder for replacing Packrat identifiers with the EDAN ones; currently performs no update.
+     * @returns always true
+     */
+    protected async replacePackratIdentifiers(_packratIdentifiers: IdentifierList, _edanIdentifiers: IdentifierList, _systemObject: DBAPI.SystemObject): Promise<boolean> {
+
+        // [?] do we remove previous identifiers?
+        // [?] do we repurpose them by reassign new values keeping ids (still need to add/remove if count mismatch)?
+        // cycle through edan identifiers creating new entries in the DB for each attached to the same SystemObject
+
+        return true;
+    }
+
+    /** Returns true when the identifier value starts with 'dpo_3d' or 'edanmdm:dpo_3d'. */
+    protected isIdentifierFromDPO(identifier: DBAPI.Identifier): boolean {
+        // simply check if the EDAN id starts with the DPO prefix.
+        // TODO: make more robust with additional checks(?)
+        return (identifier.IdentifierValue.startsWith('dpo_3d') || identifier.IdentifierValue.startsWith('edanmdm:dpo_3d'));
+    }
+
+    /**
+     * Builds an '=HYPERLINK("...")' formula string for a SystemObject's repository details page.
+     * @param systemObject object whose idSystemObject is appended to the URL
+     * @param baseURL scheme, host, and port of the Packrat site; defaults to the previously hard-coded test server
+     * @returns the formula text
+     */
+    protected getSystemObjectDetailsURL(systemObject: DBAPI.SystemObject, baseURL: string = 'https://packrat-test.si.edu:8443'): string {
+        return '=HYPERLINK("'+baseURL+'/repository/details/'+systemObject.idSystemObject+'")';
+    }
+
+}
diff --git a/server/workflow/impl/Packrat/WorkflowEngine.ts b/server/workflow/impl/Packrat/WorkflowEngine.ts
index f1f70cdb5..242017ff9 100644
--- a/server/workflow/impl/Packrat/WorkflowEngine.ts
+++ b/server/workflow/impl/Packrat/WorkflowEngine.ts
@@ -1,9 +1,12 @@
 /* eslint-disable @typescript-eslint/no-explicit-any, @typescript-eslint/explicit-module-boundary-types */
 import * as WF from '../../interface';
 import * as WFP from '../../../workflow/impl/Packrat';
+
 import { WorkflowJob } from './WorkflowJob';
 import { WorkflowIngestion } from './WorkflowIngestion';
 import { WorkflowUpload } from './WorkflowUpload';
+import { WorkflowVerifier } from './WorkflowVerifier';
+
 import * as COOK from '../../../job/impl/Cook';
 import * as LOG from '../../../utils/logger';
 import * as CACHE from '../../../cache';
@@ -66,10 +69,14 @@ export class WorkflowEngine implements WF.IWorkflowEngine {
         if (WFC.workflow)
             this.workflowMap.set(WFC.workflow.idWorkflow, workflow);
 
-        const startResults: H.IOResults = await workflow.start();
-        if (!startResults) {
-            LOG.error(`WorkflowEngine.create failed to start workflow ${COMMON.eVocabularyID[workflowParams.eWorkflowType]}`, LOG.LS.eWF);
-            return null;
+        // if we want to automatically start the job
+        const doStart: boolean = (workflowParams.autoStart === undefined) ?
true : workflowParams.autoStart;
+        if(doStart) {
+            const startResults: H.IOResults = await workflow.start();
+            if (!startResults) {
+                LOG.error(`WorkflowEngine.create failed to start workflow ${COMMON.eVocabularyID[workflowParams.eWorkflowType]}`, LOG.LS.eWF);
+                return null;
+            }
         }
         LOG.info(`WorkflowEngine.created workflow [${this.workflowMap.size}]: ${JSON.stringify(workflowParams)}`, LOG.LS.eWF);
         return workflow;
@@ -553,6 +560,7 @@ export class WorkflowEngine implements WF.IWorkflowEngine {
             case COMMON.eVocabularyID.eWorkflowTypeCookJob: return await WorkflowJob.constructWorkflow(workflowParams, WFC);
             case COMMON.eVocabularyID.eWorkflowTypeIngestion: return await WorkflowIngestion.constructWorkflow(workflowParams, WFC);
             case COMMON.eVocabularyID.eWorkflowTypeUpload: return await WorkflowUpload.constructWorkflow(workflowParams, WFC);
+            case COMMON.eVocabularyID.eWorkflowTypeVerifier: return await WorkflowVerifier.constructWorkflow(workflowParams, WFC);
         }
         return null;
     }
diff --git a/server/workflow/impl/Packrat/WorkflowVerifier.ts b/server/workflow/impl/Packrat/WorkflowVerifier.ts
new file mode 100644
index 000000000..75a23d2b5
--- /dev/null
+++ b/server/workflow/impl/Packrat/WorkflowVerifier.ts
@@ -0,0 +1,102 @@
+import * as WF from '../../interface';
+import * as DBAPI from '../../../db';
+import * as H from '../../../utils/helpers';
+import * as COMMON from '@dpo-packrat/common';
+import * as COL from '../../../collections/interface/';
+import * as LOG from '../../../utils/logger';
+import * as V from '../../../utils/verifiers/EdanVerifier';
+
+
+// This Workflow runs the EDAN verifier (see utils/verifiers/EdanVerifier).
+// Its configuration must be assigned to 'config' after construction and before start() is called;
+// the verifier output is stored in 'result' once start() has finished.
+export class WorkflowVerifier implements WF.IWorkflow {
+    private workflowParams: WF.WorkflowParameters;
+    private workflowData: DBAPI.WorkflowConstellation;
+    public config: V.EdanVerifierConfig | null = null; // can't pass in due to WorkflowEngine calls not accepting extra params
+    public result: V.EdanVerifierResult | null = null;
+
+    /** Factory called by WorkflowEngine for eWorkflowTypeVerifier; always returns a new instance. */
+    static async constructWorkflow(workflowParams: WF.WorkflowParameters, WFC: DBAPI.WorkflowConstellation): Promise<WorkflowVerifier | null> {
+        return new WorkflowVerifier(workflowParams, WFC);
+    }
+
+    constructor(workflowParams: WF.WorkflowParameters, workflowData: DBAPI.WorkflowConstellation) {
+        this.workflowParams = workflowParams;
+        this.workflowData = workflowData;
+        // bare reads of both fields; presumably kept to silence unused-member warnings (workflowParams is not read elsewhere)
+        this.workflowParams; this.workflowData;
+        // this.result = null;
+    }
+
+    /**
+     * Runs the EDAN verifier with the assigned config and stores its output in 'result'.
+     * @returns success=false if config is unset or the verifier did not report success
+     */
+    async start(): Promise<H.IOResults> {
+
+        if(this.config==null) {
+            LOG.error('WorkflowVerifier cannot start. configuration object not set.', LOG.LS.eWF);
+            return { success: false };
+        }
+
+        // FUTURE: create a new workflow step specific to verifier
+        // once started should be able to get active ID
+        // use ID with 'downloads' to get URL for report
+        // focus on 'downloads' for getting report
+        const verifierConfig: V.EdanVerifierConfig = {
+            collection: COL.CollectionFactory.getInstance(),
+            subjectLimit: this.config.subjectLimit,
+            detailedLogs: this.config.detailedLogs,
+            systemObjectId: this.config.systemObjectId,
+            // writeToFile: '../../EDAN-Verifier_Output.csv'
+        };
+        const verifier: V.EdanVerifier = new V.EdanVerifier(verifierConfig);
+        this.result = await verifier.verify(); // TODO: how to avoid waiting for this and check at higher level
+
+        // check/create any needed workflow steps and change the state
+        const workflowStep: DBAPI.WorkflowStep | null = (!this.workflowData.workflowStep || this.workflowData.workflowStep.length <= 0)
+            ? null : this.workflowData.workflowStep[this.workflowData.workflowStep.length - 1];
+        if (workflowStep) {
+            workflowStep.setState(COMMON.eWorkflowJobRunStatus.eRunning);
+            await workflowStep.update();
+        }
+
+        // surface the verifier's outcome instead of unconditionally reporting success
+        return { success: this.result?.success === true };
+    }
+
+    /** No per-step processing is done; always reports success with the workflow complete. */
+    async update(_workflowStep: DBAPI.WorkflowStep, _jobRun: DBAPI.JobRun): Promise<WF.WorkflowUpdateResults> {
+        return { success: true, workflowComplete: true };
+    }
+
+    /**
+     * Writes eStatus to the most recent workflow step.
+     * @returns workflowComplete=true for eDone, eError, or eCancelled; success=false if no step exists or the update fails
+     */
+    async updateStatus(eStatus: COMMON.eWorkflowJobRunStatus): Promise<WF.WorkflowUpdateResults> {
+        const workflowComplete: boolean = (eStatus === COMMON.eWorkflowJobRunStatus.eDone
+            || eStatus === COMMON.eWorkflowJobRunStatus.eError
+            || eStatus === COMMON.eWorkflowJobRunStatus.eCancelled);
+
+        const workflowStep: DBAPI.WorkflowStep | null = (!this.workflowData.workflowStep || this.workflowData.workflowStep.length <= 0)
+            ? null : this.workflowData.workflowStep[this.workflowData.workflowStep.length - 1];
+
+        if (!workflowStep)
+            return { success: false, workflowComplete, error: 'Missing WorkflowStep' };
+        workflowStep.setState(eStatus);
+        const success: boolean = await workflowStep.update();
+        return { success, workflowComplete, error: success ? '' : 'Database Error' };
+    }
+
+    /** Returns success immediately; the timeout argument is ignored. */
+    async waitForCompletion(_timeout: number): Promise<H.IOResults> {
+        return { success: true };
+    }
+
+    /** Returns the workflow constellation this workflow was constructed with. */
+    async workflowConstellation(): Promise<DBAPI.WorkflowConstellation | null> {
+        return this.workflowData;
+    }
+}
diff --git a/server/workflow/interface/IWorkflowEngine.ts b/server/workflow/interface/IWorkflowEngine.ts
index 363b6a118..219177d60 100644
--- a/server/workflow/interface/IWorkflowEngine.ts
+++ b/server/workflow/interface/IWorkflowEngine.ts
@@ -7,6 +7,7 @@ export interface WorkflowParameters {
     idSystemObject?: number[] | undefined; // array of system objects as input to this workflow; null for jobs not acting on system objects
     idProject?: number | undefined; // Project.idProject of project, if any
     idUserInitiator?: number | undefined; // User.idUser of initiator, if any
+    autoStart?: boolean | undefined; // should the workflow engine immediately start the workflow
     parameters?: any | undefined; // Additional workflow parameters; each workflow template should define their own parameter interface
 }
 