From 452ed86ce734941affc4ec795f03b769fe087421 Mon Sep 17 00:00:00 2001 From: David de Boer Date: Mon, 5 Aug 2024 11:26:47 +0200 Subject: [PATCH] feat: Improve error messages (#108) --- jest.config.js | 8 +-- src/file.ts | 11 ++-- src/generator.ts | 53 +++++++++------ src/iterator.ts | 51 +++++++++++---- src/main.ts | 7 +- src/pipeline.ts | 9 +-- src/stage.ts | 4 +- src/utils/error.ts | 19 +++--- test/file.test.ts | 20 ++---- test/generator.test.ts | 121 +++++++++++++++++------------------ test/iterator.test.ts | 2 +- test/utils/utilities.test.ts | 55 ++-------------- 12 files changed, 168 insertions(+), 192 deletions(-) diff --git a/jest.config.js b/jest.config.js index 3efee3b..bab33b2 100644 --- a/jest.config.js +++ b/jest.config.js @@ -11,10 +11,10 @@ export default { coverageReporters: ['json-summary', 'text'], coverageThreshold: { global: { - lines: 71.03, - statements: 71.14, - branches: 67, - functions: 76.76, + lines: 71.74, + statements: 71.73, + branches: 67.17, + functions: 80.85, }, }, transform: { diff --git a/src/file.ts b/src/file.ts index 891e710..b479ad3 100644 --- a/src/file.ts +++ b/src/file.ts @@ -14,20 +14,20 @@ import {pipeline as streamPipeline} from 'stream/promises'; import {Progress} from './progress.js'; export default class File { - public readonly $id = 'File'; private $isValid?: boolean; public constructor( private $path: string, private readonly skipExistsCheck: boolean = false - ) {} + ) { + this.validate(); + } - public validate(): File { + private validate(): File { if (this.$isValid !== undefined) return this; if (!isFilePathString(this.$path)) { - const wrongFilePath: string = this.$path as string; throw new Error( - `The filename \`${wrongFilePath}\` should start with \`file://\`` + `The filename \`${this.$path}\` should start with \`file://\`` ); } this.$path = this.$path.replace(/^file:\/\//, ''); @@ -56,7 +56,6 @@ export default class File { } public toString(): string { - this.validate(); return this.$path; } diff --git a/src/generator.ts b/src/generator.ts index e25132f..ee50090 100644 --- a/src/generator.ts +++ b/src/generator.ts @@ -14,7 +14,7 @@ const DEFAULT_BATCH_SIZE = 10; interface Events { end: [iterations: number, statements: number, processed: number]; - error: [e: Error]; + error: [e: GeneratorError]; data: [statement: Quad]; } @@ -79,31 +79,23 @@ export default class Generator extends EventEmitter { } private async runBatch(batch: NamedNode[]): Promise { - const error = (e: unknown): Error => - new Error( - `The Generator did not run successfully, it could not get the results from the endpoint ${ - this.source?.value - }: ${(e as Error).message}` - ); - + const query = this.query.withIris(batch); try { - const stream = await this.engine.queryQuads( - this.query.withIris(batch).toString(), - { - sources: [(this.source ??= getEngineSource(this.endpoint))], - } - ); + const stream = await this.engine.queryQuads(query.toString(), { + sources: [(this.source ??= getEngineSource(this.endpoint))], + }); stream.on('data', (quad: Quad) => { this.statements++; this.emit('data', quad); }); - stream.on('error', e => { - this.emit('error', error(e)); - // reject(e); + stream.on('error', error => { + this.emit( + 'error', + new GeneratorError({endpoint: this.endpoint, error, query}) + ); }); stream.on('end', () => { - // resolve(); this.iterationsProcessed += batch.length; this.emit( 'end', @@ -112,8 +104,15 @@ export default class Generator extends EventEmitter { this.iterationsProcessed ); }); - } catch (e) { - this.emit('error', error(e)); + } catch (error) { + this.emit( + 'error', + new GeneratorError({ + endpoint: this.endpoint, + error: error as Error, + query, + }) + ); } } @@ -182,3 +181,17 @@ export class Query extends BaseQuery { } } } + +export class GeneratorError extends Error { + public readonly context; + constructor(args: {error: Error; endpoint: Endpoint; query: Query}) { + super( + `Generator failed for endpoint ${args.endpoint.toString()} with query:\n${args.query.toString()}`, + {cause: args.error} + ); + this.context = { + endpoint: args.endpoint.toString(), + query: args.query.toString(), + }; + } +} diff --git a/src/iterator.ts b/src/iterator.ts index 79c31f4..17eace0 100644 --- a/src/iterator.ts +++ b/src/iterator.ts @@ -12,7 +12,7 @@ const DEFAULT_LIMIT = 10; interface Events { data: [$this: NamedNode]; end: [numResults: number]; - error: [Error]; + error: [IteratorError]; } export default class Iterator extends EventEmitter { @@ -34,14 +34,6 @@ export default class Iterator extends EventEmitter { setTimeout(async () => { let resultsPerPage = 0; this.query.offset = this.offset; - const error = (e: unknown): Error => - new Error( - `The Iterator did not run successfully, it could not get the results from the endpoint ${ - this.source?.value - } (offset: ${this.offset}, limit ${this.query.limit}): ${ - (e as Error).message - }` - ); try { const stream = await this.engine.queryBindings(this.query.toString(), { sources: [(this.source ??= getEngineSource(this.endpoint))], @@ -65,9 +57,11 @@ export default class Iterator extends EventEmitter { if (this.totalResults === 0) { this.emit( 'error', - error( - new Error(`no results for query:\n ${this.query.toString()}`) - ) + new IteratorError({ + endpoint: this.endpoint, + query: this.query, + error: new Error('No results'), + }) ); } this.offset += this.query.limit; @@ -79,10 +73,24 @@ export default class Iterator extends EventEmitter { }); stream.on('error', e => { - this.emit('error', error(e)); + this.emit( + 'error', + new IteratorError({ + error: e, + endpoint: this.endpoint, + query: this.query, + }) + ); }); } catch (e) { - this.emit('error', error(e)); + this.emit( + 'error', + new IteratorError({ + error: e as Error, + endpoint: this.endpoint, + query: this.query, + }) + ); } }, this.delay); } @@ -121,3 +129,18 @@ export class Query extends BaseQuery { } } } + +export class IteratorError extends Error { + public readonly context; + + constructor(args: {error: Error; endpoint: Endpoint; query: Query}) { + super( + `Iterator failed: ${args.error.message} for endpoint ${args.endpoint.toString()} with query:\n${args.query.toString()}`, + {cause: args.error} + ); + this.context = { + endpoint: args.endpoint.toString(), + query: args.query.toString(), + }; + } +} diff --git a/src/main.ts b/src/main.ts index 14d1c43..b057b7c 100644 --- a/src/main.ts +++ b/src/main.ts @@ -7,6 +7,7 @@ import type {Configuration} from './configuration.js'; import loadPipelines from './utils/loadPipelines.js'; import Pipeline from './pipeline.js'; import {cli} from './cli.js'; +import Process from 'node:process'; console.info( chalk.bold(`Welcome to LD Workbench version ${chalk.cyan(version())}`) @@ -21,8 +22,7 @@ console.info( configuration = pipelines.get(cli.pipeline); if (configuration === undefined) { error( - `No pipeline named “${cli.pipeline}” was found.`, - `Valid pipeline names are: ${names.map(name => `"${name}"`).join(', ')}` + `No pipeline named “${cli.pipeline}” was found. Valid pipeline names are: ${names.map(name => `"${name}"`).join(', ')}` ); } } else if (names.length === 1) { @@ -61,6 +61,7 @@ console.info( }); await pipeline.run(); } catch (e) { - error(`Error in pipeline ${chalk.italic(configuration.name)}`, e as Error); + error(e as Error); + Process.exit(1); } })(); diff --git a/src/pipeline.ts b/src/pipeline.ts index 0b5958c..b60daef 100644 --- a/src/pipeline.ts +++ b/src/pipeline.ts @@ -62,7 +62,7 @@ class Pipeline { } this.destination = isTriplyDBPathString(destinationFile) ? new TriplyDB(destinationFile).validate() - : new File(destinationFile, true).validate(); + : new File(destinationFile, true); this.stores = (configuration.stores ?? []).map( storeConfig => @@ -125,9 +125,10 @@ class Pipeline { ); } catch (e) { throw new Error( - `Error in the iterator of stage \`${stageConfiguration.name}\`: ${ + `Error in the configuration of stage “${stageConfiguration.name}”: ${ (e as Error).message - }` + }`, + {cause: e} ); } } @@ -207,7 +208,7 @@ class Pipeline { this.increaseProgress(progress, stage, iterationsProcessed, count); }); stage.on('error', e => { - progress.fail('Error: ' + (e as Error).message); + progress.fail(`Stage “${this.name}” failed`); reject(e); }); stage.on('end', (iris, statements) => { diff --git a/src/stage.ts b/src/stage.ts index 13970af..adda732 100644 --- a/src/stage.ts +++ b/src/stage.ts @@ -75,7 +75,8 @@ export default class Stage extends EventEmitter { ); } } - this.destination = () => new File(this.destinationPath).getStream(); + this.destination = () => + new File(`file://${this.destinationPath}`, true).getStream(); } public get destinationPath(): string { @@ -157,7 +158,6 @@ export default class Stage extends EventEmitter { this.emit('error', e); }); - // Start the iterator await this.iterator.run(); } diff --git a/src/utils/error.ts b/src/utils/error.ts index 9378bed..863de15 100644 --- a/src/utils/error.ts +++ b/src/utils/error.ts @@ -1,11 +1,10 @@ -import chalk from 'chalk'; - -export const error = ( - message: string | Error, - ...info: Array -): void => { - console.error( - chalk.red(message instanceof Error ? message.message : message) - ); - info.forEach(i => console.info(i)); +export const error = (message: string | Error): void => { + if (message instanceof Error) { + console.error(`\n${message.message}`); + if (message.cause) { + console.error('\nCaused by:\n', message.cause); + } + } else { + console.error(`\n${message}`); + } }; diff --git a/test/file.test.ts b/test/file.test.ts index 8e94033..1e87ed6 100644 --- a/test/file.test.ts +++ b/test/file.test.ts @@ -16,7 +16,6 @@ describe('File Class', () => { expect(file).to.be.an.instanceOf(File); expect(file).to.have.property('$path'); expect(file).to.have.property('skipExistsCheck'); - expect(file).to.have.property('$id'); }); }); describe('validate', () => { @@ -24,31 +23,26 @@ describe('File Class', () => { const path = './static/example/config.yml'; const validFilePath = `file://${path}`; const file = new File(validFilePath); - expect(file.validate()); expect(file.path).to.equal(path); }); it('should throw an error for an invalid file path', () => { const filePath = 'invalid/file/path.txt'; - const file = new File(filePath); - expect(file.validate.bind(file)).to.throw( + expect(() => new File(filePath)).to.throw( 'The filename `invalid/file/path.txt` should start with `file://`' ); }); it('should throw an error if file does not exist', () => { const filePath = 'file://nonexistent/file.txt'; - const file = new File(filePath); - expect(file.validate.bind(file)).to.throw( + expect(() => new File(filePath)).to.throw( 'File not found: `nonexistent/file.txt`' ); }); it('should skip exists check when skipExistsCheck is true', () => { const filePath = 'file://nonexistent/file.txt'; - const file = new File(filePath, true); - expect(() => file.validate()).to.not.throw(); - expect(file.path).to.equal('nonexistent/file.txt'); + expect(new File(filePath, true).path).to.equal('nonexistent/file.txt'); }); }); @@ -73,15 +67,15 @@ describe('File Class', () => { } }); it('should create a write stream for a new file', () => { - const filePath = 'new/file.txt'; - const file = new File(filePath); + const filePath = 'file://new/file.txt'; + const file = new File(filePath, true); writeStream = file.getStream(); expect(writeStream).to.be.an.instanceOf(fs.WriteStream); writeStream.end(); }); it('should append to an existing file when append is true', () => { - const filePath = 'file.txt'; + const filePath = 'file://file.txt'; const file = new File(filePath); writeStream = file.getStream(true); expect(writeStream).to.be.an.instanceOf(fs.WriteStream); @@ -89,7 +83,7 @@ describe('File Class', () => { it('should create parent directories if they do not exist', async () => { const filePath = 'file://new/directory/nested/file.txt'; - const file = new File(filePath, true).validate(); + const file = new File(filePath, true); writeStream = file.getStream(); expect(writeStream).to.be.an.instanceOf(fs.WriteStream); expect( diff --git a/test/generator.test.ts b/test/generator.test.ts index 8770039..3474c64 100644 --- a/test/generator.test.ts +++ b/test/generator.test.ts @@ -19,45 +19,47 @@ describe('Generator Class', () => { const _filename = fileURLToPath(import.meta.url); const _dirname = path.dirname(_filename); const dataDirectoryPath = path.join(_dirname, 'pipelines', 'data'); + let configuration: Configuration; beforeAll(async () => { await removeDirectory(dataDirectoryPath); }); - describe('constructor', () => { - it('should set query, engine, endpoint, and source properties correctly', () => { - const configuration: Configuration = { - name: 'Example Pipeline', - description: - 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', - destination: 'file://pipelines/data/example-pipeline.nt', - stages: [ - { - name: 'Stage 1', - iterator: { - query: 'file://static/example/iterator-stage-1.rq', - endpoint: 'file://static/tests/iris.nt', - }, - generator: [ - { - query: 'file://static/example/generator-stage-1-1.rq', - }, - ], + beforeEach(() => { + configuration = { + name: 'Example Pipeline', + description: + 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'file://static/tests/iris.nt', }, - { - name: 'Stage 2', - iterator: { - query: 'file://static/example/iterator-stage-2.rq', - }, - generator: [ - { - query: 'file://static/example/generator-stage-2.rq', - endpoint: 'file://static/tests/wikidata.nt', - }, - ], + generator: [ + {query: 'file://static/example/generator-stage-1-1.rq'}, + {query: 'file://static/example/generator-stage-1-2.rq'}, + ], + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', }, - ], - }; + generator: [ + { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'file://static/tests/wikidata.nt', + }, + ], + }, + ], + }; + }); + + describe('constructor', () => { + it('should set query, engine, endpoint, and source properties correctly', () => { const pipeline = new Pipeline(configuration, {silent: true}); const stageConfig = configuration.stages[0]; const stage = new Stage(pipeline, stageConfig); @@ -73,39 +75,11 @@ describe('Generator Class', () => { it('Should work with multiple generators in parallel for one pipeline', async () => { const filePath = 'pipelines/data/example-pipelineParallel.nt'; - const config: Configuration = { - name: 'Example Pipeline', - description: - 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', - destination: 'file://' + filePath, - stages: [ - { - name: 'Stage 1', - iterator: { - query: 'file://static/example/iterator-stage-1.rq', - endpoint: 'file://static/tests/iris.nt', - }, - generator: [ - {query: 'file://static/example/generator-stage-1-1.rq'}, - {query: 'file://static/example/generator-stage-1-2.rq'}, - ], - }, - { - name: 'Stage 2', - iterator: { - query: 'file://static/example/iterator-stage-2.rq', - }, - generator: [ - { - query: 'file://static/example/generator-stage-2.rq', - endpoint: 'file://static/tests/wikidata.nt', - }, - ], - }, - ], - }; // read file after pipeline has finished - const pipelineParallelGenerators = new Pipeline(config, {silent: true}); + configuration.destination = 'file://' + filePath; + const pipelineParallelGenerators = new Pipeline(configuration, { + silent: true, + }); await pipelineParallelGenerators.run(); const file = fs.readFileSync(filePath, {encoding: 'utf-8'}); const fileLines = file.split('\n').sort(); @@ -164,6 +138,13 @@ describe('Generator Class', () => { ' "Instance 150 of the Iris Virginica"@en .' ); }); + it('throws an error when the generator endpoint returns an error', async () => { + configuration.stages[0].generator[0].endpoint = 'https://nope/'; + const pipeline = new Pipeline(configuration, {silent: true}); + await expect(async () => await pipeline.run()).rejects.toThrow( + 'Generator failed for endpoint https://nope/ with query:' + ); + }); it.skip('should emit "data" and "end" events with the correct number of statements', async () => { const configuration: Configuration = { name: 'Example Pipeline', @@ -354,4 +335,16 @@ describe('Query', () => { ); }); }); + it('rejects as ?this', () => { + expect(() => + Query.from( + getSPARQLQuery( + 'CONSTRUCT { $this ?p ?o } WHERE { BIND(42 AS $this) $this ?p ?o . }', + 'construct' + ) + ) + ).toThrow( + 'must not use the syntax form `AS ?this` because it is a pre-bound variable' + ); + }); }); diff --git a/test/iterator.test.ts b/test/iterator.test.ts index 4eb6604..7d4e363 100644 --- a/test/iterator.test.ts +++ b/test/iterator.test.ts @@ -138,7 +138,7 @@ describe('Iterator Class', () => { new File('file://static/tests/iris-small.nt') ); await expect(async () => await runIterator(iterator)).rejects.toThrow( - 'no results for query:\n SELECT ?this WHERE { ?this . }' + 'No results' ); }); }); diff --git a/test/utils/utilities.test.ts b/test/utils/utilities.test.ts index 6adf296..b78dd5b 100644 --- a/test/utils/utilities.test.ts +++ b/test/utils/utilities.test.ts @@ -307,7 +307,6 @@ describe('Utilities', () => { }); describe('getEndpoint', () => { it('should return File when filePath is provided in Stage', () => { - const filePath = 'file://path/to/file.txt'; const config: Configuration = { name: 'Example Pipeline', description: @@ -318,7 +317,7 @@ describe('Utilities', () => { name: 'Stage 1', iterator: { query: 'file://static/example/iterator-stage-1.rq', - endpoint: filePath, + endpoint: 'file://static/tests/iris.nt', }, generator: [ { @@ -334,7 +333,7 @@ describe('Utilities', () => { const retrievedEndpoint = getEndpoint(stage); expect( isFile(retrievedEndpoint) && - retrievedEndpoint.path === 'file://path/to/file.txt' + retrievedEndpoint.path === 'static/tests/iris.nt' ).to.equal(true); }); it('should return URL when URL is provided in Stage', () => { @@ -391,7 +390,7 @@ describe('Utilities', () => { ], }; expect(() => new Pipeline(config, {silent: true})).to.throw( - 'Error in the iterator of stage `Stage 1`: "invalidExample" is not a valid URL' + 'Error in the configuration of stage “Stage 1”: "invalidExample" is not a valid URL' ); }); it('should throw if stage has undefined endpoint and is first stage', () => { @@ -467,7 +466,7 @@ describe('Utilities', () => { expect(result instanceof QueryEngineSparql).to.equal(true); }); it('should return QueryEngineFile when input is File', () => { - const file = new File('file://exampleFile.txt'); + const file = new File('file://exampleFile.txt', true); const result = getEngine(file); expect(result instanceof QueryEngineFile).to.equal(true); }); @@ -524,52 +523,6 @@ describe('Utilities', () => { value: url.toString(), }); }); - it('should return engine source string when input is PreviousStage with destinationPath', async () => { - const config: Configuration = { - name: 'Example Pipeline', - description: - 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', - destination: 'file://pipelines/data/example-pipeline.nt', - stages: [ - { - name: 'Stage 1', - iterator: { - query: 'file://static/example/iterator-stage-1.rq', - endpoint: 'file://static/tests/iris.nt', - }, - generator: [ - { - query: 'file://static/example/generator-stage-1-1.rq', - }, - ], - }, - { - name: 'Stage 2', - iterator: { - query: 'file://static/example/iterator-stage-2.rq', - }, - generator: [ - { - query: 'file://static/example/generator-stage-2.rq', - endpoint: 'file://static/tests/wikidata.nt', - }, - ], - }, - ], - }; - const pipeline = new Pipeline(config, {silent: true}); - const stage2: Stage = new Stage(pipeline, config.stages[1]); - const stagesSoFar = Array.from(stage2.pipeline.stages.keys()); - const previousStage = new PreviousStage(stage2, stagesSoFar.pop()!); - const engineSource = getEngineSource(previousStage); - expect( - engineSource.value === - path.join( - process.cwd(), - '/pipelines/data/example-pipeline/stage-1.nt' - ) - ).to.equal(true); - }); describe('should throw', () => { beforeEach(() => { const configuration = loadConfiguration('./static/example/config.yml');