diff --git a/.github/workflows/github-ci.yml b/.github/workflows/github-ci.yml index 35cbf10..a968437 100644 --- a/.github/workflows/github-ci.yml +++ b/.github/workflows/github-ci.yml @@ -24,7 +24,8 @@ jobs: strategy: matrix: - node-version: [18.x] + # Using Node LTS https://nodejs.org/en + node-version: [20.10.0] steps: - run: echo "Job was automatically triggered by a ${{ github.event_name }} event on ${{ github.ref }} - ${{ github.repository }}." diff --git a/file:/file.txt b/file:/file.txt new file mode 100644 index 0000000..e69de29 diff --git a/file:/new/file.txt b/file:/new/file.txt new file mode 100644 index 0000000..e69de29 diff --git a/package-lock.json b/package-lock.json index 9fda36e..dac828f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,11 @@ { - "name": "ld-workbench", + "name": "ldworkbench", "version": "0.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "ld-workbench", + "name": "ldworkbench", "version": "0.0.1", "license": "EUPL-1.2", "dependencies": { @@ -23,10 +23,11 @@ "sparqljs": "^3.7.1" }, "bin": { - "ld-workbench": "dist/main.js" + "ldworkbench": "dist/main.js" }, "devDependencies": { "@types/chai": "^4.3.11", + "@types/chai-as-promised": "^7.1.8", "@types/inquirer": "^9.0.7", "@types/js-yaml": "^4.0.9", "@types/lodash.kebabcase": "^4.1.9", @@ -36,6 +37,7 @@ "@types/sparqljs": "^3.1.8", "@typescript-eslint/eslint-plugin": "^6.12.0", "chai": "^4.3.10", + "chai-as-promised": "^7.1.1", "commander": "^11.1.0", "eslint": "^8.54.0", "eslint-config-prettier": "^9.0.0", @@ -3078,6 +3080,15 @@ "integrity": "sha512-qQR1dr2rGIHYlJulmr8Ioq3De0Le9E4MJ5AiaeAETJJpndT1uUNHsGFK3L/UIu+rbkQSdj8J/w2bCsBZc/Y5fQ==", "dev": true }, + "node_modules/@types/chai-as-promised": { + "version": "7.1.8", + "resolved": "https://registry.npmjs.org/@types/chai-as-promised/-/chai-as-promised-7.1.8.tgz", + "integrity": "sha512-ThlRVIJhr69FLlh6IctTXFkmhtP3NpMZ2QGq69StYLyKZFp/HOp1VdKZj7RvfNWYYcJ1xlbLGLLWj1UvP5u/Gw==", + "dev": true, + "dependencies": { + "@types/chai": "*" + } + }, "node_modules/@types/glob": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/@types/glob/-/glob-7.2.0.tgz", @@ -3918,6 +3929,18 @@ "node": ">=4" } }, + "node_modules/chai-as-promised": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/chai-as-promised/-/chai-as-promised-7.1.1.tgz", + "integrity": "sha512-azL6xMoi+uxu6z4rhWQ1jbdUhOMhis2PvscD/xjLqNMkv3BPPp2JyyuTHOrf9BOosGpNQ11v6BKv/g57RXbiaA==", + "dev": true, + "dependencies": { + "check-error": "^1.0.2" + }, + "peerDependencies": { + "chai": ">= 2.1.2 < 5" + } + }, "node_modules/chalk": { "version": "5.3.0", "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.3.0.tgz", diff --git a/package.json b/package.json index 9b7289b..f8d3a6d 100644 --- a/package.json +++ b/package.json @@ -56,6 +56,7 @@ }, "devDependencies": { "@types/chai": "^4.3.11", + "@types/chai-as-promised": "^7.1.8", "@types/inquirer": "^9.0.7", "@types/js-yaml": "^4.0.9", "@types/lodash.kebabcase": "^4.1.9", @@ -65,6 +66,7 @@ "@types/sparqljs": "^3.1.8", "@typescript-eslint/eslint-plugin": "^6.12.0", "chai": "^4.3.10", + "chai-as-promised": "^7.1.1", "commander": "^11.1.0", "eslint": "^8.54.0", "eslint-config-prettier": "^9.0.0", diff --git a/src/lib/File.class.ts b/src/lib/File.class.ts index 9b69dd2..28e3e08 100644 --- a/src/lib/File.class.ts +++ b/src/lib/File.class.ts @@ -3,7 +3,7 @@ import { isFile, isFilePathString } from '../utils/guards.js' import { dirname } from 'path' export default class File { - public static $id = 'File' + public readonly $id = 'File' private $isValid?: boolean public constructor(private $path: string, private readonly skipExistsCheck: boolean = false) {} diff --git a/src/lib/Stage.class.ts b/src/lib/Stage.class.ts index 3d7d2f4..6fd6e35 100644 --- a/src/lib/Stage.class.ts +++ b/src/lib/Stage.class.ts @@ -60,6 +60,8 @@ class Stage extends EventEmitter { this.generator.on('data', quad => { writer.addQuad(quad) quadCount ++ + // slows down run test in Stage.class.test.ts + // this.emit('generatorResult', quadCount) }) this.generator.on('end', _ => { generatorCount++ diff --git a/src/lib/tests/File.class.test.ts b/src/lib/tests/File.class.test.ts new file mode 100644 index 0000000..2aa6e88 --- /dev/null +++ b/src/lib/tests/File.class.test.ts @@ -0,0 +1,82 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import File from "../File.class.js" +import * as chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +chai.use(chaiAsPromised) +const expect = chai.expect + +describe('File Class', () => { + describe('constructor', () => { + it('should set properties correctly', () => { + const file = new File(`file://${path.join('./static/example/config.yml')}`) + expect(file).to.be.an.instanceOf(File); + expect(file).to.have.property('$path'); + expect(file).to.have.property('skipExistsCheck'); + expect(file).to.have.property('$id'); + }); + }); + describe('validate', () => { + it('should validate a valid file path', () => { + const path = './static/example/config.yml'; + const validFilePath = `file://${path}`; + const file = new File(validFilePath) + expect(file.validate()); + expect(file.path).to.equal(path); + }); + + it('should throw an error for an invalid file path', () => { + const filePath = 'invalid/file/path.txt'; + const file = new File(filePath); + expect((file.validate.bind(file))).to.throw('The filename `invalid/file/path.txt` should start with `file://`'); + }); + + it('should throw an error if file does not exist', () => { + const filePath = 'file://nonexistent/file.txt'; + const file = new File(filePath); + expect(file.validate.bind(file)).to.throw('File not found: `nonexistent/file.txt`'); + }); + + it('should skip exists check when skipExistsCheck is true', () => { + const filePath = 'file://nonexistent/file.txt'; + const file = new File(filePath, true); + expect(() => file.validate()).to.not.throw(); + expect(file.path).to.equal('nonexistent/file.txt'); + }); + }); + + describe('getStream', () => { + beforeEach(() => { + const filePath = 'file.txt'; + fs.writeFileSync(filePath, 'Initial content'); + }); + + afterEach(() => { + const filePath = 'file.txt'; + fs.unlinkSync(filePath); + if (fs.existsSync('./new')) { + fs.rmSync('./new', { recursive: true, force: true }); + } + }); + it('should create a write stream for a new file', () => { + const filePath = 'file://new/file.txt'; + const file = new File(filePath); + const writeStream = file.getStream(); + expect(writeStream).to.be.an.instanceOf(fs.WriteStream); + }); + + it('should append to an existing file when append is true', () => { + const filePath = 'file://file.txt'; + const file = new File(filePath); + const writeStream = file.getStream(true); + expect(writeStream).to.be.an.instanceOf(fs.WriteStream); + }); + it('should create parent directories if they do not exist', () => { + const filePath = 'file://new/directory/nested/file.txt'; + const file = new File(filePath, true).validate(); + const writeStream = file.getStream(); + expect(writeStream).to.be.an.instanceOf(fs.WriteStream); + expect(fs.existsSync(path.dirname(filePath.replace("file://", "./")))).to.equal(true); + }); + }); +}); diff --git a/src/lib/tests/Generator.class.test.ts b/src/lib/tests/Generator.class.test.ts new file mode 100644 index 0000000..8aa17a4 --- /dev/null +++ b/src/lib/tests/Generator.class.test.ts @@ -0,0 +1,66 @@ +import Generator from "../Generator.class.js"; +import { EventEmitter } from 'events'; +import Stage from "../Stage.class.js"; +import parseYamlFile from "../../utils/parseYamlFile.js"; +import Pipeline from "../Pipeline.class.js"; +import * as chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import { NamedNode } from "n3"; +chai.use(chaiAsPromised) +const expect = chai.expect + +describe('Generator Class', () => { + describe('constructor', () => { + it('should set query, engine, endpoint, and source properties correctly', () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + const stageConfig = configuration.stages[0] + const stage = new Stage(pipeline, stageConfig) + const generator = new Generator(stage) + expect(generator).to.be.an.instanceOf(Generator); + expect(generator).to.be.an.instanceOf(EventEmitter); + expect(generator).to.have.property('query'); + expect(generator).to.have.property('engine'); + expect(generator).to.have.property('endpoint'); + expect(generator).to.have.property('source'); + }); + }); + + describe.skip('run', () => { + it('should emit "data" and "end" events with the correct number of statements', async () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + const stageConfig = configuration.stages[0] + const stage = new Stage(pipeline, stageConfig) + const generator = new Generator(stage); + const emittedEvents: any[] = []; + + const testNamedNode = new NamedNode('https://triplydb.com/triply/iris/id/floweringPlant/00106'); + + async function runGeneratorWithPromise(): Promise { + return new Promise((resolve, reject) => { + generator.addListener('data', (quad) => { + emittedEvents.push({ event: 'data', quad }); + }); + generator.addListener('end', (numResults) => { + emittedEvents.push({ event: 'end', numResults }); + resolve(true); + }); + generator.addListener('error', (error) => { + reject(error); + }); + generator.run(testNamedNode); + }); + } + + await runGeneratorWithPromise() + expect(emittedEvents).to.have.lengthOf(4); + expect(emittedEvents[0].event).to.equal('data'); + expect(emittedEvents[0].quad._subject.id).to.equal('https://triplydb.com/triply/iris/id/floweringPlant/00106') + expect(emittedEvents[0].quad._predicate.id).to.equal('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') + expect(emittedEvents[0].quad._object.id).to.equal('https://schema.org/Thing') + expect(emittedEvents[emittedEvents.length - 1].event).to.equal('end'); + expect(emittedEvents[emittedEvents.length - 1].numResults).to.equal(3); + }); + }); +}); diff --git a/src/lib/tests/Iterator.class.test.ts b/src/lib/tests/Iterator.class.test.ts new file mode 100644 index 0000000..b53dc4c --- /dev/null +++ b/src/lib/tests/Iterator.class.test.ts @@ -0,0 +1,63 @@ +import Iterator from "../Iterator.class.js"; +import { EventEmitter } from 'events'; +import Stage from "../Stage.class.js"; +import Pipeline from "../Pipeline.class.js"; +import parseYamlFile from "../../utils/parseYamlFile.js"; +import * as chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +chai.use(chaiAsPromised) +const expect = chai.expect + +describe('Iterator Class', () => { + describe('constructor', () => { + it('should set query, endpoint, engine, $offset, and totalResults properties correctly', () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + const stageConfig = configuration.stages[0] + const stage = new Stage(pipeline, stageConfig) + const iterator = new Iterator(stage); + expect(iterator).to.be.an.instanceOf(Iterator); + expect(iterator).to.be.an.instanceOf(EventEmitter); + expect(iterator).to.have.property('query'); + expect(iterator).to.have.property('endpoint'); + expect(iterator).to.have.property('engine'); + expect(iterator).to.have.property('source'); + expect(iterator).to.have.property('$offset', 0); + expect(iterator).to.have.property('totalResults', 0); + }); + }); + + describe.skip('run', () => { + it('should emit "data" and "end" events with the correct $this and numResults', async () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + const stageConfig = configuration.stages[0] + const stage = new Stage(pipeline, stageConfig) + const iterator = new Iterator(stage); + const emittedEvents: any = [] + async function runIteratorWithPromise(): Promise { + return new Promise((resolve, reject) => { + iterator.addListener('data', (bindings) => { + emittedEvents.push({ event: 'data', bindings }); + }); + iterator.addListener('end', () => { + emittedEvents.push({ event: 'end' }); + resolve(true); + }); + iterator.addListener('error', (error) => { + reject(error); + }); + iterator.run(); + }); + } + + await runIteratorWithPromise() + expect(emittedEvents).to.have.lengthOf(154); + expect(emittedEvents[0].event).to.equal('data'); + expect(emittedEvents[0].bindings.termType).to.equal('NamedNode') + expect(emittedEvents[0].bindings.value).to.equal('http://dbpedia.org/resource/Iris_virginica') + expect(emittedEvents[emittedEvents.length - 1].event).to.equal('end'); + + }); + }); +}); diff --git a/src/lib/tests/Pipeline.class.test.ts b/src/lib/tests/Pipeline.class.test.ts new file mode 100644 index 0000000..064ad6c --- /dev/null +++ b/src/lib/tests/Pipeline.class.test.ts @@ -0,0 +1,236 @@ +import parseYamlFile from "../../utils/parseYamlFile.js"; +import File from "../File.class.js"; +import type { LDWorkbenchConfiguration } from "../LDWorkbenchConfiguration.js"; +import Pipeline from "../Pipeline.class.js"; +import * as chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import Stage from "../Stage.class.js"; +chai.use(chaiAsPromised) +const expect = chai.expect + +describe('Pipeline Class', () => { + describe('constructor', () => { + it('should set properties correctly', () => { + const configuration = parseYamlFile('./static/example/config.yml'); + const pipeline = new Pipeline(configuration); + expect(pipeline).to.be.an.instanceOf(Pipeline); + expect(pipeline).to.have.property('stages').that.is.a('Map'); + expect(pipeline).to.have.property('dataDir').that.is.a('string'); + expect(pipeline).to.have.property('$isValidated', false); + expect(pipeline).to.have.property('stageNames').that.is.an('array'); + expect(pipeline).to.have.property('now').that.is.an.instanceOf(Date); + expect(pipeline).to.have.property('destination').that.is.an.instanceOf(File); + }); + }); + + describe('getPreviousStage', () => { + it('should return the previous stage correctly', () => { + const configuration = parseYamlFile('./static/example/config.yml'); + const pipeline = new Pipeline(configuration); + pipeline.validate() + + const stage1 = pipeline.stages.get('Stage 1')!; + const stage2 = pipeline.stages.get('Stage 2')!; + + expect(pipeline.getPreviousStage(stage1)).to.equal(undefined) + expect(pipeline.getPreviousStage(stage2)).to.equal(stage1); + }); + // BUG error can never be reached - Error: Detected a duplicate name for stage `undefined` in your pipeline: each stage must have a unique name. + // will be thrown instead + it.skip('should throw error if stage name is missing', () => { + const configuration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } as unknown as LDWorkbenchConfiguration + const pipeline = new Pipeline(configuration); + const stage2: Stage = new Stage(pipeline, configuration.stages[1]) + pipeline.getPreviousStage(stage2) + + }); + }); + + describe('validate', () => { + it('should throw an error if the pipeline\'s configuration has no stages', () => { + const invalidConfiguration = { + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [] + } as unknown as LDWorkbenchConfiguration + const pipeline = new Pipeline(invalidConfiguration); + let failed = false + try { + pipeline.validate() + } catch (error) { + if (error instanceof Error) { + if (error.message === 'Your pipeline contains no stages.') { + failed = true + } + else { + throw new Error(`This test expected an error with message "Your pipeline contains no stages.", but received: ${error.message}`) + } + } else { throw error } + } + expect(failed).to.equal(true); + }); + it('should throw an error if the pipeline\'s configuration has no endpoint for the Iterator', () => { + const invalidConfiguration = { + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + } + } + ] + } as unknown as LDWorkbenchConfiguration + const pipeline = new Pipeline(invalidConfiguration); + let failed = false + try { + pipeline.validate() + } catch (error) { + if (error instanceof Error) { + if (error.message === 'The first stage of your pipeline must have an endpoint defined for the Iterator.') { + failed = true + } + else { + throw new Error(`This test expected an error with message "The first stage of your pipeline must have an endpoint defined for the Iterator.", but received: ${error.message}`) + } + } else { throw error } + } + expect(failed).to.equal(true); + }); + it('should throw an error if the pipeline\'s configuration has duplicate stage name', () => { + const configDuplicateStageName: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://www.example.com/endpoint' + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + const pipeline = new Pipeline(configDuplicateStageName); + let failed = false + try { + pipeline.validate() + } catch (error) { + if (error instanceof Error) { + if (error.message === 'Detected a duplicate name for stage `Stage 1` in your pipeline: each stage must have a unique name.') { + failed = true + } + else { + throw new Error(`This test expected an error with message "Detected a duplicate name for stage \`Stage 1\` in your pipeline: each stage must have a unique name.", but received: ${error.message}`) + } + } else { throw error } + } + expect(failed).to.equal(true); + + }); + + it('should succeed if pipeline is valid', () => { + const configDuplicateStageName: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://www.example.com/endpoint' + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + const pipeline = new Pipeline(configDuplicateStageName); + let failed = false + try { + pipeline.validate() + } catch (error) { + failed = true + if (error instanceof Error) { + throw new Error(`This test was expected to succeed, but failed with: ${error.message}`) + } else { throw error } + } + expect(failed).to.equal(false); + }); + }); + + describe('run', () => { + it('should run the pipeline correctly', async () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + + await expect(Promise.resolve(pipeline.run())).to.eventually.fulfilled + + }); + }); + +}); diff --git a/src/lib/tests/PreviousStage.class.test.ts b/src/lib/tests/PreviousStage.class.test.ts new file mode 100644 index 0000000..ec0a7d6 --- /dev/null +++ b/src/lib/tests/PreviousStage.class.test.ts @@ -0,0 +1,174 @@ +import PreviousStage from '../PreviousStage.class.js'; +import * as chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import type { LDWorkbenchConfiguration } from '../LDWorkbenchConfiguration.js'; +import Pipeline from '../Pipeline.class.js'; +import Stage from '../Stage.class.js'; +chai.use(chaiAsPromised) +const expect = chai.expect + +describe('PreviousStage Class', () => { + describe('constructor', () => { + it('should set properties correctly', () => { + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + const pipeline = new Pipeline(config) + pipeline.validate() + const stage: Stage = new Stage(pipeline, config.stages[1]) + const stagesSoFar = Array.from(stage.pipeline.stages.keys()); + const previousStage = new PreviousStage(stage, stagesSoFar.pop()!); + expect(previousStage).to.be.an.instanceOf(PreviousStage); + expect(previousStage).to.have.property('nextStage'); + expect(previousStage).to.have.property('name'); + expect(previousStage.$id).to.equal('PreviousStage'); + }); + }); + describe('load', () => { + it('should throw an error if the previous stage is not found', () => { + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + const pipeline = new Pipeline(config) + pipeline.validate() + const stage: Stage = new Stage(pipeline, config.stages[0]) + const stagesSoFar = Array.from(stage.pipeline.stages.keys()); + const previousStage = new PreviousStage(stage, stagesSoFar.pop()!); + expect(() => previousStage.load()).to.throw("no endpoint was defined, but there is also no previous stage to use") + }); + + it('should return the previous stage correctly', () => { + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + const pipeline = new Pipeline(config) + pipeline.validate() + const stageTwo: Stage = new Stage(pipeline, config.stages[1]) + const stagesSoFar = Array.from(stageTwo.pipeline.stages.keys()); + const previousStage = new PreviousStage(stageTwo, stagesSoFar.pop()!); // should be stage one + const stage2 = pipeline.stages.get('Stage 2')!; + const testPreviousStage = pipeline.getPreviousStage(stage2) + expect(previousStage.load()).to.equal(testPreviousStage) + }); + }); + + describe('is', () => { + it('should return true for a valid PreviousStage instance', () => { + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + const pipeline = new Pipeline(config) + pipeline.validate() + const stage: Stage = new Stage(pipeline, config.stages[1]) + const stagesSoFar = Array.from(stage.pipeline.stages.keys()); + const previousStage = new PreviousStage(stage, stagesSoFar.pop()!); + previousStage.load() + const result = PreviousStage.is(previousStage); + expect(result).to.equal(true); + }); + + it('should return false for an invalid instance', () => { + const invalidInstance = { $id: 'invalid' } + const result = PreviousStage.is(invalidInstance); + expect(result).to.equal(false); + }); + }); +}); diff --git a/src/lib/tests/Stage.class.test.ts b/src/lib/tests/Stage.class.test.ts new file mode 100644 index 0000000..4c59256 --- /dev/null +++ b/src/lib/tests/Stage.class.test.ts @@ -0,0 +1,93 @@ +import Stage from "../Stage.class.js"; +import Pipeline from '../Pipeline.class.js'; +import kebabcase from 'lodash.kebabcase' +import Iterator from '../Iterator.class.js'; +import Generator from '../Generator.class.js'; +import * as chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import parseYamlFile from "../../utils/parseYamlFile.js"; +import path from "path"; + +chai.use(chaiAsPromised) +const expect = chai.expect + +describe('Stage Class', () => { + describe('constructor', () => { + it('should set properties correctly', () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + const stageConfig = configuration.stages[0] + const stage = new Stage(pipeline, stageConfig) + expect(stage).to.be.an.instanceOf(Stage); + expect(stage).to.have.property('destination'); + expect(stage).to.have.property('iterator'); + expect(stage).to.have.property('generator'); + expect(stage.iterator).to.be.an.instanceOf(Iterator); + expect(stage.generator).to.be.an.instanceOf(Generator); + expect(stage.pipeline).to.be.an.instanceOf(Pipeline); + expect(stage).to.have.property('pipeline', pipeline); + expect(stage).to.have.property('configuration'); + }); + }); + + describe('destinationPath', () => { + it('should return the correct destination path', () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + const stageConfig = configuration.stages[0] + const stage = new Stage(pipeline, stageConfig); + const expectedPath = path.join(pipeline.dataDir, kebabcase(stageConfig.name) + '.nt') + expect(stage.destinationPath).to.equal(expectedPath.replace('file://', '')); + }); + }); + + describe('name', () => { + it('should return the correct stage name', () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + const stageConfig = configuration.stages[0] + const stage = new Stage(pipeline, stageConfig); + expect(stage.name).to.equal(stageConfig.name); + }); + }); + + describe.skip('run', () => { + it('should run the stage correctly', async function () { + this.timeout(5000) + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + const stageConfig = configuration.stages[0] + const stage = new Stage(pipeline, stageConfig); + + const emittedEvents: any[] = []; + async function runStageWithPromise(): Promise { + return new Promise((resolve, reject) => { + // @mightymax seems to never emit generatorResult, added it as comment to Stage class, but will need to uncomment the timeout in this test above ^ + stage.addListener('generatorResult', (count) => { + emittedEvents.push({ event: 'generatorResult', count }); + }); + stage.addListener('iteratorResult', (namedNode) => { + console.log('🪵 | file: Stage.class.test.ts:70 | stage.addListener | namedNode:', namedNode) + emittedEvents.push({ event: 'iteratorResult', namedNode }); + }); + stage.addListener('end', (iteratorCount, statements) => { + emittedEvents.push({ event: 'end', iteratorCount, statements }); + resolve(true); + }); + stage.addListener('error', (error) => { + reject(error); + }); + stage.run(); + }); + } + await runStageWithPromise() + expect(emittedEvents[0].event).to.equal('iteratorResult') + expect(emittedEvents[0].namedNode.termType).to.equal('NamedNode') + expect(emittedEvents[0].namedNode.value).to.equal('http://dbpedia.org/resource/Iris_virginica') + expect(emittedEvents[emittedEvents.length -1].event).to.equal('end') + expect(emittedEvents[emittedEvents.length -1].iteratorCount).to.equal(153) + expect(emittedEvents[emittedEvents.length -1].statements).to.equal(459) + expect(emittedEvents.length).to.equal(154) + }); + }); +}); diff --git a/src/utils/getEngineSource.ts b/src/utils/getEngineSource.ts index 2e7f2ab..aea6c01 100644 --- a/src/utils/getEngineSource.ts +++ b/src/utils/getEngineSource.ts @@ -9,7 +9,7 @@ export default function getEngineSource(endpoint: Endpoint): string { const previousStage = endpoint.load(); if (!existsSync(previousStage.destinationPath)) { throw new Error( - `The result from stage "${previousStage.name}" (${previousStage.destinationPath}) is not avalilable, make sure to run that stage first` + `The result from stage "${previousStage.name}" (${previousStage.destinationPath}) is not available, make sure to run that stage first` ); } source = path.resolve(previousStage.destinationPath); diff --git a/src/utils/tests/static/correct/conf1.yml b/src/utils/tests/static/correct/conf1.yml new file mode 100644 index 0000000..f265e23 --- /dev/null +++ b/src/utils/tests/static/correct/conf1.yml @@ -0,0 +1,18 @@ +# Metadata for your pipeline: +name: Example Pipeline A +description: > + This is an example pipeline. It uses files that are available in this repository + and SPARQL endpoints that should work. + + +# This is optional, by default it will be stored in the data directory of the pipeline using filename 'statements.nt' +destination: file://pipelines/data/example-pipeline.nt + +# The individual stages for your pipeline +stages: + - name: "Stage 1A" + iterator: + query: file://static/example/iterator-stage-1.rq + endpoint: https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql + generator: + query: file://static/example/generator-stage-1.rq \ No newline at end of file diff --git a/src/utils/tests/static/correct/conf2.yml b/src/utils/tests/static/correct/conf2.yml new file mode 100644 index 0000000..f56addd --- /dev/null +++ b/src/utils/tests/static/correct/conf2.yml @@ -0,0 +1,18 @@ +# Metadata for your pipeline: +name: Example Pipeline B +description: > + This is an example pipeline. It uses files that are available in this repository + and SPARQL endpoints that should work. + + +# This is optional, by default it will be stored in the data directory of the pipeline using filename 'statements.nt' +destination: file://pipelines/data/example-pipeline.nt + +# The individual stages for your pipeline +stages: + - name: "Stage 1B" + iterator: + query: file://static/example/iterator-stage-1.rq + endpoint: https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql + generator: + query: file://static/example/generator-stage-1.rq \ No newline at end of file diff --git a/src/utils/tests/static/duplicate/conf1.yml b/src/utils/tests/static/duplicate/conf1.yml new file mode 100644 index 0000000..ec68498 --- /dev/null +++ b/src/utils/tests/static/duplicate/conf1.yml @@ -0,0 +1,19 @@ +# Metadata for your pipeline: +name: Duplicate Example Pipeline +description: > + This is an example pipeline. It uses files that are available in this repository + and SPARQL endpoints that should work. + + +# This is optional, by default it will be stored in the data directory of the pipeline using filename 'statements.nt' +destination: file://pipelines/data/example-pipeline.nt + +# The individual stages for your pipeline +stages: + - name: "Stage One" + iterator: + query: file://static/example/iterator-stage-1.rq + endpoint: https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql + generator: + query: file://static/example/generator-stage-1.rq + diff --git a/src/utils/tests/static/duplicate/conf2.yml b/src/utils/tests/static/duplicate/conf2.yml new file mode 100644 index 0000000..6d03b48 --- /dev/null +++ b/src/utils/tests/static/duplicate/conf2.yml @@ -0,0 +1,18 @@ +# Metadata for your pipeline: +name: Duplicate Example Pipeline +description: > + This is an example pipeline. It uses files that are available in this repository + and SPARQL endpoints that should work. + + +# This is optional, by default it will be stored in the data directory of the pipeline using filename 'statements.nt' +destination: file://pipelines/data/example-pipeline.nt + +# The individual stages for your pipeline +stages: + - name: "Stage 1" + iterator: + query: file://static/example/iterator-stage-1.rq + endpoint: https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql + generator: + query: file://static/example/generator-stage-1.rq \ No newline at end of file diff --git a/src/utils/tests/static/single/conf.yml b/src/utils/tests/static/single/conf.yml new file mode 100644 index 0000000..e9d7fc0 --- /dev/null +++ b/src/utils/tests/static/single/conf.yml @@ -0,0 +1,24 @@ +# Metadata for your pipeline: +name: Example Pipeline +description: > + This is an example pipeline. It uses files that are available in this repository + and SPARQL endpoints that should work. + + +# This is optional, by default it will be stored in the data directory of the pipeline using filename 'statements.nt' +destination: file://pipelines/data/example-pipeline.nt + +# The individual stages for your pipeline +stages: + - name: "Stage 1" + iterator: + query: file://static/example/iterator-stage-1.rq + endpoint: https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql + generator: + query: file://static/example/generator-stage-1.rq + - name: "Stage 2" + iterator: + query: file://static/example/iterator-stage-2.rq + generator: + query: file://static/example/generator-stage-2.rq + endpoint: https://query.wikidata.org/sparql diff --git a/src/utils/tests/utilities.test.ts b/src/utils/tests/utilities.test.ts index 3f60ac4..7167660 100644 --- a/src/utils/tests/utilities.test.ts +++ b/src/utils/tests/utilities.test.ts @@ -1,11 +1,41 @@ -import { expect } from "chai" import version from '../version.js' import parseYamlFile from '../parseYamlFile.js' import validate from '../validate.js' -import { isConfiguration } from '../guards.js' +import { isConfiguration, isFile, isFilePathString, isPreviousStage } from '../guards.js' import loadConfiguration from '../loadConfiguration.js' import duration from '../duration.js' +import Pipeline from "../../lib/Pipeline.class.js" +import Stage from "../../lib/Stage.class.js" +import PreviousStage from "../../lib/PreviousStage.class.js" +import File from '../../lib/File.class.js' +import path from "path" +import loadPipelines from "../loadPipelines.js" +import chalk from "chalk" +import assert from "assert" +import getEndpoint from "../getEndpoint.js" +import type { LDWorkbenchConfiguration } from "../../lib/LDWorkbenchConfiguration.js" +import getEngine from "../getEngine.js" +import { QueryEngine as QueryEngineSparql } from "@comunica/query-sparql"; +import { QueryEngine as QueryEngineFile } from "@comunica/query-sparql-file"; +import getEngineSource from "../getEngineSource.js" +import { existsSync, rename } from "fs" +import * as chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import getSPARQLQuery from '../getSPARQLQuery.js' +import getSPARQLQueryString from '../getSPARQLQueryString.js' +chai.use(chaiAsPromised) +const expect = chai.expect +function testDeepEqualTwoObjects(objectOne: any, objectTwo: any): boolean { + try { + // lodash could not properly deep equal check + assert.deepEqual(objectOne, objectTwo) + return true + } catch (error) { + console.error(error) + return false + } +} describe('Utilities', () => { it('should correctly get a version number', () => { expect(version()).match(/^\d+\.\d+\.\d+(?:-rc.+)?$/) @@ -23,13 +53,13 @@ describe('Utilities', () => { expect(() => parseYamlFile('./static/ld-workbench.schema.json')).to.not.throw() }) it('should throw on non-YAML file', () => { - expect(() => {parseYamlFile('./README.md')}).to.throw('Error parsing file: `./README.md`, are you sure it is a YAML file?') + expect(() => { parseYamlFile('./README.md') }).to.throw('Error parsing file: `./README.md`, are you sure it is a YAML file?') }) it('should throw on non existing YAML file', () => { - expect(() => {parseYamlFile('./non-existing-file')}).to.throw('File not found: ./non-existing-file') + expect(() => { parseYamlFile('./non-existing-file') }).to.throw('File not found: ./non-existing-file') }) it('should throw on on directories', () => { - expect(() => {parseYamlFile('./src')}).to.throw('File not found: ./src') + expect(() => { parseYamlFile('./src') }).to.throw('File not found: ./src') }) }) @@ -56,10 +86,725 @@ describe('Utilities', () => { expect(isConfiguration(val)).to.equal(false) } }) - }) + it('should return true for a valid FilePathString with isFilePathString', () => { + const validFilePath = 'file://path/to/file.txt'; + const result = isFilePathString(validFilePath); + expect(result).to.equal(true); + }); + + it('should return false for a non-string value with isFilePathString', () => { + const nonStringValue = 42; + const result = isFilePathString(nonStringValue); + expect(result).to.equal(false); + }); + + it('should return false for a string that does not start with "file://" with isFilePathString', () => { + const invalidFilePath = 'http://example.com/file.txt'; + const result = isFilePathString(invalidFilePath); + expect(result).to.equal(false); + }); + it('should return false for an empty string with isFilePathString', () => { + const emptyString = ''; + const result = isFilePathString(emptyString); + expect(result).to.equal(false); + }); + it('should return false for undefined or null with isFilePathString', () => { + const result1 = isFilePathString(undefined); + const result2 = isFilePathString(null); + expect(result1).to.equal(false); + expect(result2).to.equal(false); + }); + it('should return true for valid PreviousStage object with isPreviousStage', () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + const stageConfig = configuration.stages[0] + const stage = new Stage(pipeline, stageConfig) + const prevStage = new PreviousStage(stage, configuration.stages.names) + const result = isPreviousStage(prevStage); + expect(result).to.equal(true); + }); + it('should return false for invalid object with isPreviousStage', () => { + const invalidObject = { $id: 'Invalid' }; + const result = isPreviousStage(invalidObject); + expect(result).to.be.equal(false); + }); + it('should return false for non-object values with isPreviousStage', () => { + const nonObjectValue = 'string'; + const result = isPreviousStage(nonObjectValue); + expect(result).to.equal(false); + }); + it('should return true for valid PreviousStage object with isFile', () => { + const f = new File(`file://${path.join('./static/example/config.yml')}`) + const result = isFile(f); + expect(result).to.equal(true); + }); + it('should return false for invalid object with isFile', () => { + const invalidObject = { $id: 'Invalid' }; + const result = isFile(invalidObject); + expect(result).to.be.equal(false); + }); + it('should return false for non-object values with isFile', () => { + const nonObjectValue = 'string'; + const result = isFile(nonObjectValue); + expect(result).to.equal(false); + }); + }) it('should load and validate using the wrapper', () => { expect(isConfiguration(loadConfiguration('./static/example/config.yml'))).to.equal(true) expect(() => isConfiguration(loadConfiguration('./package.json'))).to.throw('he YAML file `./package.json` is not a valid LD Workbench configuration file.') }) + describe('loadPipelines', () => { + it('should throw if configuration file & directory could not be found', () => { + const nonExistentConfFile = 'fileDoesNotExist.yml' + const nonExistentDirWithFile = './dirDoesNotExist/' + nonExistentConfFile + expect(() => loadPipelines(nonExistentConfFile)).to.throw(`Configuration directory/file ${chalk.italic(nonExistentConfFile)} could not be found.\nIf this is your first run of LDWorkbench, you might want to use \`npx ldworkbench --init\` to setup an example workbench project.`) + expect(() => loadPipelines(nonExistentDirWithFile)).to.throw(`Configuration directory/file ${chalk.italic(nonExistentDirWithFile)} could not be found.\nIf this is your first run of LDWorkbench, you might want to use \`npx ldworkbench --init\` to setup an example workbench project.`) + }) + it('should throw if directory has no .yml configuration file', () => { + const dirWithoutConfFile = './data/' + expect(() => loadPipelines(dirWithoutConfFile)).to.throw(`No configuration files found matching pattern ${chalk.italic(`${dirWithoutConfFile}/**/*.yml`)}`) + }) + it('should load multiple configuration files in directory', () => { + const pipelines = loadPipelines('./src/utils/tests/static/correct') + const loadedElements = [...pipelines] + const testElements = [ + [ + 'Example Pipeline A', + { + name: 'Example Pipeline A', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1A', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { query: 'file://static/example/generator-stage-1.rq' } + } + ] + } + ], + [ + 'Example Pipeline B', + { + name: 'Example Pipeline B', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1B', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { query: 'file://static/example/generator-stage-1.rq' } + } + ] + } + ] + ] + expect(testDeepEqualTwoObjects(loadedElements, testElements)).to.equal(true) + }) + it('should load single configuration file in directory', () => { + const pipeline = loadPipelines('./src/utils/tests/static/single') + const loadedElement = [...pipeline] + const testElement = [[ + 'Example Pipeline', + { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { query: 'file://static/example/generator-stage-1.rq' } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + ]] + expect(testDeepEqualTwoObjects(loadedElement, testElement)).to.equal(true) + }) + it('should log duplicate names in configuration file', () => { + const originalConsoleError = console.warn; + let capturedConsoleError = ''; + console.warn = (message: string) => { + capturedConsoleError += message + '\n'; + }; + loadPipelines('./src/utils/tests/static/duplicate') + console.warn = originalConsoleError; + expect(capturedConsoleError).to.contain(chalk.yellow(`Warning: skipping a duplicate configuration from file ${chalk.italic('./src/utils/tests/static/duplicate/conf2.yml')} with name ${chalk.italic('Duplicate Example Pipeline')}`)); + }) + }) + describe('getEndpoint', () => { + it('should return File when filePath is provided in Stage', () => { + const filePath = 'file://path/to/file.txt' + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: filePath + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + } + ] + } + const pipeline = new Pipeline(config) + const stageConfig = config.stages[0] + const stage = new Stage(pipeline, stageConfig) + const retrievedEndpoint = getEndpoint(stage) + expect(isFile(retrievedEndpoint) && (retrievedEndpoint.path === 'file://path/to/file.txt')).to.equal(true) + }) + it('should return URL when URL is provided in Stage', () => { + const url = new URL('https://example.com').toString() + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: url + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + } + ] + } + const pipeline = new Pipeline(config) + const stageConfig = config.stages[0] + const stage = new Stage(pipeline, stageConfig) + const retrievedEndpoint = getEndpoint(stage) + expect((retrievedEndpoint instanceof URL) && (retrievedEndpoint.href === 'https://example.com/')).to.equal(true) + }) + it('should throw error if invalid URL is provided', () => { + const url = 'invalidExample' // will be accepted + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: url + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + } + ] + } + const pipeline = new Pipeline(config) + const stageConfig = config.stages[0] + // getEndpoint is use in Stage's Iterator, and it will throw there. + expect(() => new Stage(pipeline, stageConfig)).to.throw('Error in the iterator of stage `Stage 1`: "invalidExample" is not a valid URL') + }) + it('should throw if stage has undefined endpoint and is first stage', () => { + const endpoint = undefined + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + } + ] + } + const pipeline = new Pipeline(config) + const stageConfig = config.stages[0] + expect(() => new Stage(pipeline, stageConfig)).to.throw('Error in the iterator of stage `Stage 1`: no destination defined for the iterator and no previous stage to use that result') + }) + it('should return PreviousStage if stage has undefined endpoint', () => { + const url = new URL('https://example.com').toString() + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: url + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + const pipeline = new Pipeline(config) + pipeline.validate() + const stage = new Stage(pipeline, config.stages[1]) + const retrievedEndpoint = getEndpoint(stage) + expect(isPreviousStage(retrievedEndpoint)).to.equal(true) + }) + }) + describe('getEngine', () => { + it('should return QueryEngineSparql when input is URL', () => { + const url = new URL('https://www.example.com/endpoint') + const result = getEngine(url) + expect(result instanceof QueryEngineSparql).to.equal(true) + }) + it('should return QueryEngineFile when input is File', () => { + const file = new File('file://exampleFile.txt') + const result = getEngine(file) + expect(result instanceof QueryEngineFile).to.equal(true) + }) + it('should return QueryEngineFile when input is PreviousStage', () => { + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://www.example.com/endpoint' + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + const pipeline = new Pipeline(config) + pipeline.validate() + const stage: Stage = new Stage(pipeline, config.stages[1]) + const stagesSoFar = Array.from(stage.pipeline.stages.keys()); + const previousStage = new PreviousStage(stage, stagesSoFar.pop()!); + const result = getEngine(previousStage) + expect(result instanceof QueryEngineFile).to.equal(true) + }) + }) + describe('getEngineSource', () => { + it('should return string when input is File', () => { + const f = new File(`file://${path.join('./static/example/config.yml')}`) + expect(typeof getEngineSource(f) === "string").to.equal(true) + }) + it('should return string when input is URL', () => { + const url = new URL('https://www.example.com') + expect(typeof getEngineSource(url) === "string").to.equal(true) + }) + it('should return engine source string when input is PreviousStage with destinationPath', async () => { + const config: LDWorkbenchConfiguration = { + name: 'Example Pipeline', + description: 'This is an example pipeline. It uses files that are available in this repository and SPARQL endpoints that should work.\n', + destination: 'file://pipelines/data/example-pipeline.nt', + stages: [ + { + name: 'Stage 1', + iterator: { + query: 'file://static/example/iterator-stage-1.rq', + endpoint: 'https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql' + }, + generator: { + query: 'file://static/example/generator-stage-1.rq' + } + }, + { + name: 'Stage 2', + iterator: { + query: 'file://static/example/iterator-stage-2.rq', + }, + generator: { + query: 'file://static/example/generator-stage-2.rq', + endpoint: 'https://query.wikidata.org/sparql' + } + } + ] + } + const pipeline = new Pipeline(config) + pipeline.validate() + const stage2: Stage = new Stage(pipeline, config.stages[1]) + const stagesSoFar = Array.from(stage2.pipeline.stages.keys()); + const previousStage = new PreviousStage(stage2, stagesSoFar.pop()!); + const engineSource = getEngineSource(previousStage) + expect(engineSource === path.join(process.cwd(), "/pipelines/data/example-pipeline/stage-1.nt")).to.equal(true) + }) + describe('should throw', () => { + beforeEach(() => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + pipeline.validate() + const stage: Stage = new Stage(pipeline, configuration.stages[1]) + const stagesSoFar = Array.from(stage.pipeline.stages.keys()); + const previousStage = new PreviousStage(stage, stagesSoFar.pop()!); + const loadedPreviousStage = previousStage.load() + const destPath = loadedPreviousStage?.destinationPath + const editedDesPath = destPath + '123' + if (destPath !== undefined) { + if (existsSync(destPath)) { + rename(destPath, editedDesPath, (err) => { + if (err != null) { + throw err + } + }) + } + } else { + throw new Error('Test Failed, no destination path found') + } + }) + afterEach(() => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + pipeline.validate() + const stage: Stage = new Stage(pipeline, configuration.stages[1]) + const stagesSoFar = Array.from(stage.pipeline.stages.keys()); + const previousStage = new PreviousStage(stage, stagesSoFar.pop()!); + const loadedPreviousStage = previousStage.load() + const destPath = loadedPreviousStage?.destinationPath + const editedDesPath = destPath + '123' + if (existsSync(editedDesPath)) { + + rename(editedDesPath, destPath, (err) => { + if (err != null) { + throw err + } + }) + } + }) + it('should throw when input is PreviousStage and destinationPath does not exist', () => { + const configuration = parseYamlFile('./static/example/config.yml') + const pipeline = new Pipeline(configuration) + pipeline.validate() + const stage: Stage = new Stage(pipeline, configuration.stages[1]) + const stagesSoFar = Array.from(stage.pipeline.stages.keys()); + const previousStage = new PreviousStage(stage, stagesSoFar.pop()!); + const loadedPreviousStage = previousStage.load() + const destPath = loadedPreviousStage?.destinationPath + + expect(() => getEngineSource(previousStage)).to.throw(`The result from stage "${loadedPreviousStage?.name}" (${destPath}) is not available, make sure to run that stage first`) + }) + }) + }) + describe('getSPARQLQuery', () => { + it('should return SPARQL query when input is filepath with query file', () => { + const queryFile = 'file://static/example/iterator-stage-1.rq' + const result = getSPARQLQuery(queryFile, 'select') + expect(result.queryType).to.equal('SELECT') + expect(result.limit).to.equal(10) + expect(result.prefixes.dbo).to.equal('http://dbpedia.org/ontology/') + }) + it('should return SPARQL query when input is string with query', () => { + const queryString = `prefix dbo: + prefix rdf: + prefix rdfs: + select * where { + $this a/rdfs:subClassOf* dbo:Plant + } limit 10 + ` + const result = getSPARQLQuery(queryString, 'select') + expect(result.queryType).to.equal('SELECT') + expect(result.limit).to.equal(10) + expect(result.prefixes.dbo).to.equal('http://dbpedia.org/ontology/') + + }) + it('should throw when filepath is given and does not exist', () => { + const nonExistentFile = 'file://./does/not/exits.txt' + expect(getSPARQLQuery.bind(getSPARQLQuery, nonExistentFile, 'construct')).to.throw(`File not found: ${chalk.italic(nonExistentFile.replace('file://', ''))}`) + }) + + it('should throw if query is not a SPARQL query', () => { + const sqlQuery = `SELECT first_name, last_name, birthdate + FROM employees + WHERE department = 'IT' + ORDER BY last_name, first_name; + ` + let failed: boolean + try { + getSPARQLQuery(sqlQuery, 'select') + failed = false + } catch (error) { + // @ts-expect-error error has no type so we check if hash is invalid + if (error.hash.token === "INVALID"){ + failed = true + } + else { + failed = false + console.log(error) + } + } + expect(failed).to.equal(true) + }) + describe('should throw if CONSTRUCT query contains minus, service, values', () => { + it('should throw for minus', () => { + const minusQuery = `PREFIX rdf: + PREFIX ex: + + CONSTRUCT { + ?city rdf:type ex:City. + } + WHERE { + ?city rdf:type ex:City. + MINUS { ?city ex:isCapitalOf ?country. } + } + + ` + expect(() => getSPARQLQuery(minusQuery, 'construct')).to.throw('SPARQL construct queries must not contain a MINUS clause') + + }) + it('should throw for service', () => { + const serviceQuery = `PREFIX foaf: + PREFIX ex: + + CONSTRUCT { + ?person foaf:name ?name. + ?person ex:hasEmail ?email. + } + WHERE { + ?person foaf:name ?name. + SERVICE { + ?person ex:hasEmail ?email. + } + } + ` + expect(() => getSPARQLQuery(serviceQuery, 'construct')).to.throw('SPARQL construct queries must not contain a SERVICE clause') + }) + it('should throw for values', () => { + const valuesQuery = `PREFIX ex: + + CONSTRUCT { + ?city ex:hasPopulation ?population. + } + WHERE { + VALUES ?city { ex:City1 ex:City2 ex:City3 } + ?city ex:hasPopulation ?population. + } + ` + expect(() => getSPARQLQuery(valuesQuery, 'construct')).to.throw('SPARQL construct queries must not contain a VALUES clause') + + }) + }) + describe('should throw if CONSTRUCT query contains optional, union, group, graph with minus, service, values', () => { + it('should throw for minus with optional', () => { + const minusOptionalQuery = `PREFIX ex: + + CONSTRUCT { + ?city ex:hasPopulation ?population. + } + WHERE { + ?city ex:hasPopulation ?population. + + OPTIONAL { + MINUS { + ?city ex:hasPopulation ?otherPopulation. + FILTER (?population = ?otherPopulation) + } + } + } + ` + expect(() => getSPARQLQuery(minusOptionalQuery, 'construct')).to.throw('SPARQL construct queries must not contain a MINUS clause') + }) + it('should throw for service with union', () => { + const serviceUnionQuery = `PREFIX ex: + + CONSTRUCT { + ?place ex:hasPopulation ?population. + } + WHERE { + { + ?place a ex:City. + ?place ex:hasPopulation ?population. + } + UNION + { + SERVICE { + ?place a ex:Country. + ?place ex:hasPopulation ?population. + } + } + } + + ` + expect(() => getSPARQLQuery(serviceUnionQuery, 'construct')).to.throw('SPARQL construct queries must not contain a SERVICE clause') + + }) + it('should throw for minus with group by', () => { + const minusGroupByQuery = `PREFIX ex: + + CONSTRUCT { + ?cityType ex:averagePopulation ?averagePopulation. + } + WHERE { + { + SELECT ?cityType (AVG(?population) as ?averagePopulation) + WHERE { + ?city ex:hasType ?cityType. + ?city ex:hasPopulation ?population. + } + GROUP BY ?cityType + } + MINUS + { + SELECT ?cityType + WHERE { + ?city ex:hasType ?cityType. + FILTER NOT EXISTS { + ?city ex:hasPopulation ?population. + } + } + } + } + ` + expect(() => getSPARQLQuery(minusGroupByQuery, 'construct')).to.throw('SPARQL construct queries must not contain a MINUS clause') + + }) + it('should throw for values', () => { + const valuesGraphQuery = `PREFIX ex: + + CONSTRUCT { + ?city ex:hasPopulation ?population. + } + WHERE { + GRAPH ?graph { + VALUES (?city ?population) { + (ex:City1 10000) + (ex:City2 15000) + (ex:City3 20000) + } + + ?city ex:hasPopulation ?population. + } + } + ` + expect(() => getSPARQLQuery(valuesGraphQuery, 'construct')).to.throw('SPARQL construct queries must not contain a VALUES clause') + }) + }) + }) + describe('getSPARQLQueryString', () => { + it('should return query string for correct SELECT/CONSTRUCT SPARQL query', () => { + const selectQuery = `PREFIX ex: + + SELECT ?city ?population + WHERE { + ?city ex:hasPopulation ?population. + } + ` + const constructQuery = `PREFIX ex: + + CONSTRUCT { + ?city ex:hasPopulation ?population. + } + WHERE { + ?city ex:hasPopulation ?population. + } + ` + + const expectedSelectQuery = `PREFIX ex: +SELECT ?city ?population WHERE { ?city ex:hasPopulation ?population. }` + const expectedConstructQuery = `PREFIX ex: +CONSTRUCT { ?city ex:hasPopulation ?population. } +WHERE { ?city ex:hasPopulation ?population. }` + + const selectStr = getSPARQLQueryString(getSPARQLQuery(selectQuery, 'select')) + const constructStr = getSPARQLQueryString(getSPARQLQuery(constructQuery, 'construct')) + + expect(selectStr).to.equal(expectedSelectQuery) + expect(constructStr).to.equal(expectedConstructQuery) + }) + describe('should throw error for incorrect SPARQL queries', () => { + it('should throw for incorrect syntax SPARQL query', () => { + const incorrectQuery =`SELECT ?subject ?predicate ?object + WHERE { + ?subject ?predicate ?object + FILTER (?object > 100) + ` + expect(() => getSPARQLQueryString(getSPARQLQuery(incorrectQuery, 'select'))).to.throw() + }) + it('should throw for empty string', () => { + expect(() => getSPARQLQueryString(getSPARQLQuery('', 'select'))).to.throw('Unexpected querytype undefined') + + }) + it('should throw for UPDATE SPARQL query', () => { + const updateQuery = `PREFIX ex: + + DELETE { + ex:City1 ex:hasPopulation ?newPopulation. + } + WHERE { + ex:City1 ex:hasPopulation ?oldPopulation. + FILTER (?oldPopulation = "some_old_value") + } + ` + expect(() => getSPARQLQueryString(getSPARQLQuery(updateQuery, 'select'))).to.throw('Unexpected querytype update') + + }) + it('should throw for ASK SPARQL query', () => { + const askQuery = `PREFIX ex: + + ASK + WHERE { + ex:City1 ex:hasPopulation ?population. + FILTER (?population > 1000000) + } + ` + expect(() => getSPARQLQueryString(getSPARQLQuery(askQuery, 'select'))).to.throw('Unexpected querytype ASK') + }) + it('should throw for DESCRIBE SPARQL query', () => { + const describeQuery = `PREFIX ex: + + DESCRIBE ex:City1 + ` + expect(() => getSPARQLQueryString(getSPARQLQuery(describeQuery, 'select'))).to.throw('Unexpected querytype DESCRIBE') + }) + }) + }) })