Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding tests for existing work, docker/make spinup, CI #14

Merged
merged 1 commit into from
Sep 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Git's own metadata directory
.git
# Files named result.log at any depth (the CI test script writes .travis/result.log)
result.log
# Any path named xspec -- presumably XSpec's generated report directories; TODO confirm
xspec
34 changes: 34 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
# Travis CI configuration: start the Docker-based XSpec environment, run the
# test suites, and tear the containers down after a successful run.
services:
- docker

install:
- make up
# Run chown as root inside the xspec service so the mounted directories belong
# to the xspec user (presumably so test runs can write their output there).
- docker-compose run --user='root' --entrypoint chown xspec -R xspec:xspec /transforms
- docker-compose run --user='root' --entrypoint chown xspec -R xspec:xspec /validations

script:
- make test-travis

after_success:
- make down

before_deploy:
# Quoted so YAML keeps this as the no-op shell command `true`, not a boolean.
- "true"

# Deployment is disabled for now; the intended configuration is kept for
# reference.
# deploy:
#   - provider: script
#     script: bash .travis/deploy-dev.sh
#     on:
#       branch: dev
#   - provider: script
#     script: bash .travis/deploy-prod.sh
#     on:
#       branch:
#         - master
# Slack build notifications. Each `secure` entry is a Travis-encrypted value
# and must be kept exactly as generated.
notifications:
  slack:
    rooms:
    - secure: "AFcpS9RNjQPEciTsUTPA6NBNxI+5wD58Lwa14Zeg75fRJipcvkwSLpBq9DlnQQeOJx8vwEYf1H7Gu4BHHX9ZofBw08WggqOdSe/K7mSq7Bx7DHfgJA0K+nzSfbXCYA8KJ/SbaeLD4TNLSs8s04vGhXRjzDTQPJ/IxufPVj3VDRjq3OmFIWi4bOHOCWML1jJ2C1a7dKlYxRKe2KDOxm/kVMi5QUCovVxndiDeOlxlhQ9o7etZb7kA/uP4Yhi7E8xMbfWuzba/nXnNAHb3pnV+RRsu9YkT7tVzk8kZ5Wfm+cgKRsnE3Q5mEt+UnZcLbDcByNzPXF5aoB8YvQumsOeq5+ZTfrozpq0H/ivb+HwDZ3ScSxJKxAOqGGj+5GFqEX9YaTrY3ok3mjbaW8G/zpKXXZgPR8PjaDTD1Qkbi0dSUbNpMuugi1IcP/mBG5O190a0NFs95WLoB8CSTfKRBReqU1bUSttnLexjRgE3yZWk6Dh3L6AcQtPLcDLQ4ec33vMjYG/Tvtv4c5uMVgFLX6qjqGmaAWo4DGU5cWyQyNBSlZjWdwgdnMxDGF8bmKx4VMyygjN07ybl9ERlmxjYx0NYCe6zHhelkwlDal1ZPKzMAC2z4x5JJaw6ggODUFnv6vQpsewU24SrQAxFUOWYqV3hEE2gyhF6XU1DvytxSYW4qfg="
    - secure: "ktf9IVP4NaM8/r4QEf0x+mK+QhAdaDiQxA8WoIsbmKiR/DbN9vfT76Ax14r4MvAnLWc/XLNYHT873doQ1Y7oFBEB3SwUzDnSIHT/ndHD+qDIlnxTmS74hjSBRWlc5lzRFDOmxA6wui3yOeQ4SHQ2xlz103e+f7zVvdjmaS98tm76WrfNbF4Hl+JBpYvCURYp4TCsZIZK9GJWXLUymBkkFn5dl6HiHQVoubLG/m9UcxQ0+r2ICQNLCygpftLdBR03O/oFaq9i5HcgiWiGl2TfBw4SSjIx+uhNRWYq/Okor+zuDklfIVk25iyJpgA1UfbX5gakFJqqH0QG7eYiLhIkr6DnTPmapBtG3wKj3us5xMCkaWasbTKZ3SM6gAvdjlkXCD54XlDkfZsKvG3iJOwxvCCgAKkolz6QF4/isPxujkTFlrgtLkHx2w+VShkuEKtYxPjvEPEQ3DWTfi5h7lIUMl/DfieJZoQBFRD6+ApYNnauPsTB96+9q1qKH0DXfVjtwTmVA3OsZVDxAiZDGI0WIn/jHgsk/Ixkz00C6ts8qdlEbrfLWBnIsu2vvoC7YEowBQ1T76WHx6buj+4DbfY/rdSuhYw3K94Rlb8yh2/obC/nRK6lR/gQqCU2w3UHqKj5la+K7Qoixa9RhP/fIvhh419h8w84fZl6poCQH204nQI="
5 changes: 5 additions & 0 deletions .travis/deploy-dev.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Runs Combine's manage.py `validation` and `transformation` commands inside
# the combine-django container, each pointed at one raw GitHub URL.
set -e

compose_bin=/usr/local/bin/docker-compose
manage_py=/opt/combine/manage.py

"$compose_bin" exec combine-django python "$manage_py" \
  validation 'test' 'sch' True 'https://raw.githubusercontent.com/tulibraries/aggregator_mdx/master/validations/padigital_reqd_fields.sch'

"$compose_bin" exec combine-django python "$manage_py" \
  transformation 'test' 'xslt' 'https://raw.githubusercontent.com/tulibraries/aggregator_mdx/fix-dupes/transforms/temple_p16002coll25.xsl'
8 changes: 8 additions & 0 deletions .travis/deploy-prod.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Production deploy hook. It is referenced only from the commented-out
# `deploy` section of .travis.yml, so it does not run in CI today.
set -e

# Connect to the Combine production host through the jump host given by
# TRAEFIK_IP. Expansions are quoted so empty or space-containing values
# cannot change the argument list.
# NOTE(review): no remote command is passed, so this opens a session and the
# loop below runs on the machine executing this script once the session ends.
# Confirm whether the docker-compose calls were meant to run remotely.
ssh -J "${DEPLOY_USER}@${TRAEFIK_IP}" -p "${SSH_PORT}" "${DEPLOY_USER}@${COMBINE_PROD_IP}"

# NOTE(review): the loop variable is never used; every iteration issues the
# same two hard-coded manage.py commands. Presumably each file found should be
# loaded individually -- confirm before enabling deployment.
for xsl in $(find transforms -type f -name '*.xsl'); do
  /usr/local/bin/docker-compose exec -u combine combine-django python /opt/combine/manage.py validation 'test' 'sch' True 'https://raw.githubusercontent.com/tulibraries/aggregator_mdx/master/validations/padigital_reqd_fields.sch'
  /usr/local/bin/docker-compose exec -u combine combine-django python /opt/combine/manage.py transformation 'test' 'xslt' 'https://raw.githubusercontent.com/tulibraries/aggregator_mdx/fix-dupes/transforms/temple_p16002coll25.xsl'
done
28 changes: 28 additions & 0 deletions .travis/run-tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# Runs every *.xspec suite under transforms/ and validations/ through the
# `xspec` docker-compose service. Stops with exit code 1 at the first suite
# whose run exits non-zero or whose output reports failed tests or a
# compile/run error, after printing that suite's captured log.
set -e

RESULT_LOG=.travis/result.log

# run_suites DIR [XSPEC_ARG...]
#   DIR        directory searched recursively for *.xspec files
#   XSPEC_ARG  extra arguments placed before the suite path (e.g. -s)
# Paths containing whitespace are not supported because the find output is
# word-split.
run_suites() {
  local dir=$1
  shift
  local xspectest status

  for xspectest in $(find "$dir" -type f -name '*.xspec'); do
    status=0
    # Capture the exit status instead of letting `set -e` abort here;
    # otherwise a crashing run would end the script before the log is shown.
    docker-compose run xspec "$@" "/$xspectest" &> "$RESULT_LOG" || status=$?

    if [ "$status" -ne 0 ] \
      || grep -q ".*failed:\s[1-9]" "$RESULT_LOG" \
      || grep -q -E "\*+\sError\s(running|compiling)\sthe\stest\ssuite" "$RESULT_LOG"
    then
      echo "FAILED: $xspectest"
      echo "---------- result.log"
      cat "$RESULT_LOG"
      echo "----------"
      exit 1
    else
      echo "OK: $xspectest"
    fi
  done
}

run_suites transforms
run_suites validations -s
33 changes: 33 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Image that runs the XSpec command-line runner with Saxon HE on a Java 8 JRE.
FROM openjdk:8-jre

ENV XSPEC_VERSION=1.3.0
ENV SAXON_VERSION=9.9.1-4

# install XSpec: fetch the release tarball, unpack it in / and rename to /xspec
RUN curl -fSL -o xspec-${XSPEC_VERSION}.tar.gz https://github.com/xspec/xspec/archive/v${XSPEC_VERSION}.tar.gz && \
    tar xvzf xspec-${XSPEC_VERSION}.tar.gz && \
    mv /xspec-${XSPEC_VERSION} /xspec && \
    rm xspec-${XSPEC_VERSION}.tar.gz
ENV XSPEC_HOME=/xspec
# mount points used by docker-compose.yml
RUN mkdir -p /transforms /validations

WORKDIR /xspec

# install Saxon HE
# Downloaded over HTTPS from repo1.maven.org; the old http://central.maven.org
# endpoint is no longer served. The SHA-1 check guards the downloaded jar.
ENV SAXON_DOWNLOAD_SHA1=96574cbdfeea782b252a071d0c33295f4ff543ca
RUN mkdir -p saxon && \
    export SAXON_CP=/xspec/saxon/saxon9he.jar && \
    curl -fSL -o ${SAXON_CP} https://repo1.maven.org/maven2/net/sf/saxon/Saxon-HE/${SAXON_VERSION}/Saxon-HE-${SAXON_VERSION}.jar && \
    echo "${SAXON_DOWNLOAD_SHA1}  ${SAXON_CP}" | sha1sum -c - && \
    chmod +x ${SAXON_CP}
ENV SAXON_CP=/xspec/saxon/saxon9he.jar

# use non-privileged user to run xspec
RUN groupadd -r xspec && \
    useradd -s /bin/bash -r -g xspec xspec && \
    chown xspec:xspec -R /xspec && \
    chown xspec:xspec -R /transforms /validations
USER xspec

# With no arguments the container runs the entrypoint script with -h.
ENTRYPOINT ["/xspec/bin/xspec.sh"]
CMD ["-h"]
26 changes: 26 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Developer entry points for the Docker-based XSpec test environment.
# None of these targets produce files, so they are all declared phony; this
# keeps them runnable even if a file or directory with the same name exists.
.PHONY: up test test-travis stop down

# Remove any previous containers, then pull, rebuild and start the services.
up: down
	@echo "Building xspec containers, networks, volumes"
	docker-compose pull
	docker-compose up --build -d

# Run every *.xspec suite locally. Only the exit status of the last suite in
# each loop reaches make; use test-travis when earlier failures must fail
# the build.
test: up
	@echo "Testing transforms/*.xspec with Docker"
	for xspectest in $(shell find transforms -type f -name '*.xspec'); do \
		docker-compose run xspec "/$$xspectest" ; \
	done
	@echo "Testing validations/*.xspec with Docker"
	for xspectest in $(shell find validations -type f -name '*.xspec'); do \
		docker-compose run xspec -s "/$$xspectest" ; \
	done

# CI variant: delegates to the shell script that exits non-zero on failure.
test-travis:
	@echo "Travis testing *.xspec with Docker & shell scripts"
	bash .travis/run-tests.sh

stop:
	@echo "Stopping xspec containers, networks, volumes"
	docker-compose stop

# Stop the services, then remove their containers and anonymous volumes.
down: stop
	@echo "Killing xspec containers, networks, volumes"
	docker-compose rm -fv
51 changes: 51 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1 +1,52 @@
# aggregator_mdx
This repository contains metadata transformations, validations, Elasticsearch mappers, and sample data (fixtures) used by the PA Digital aggregator setup. The validations and transformations are Schematron and XSL; while the mappers are for use in the Elasticsearch instance behind Combine. All pieces here are used in the PA Digital Combine, albeit the validations and transformations are created to be platform-independent (within realm of XML technologies).

### Transforms

The XSLT (all version 2) is written in 3 levels:

- collection-specific XSLT (e.g. `transforms/temple_p16002coll25.xsl`): these are the files you actually run against the desired XML files. This imports the provider-specific XSLT (next).
- provider-specific XSLT (e.g. `transforms/temple.xsl`): this file is imported above and contains XML node templates that are reused across collections. If/when collections are consistent enough to use the same XSLT, this can be made into that (for example, `dplah.xsl` could easily be a single collection & provider-level transform, though it currently imports shared templates from `temple.xsl` for sake of reuse). This file imports the remediation XSLT (next).
- remediation-specific XSLT (`transforms/remediations/*.xsl`): these files have normalization & enhancements.
- `remediations/dedupe.xsl` performs a deduplication of elements and values within a record.
- `remediations/filter.xsl` is a list of identifiers that cause a record to be filtered out from the transform output.
- `remediations/lookups.xsl` has lookup parameters used by the above templates to normalize string values against a variety of vocabularies, including DCMI Types, the DPLA-recommended Getty AAT subset, Lexvo Language look-ups, month abbreviations, etc. An example of using these lookup params is commented out in `temple.xsl` (see the template for `dc:language`).

To run one of the collection-specific XSLT, you need to have all 3 files in the same directory structure.

#### Using XSLT in Combine
Combine has specific hang-ups when using XSLT that imports other XSLT documents. See: https://combine.readthedocs.io/en/master/configuration.html?highlight=xslt#local-includes

Basically, to load these XSLT documents into Combine, each document with an `xsl:include` requires a change in the file to either import an HTTP URL of the included files (like a raw document GitHub URL) or be changed in Combine to reference the auto-generated filepath for those included within Combine.

### Validations

To be written up.

### ES Mappers

To be written up.

## Local Development & Testing

Unit tests for the XSLT are written using [XSpec](https://github.com/xspec/xspec). This testing library requires both the Saxon processor & Java to be installed. See the [XSpec installation instructions](https://github.com/xspec/xspec/wiki/Installation-on-Mac-and-Linux#requirements) for more if you want to run locally; otherwise, follow these Docker local development and testing setup instructions:

```
$ git clone https://github.com/tulibraries/aggregator_mdx.git
$ cd aggregator_mdx
$ make test
[build output]
[test output]
```

These commands run any `*.xspec` files found within the `transforms` or `validations` directories.

To add local tests, current practice is to create a similarly-named xspec file in the same directory as the file you are testing, then write a test to confirm each case per validation rule, XSL matched template, or XSL named template. See the tutorial folder in [XSpec](https://github.com/xspec/xspec) for more help. Coverage is not currently calculated, but will be added in a future devOps rotation.

## Deployment to Combine

CI/CD for this repository is handled by Travis.

For CI, Travis runs `make up` (building the same test infrastructure used above locally), then `make test-travis`. This is a modified version of `make test`, so that failed tests actually return a non-0 code (and thus make the Travis build fail). You can see these commands in the `.travis.yml` file.

For CD, Travis is planned to run scripts that will connect over SSH to the deployed infrastructure running Combine Docker, then run a load (create or update) command for every `*.xsl` file found within the `transforms` directory and every `*.sch` file found within the `validations` directory. This is waiting for code and infrastructure work on the Combine Docker projects.
10 changes: 10 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# Compose project with a single `xspec` service built from the local Dockerfile.
version: '3'

services:
  xspec:
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      # Bind-mount the repository's suites at /transforms and /validations.
      - ./transforms:/transforms
      - ./validations:/validations
50 changes: 26 additions & 24 deletions validations/padigital_reqd_fields.sch
Original file line number Diff line number Diff line change
@@ -1,47 +1,49 @@
<?xml version="1.0" encoding="UTF-8"?>
<schema xmlns="http://purl.oclc.org/dsdl/schematron"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:edm="http://www.europeana.eu/schemas/edm/">
xmlns:edm="http://www.europeana.eu/schemas/edm/"
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/">
<ns prefix="dcterms" uri="http://purl.org/dc/terms/"/>
<ns prefix="edm" uri="http://www.europeana.eu/schemas/edm/"/>
<ns prefix="oai_dc" uri="http://www.openarchives.org/OAI/2.0/oai_dc/"/>
<!-- Required Fields -->
<pattern>
<pattern id="RequiredElementsPattern">
<title>Required PA Digital Elements</title>
<rule context="record">
<assert test="dcterms:title">There must be a title</assert>
<assert test="dcterms:rights or edm:rights">There must be a rights statement</assert>
<assert test="edm:isShownAt">There must be a trackback URL</assert>
<assert test="edm:preview">There must be a thumbnail URL</assert>
<rule context="oai_dc:dc">
<assert test="dcterms:title" id="Required1" role="error">There must be a title</assert>
<assert test="dcterms:rights or edm:rights" id="Required2" role="error">There must be a rights statement</assert>
<assert test="edm:isShownAt" id="Required3" role="error">There must be a trackback URL</assert>
<assert test="edm:preview" id="Required4" role="error">There must be a thumbnail URL</assert>
</rule>
</pattern>
<pattern>
<pattern id="TitleElementPattern">
<title>Additional Title Requirements</title>
<rule context="record/dcterms:title">
<assert test="normalize-space(.)">The title element must contain text</assert>
<rule context="oai_dc:dc/dcterms:title">
<assert test="normalize-space(.)" id="Title1" role="error">The title element must contain text</assert>
</rule>
</pattern>
<pattern>
</pattern>
<pattern id="ItemURLElementPattern">
<title>Additional Trackback URL Requirements</title>
<rule context="record/edm:isShownAt">
<assert test="normalize-space(.)">The trackback URL must contain text</assert>
<rule context="oai_dc:dc/edm:isShownAt">
<assert test="normalize-space(.)" id="ItemURL1" role="error">The trackback URL must contain text</assert>
</rule>
</pattern>
<pattern>
</pattern>
<pattern id="ThumbnailURLElementPattern">
<title>Additional Thumbnail URL Requirements</title>
<rule context="record/edm:preview">
<assert test="normalize-space(.)">The thumbnail URL must contain text</assert>
<rule context="oai_dc:dc/edm:preview">
<assert test="normalize-space(.)" id="ThumbnailURL1" role="error">The thumbnail URL must contain text</assert>
</rule>
</pattern>
<pattern>
<pattern id="DCTRightsElementPattern">
<title>Additional Rights Requirements</title>
<rule context="record/dcterms:rights">
<assert test="normalize-space(.)">dcterms:rights must contain text</assert>
<rule context="oai_dc:dc/dcterms:rights">
<assert test="normalize-space(.)" id="DCTRights1" role="error">dcterms:rights must contain text</assert>
</rule>
</pattern>
<pattern>
<pattern id="EDMRightsElementPattern">
<title>Additional Rights Requirements</title>
<rule context="record/edm:rights">
<assert test="normalize-space(.)">edm:rights must contain text</assert>
<rule context="oai_dc:dc/edm:rights">
<assert test="normalize-space(.)" id="EDMRights1" role="error">edm:rights must contain text</assert>
</rule>
</pattern>
</schema>
Loading