diff --git a/.github/workflows/publish-to-pypi.yaml b/.github/workflows/publish-to-pypi.yaml new file mode 100644 index 00000000..6c579ad0 --- /dev/null +++ b/.github/workflows/publish-to-pypi.yaml @@ -0,0 +1,29 @@ +name: Publish +on: + release: + types: + - published +jobs: + PyPI: + name: publish release to PyPI + runs-on: ubuntu-latest + environment: release + permissions: + id-token: write + env: + SDIST: python-icat-${{ github.event.release.tag_name }}.tar.gz + steps: + - name: Fetch assets + uses: cb80/dlassets@latest + with: + tag: ${{ github.event.release.tag_name }} + to: assets + - name: Check assets + run: | + ls -la assets + - name: Copy distfile to dist directory + run: | + mkdir -p dist + cp -p assets/$SDIST dist + - name: Upload distfile to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/rst-lint.yaml b/.github/workflows/rst-lint.yaml new file mode 100644 index 00000000..187ce87c --- /dev/null +++ b/.github/workflows/rst-lint.yaml @@ -0,0 +1,30 @@ +name: Check ReST input files +on: + push: + branches: + - develop + - master + pull_request: +jobs: + doc8: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: Install dependencies + run: | + pip install setuptools packaging git-props suds + - name: Run conf.py + run: | + python setup.py build + python doc/src/conf.py + - name: doc8-check + uses: deep-entertainment/doc8-action@v4 + with: + scanPaths: "doc/src" diff --git a/.gitignore b/.gitignore index 2e60c739..67904d8e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,12 @@ __pycache__/ +/.local/ /MANIFEST /_meta.py /build/ /dist/ -/icat/__init__.py /tests/data/example_data.yaml /tests/data/icat.cfg +/tests/data/icatdata-*.xsd /tests/data/icatdump-*.xml /tests/data/icatdump-*.yaml /tests/data/ingest-*.xml @@ -13,4 +14,5 @@ __pycache__/ 
/tests/data/ingest.xslt /tests/data/metadata-*-inl.xml /tests/data/metadata-*-sep.xml +/tests/data/metadata-sample.xml /tests/scripts/ diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 8a892916..409b807b 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,7 +11,7 @@ build: post_checkout: - git fetch --unshallow post_install: - - python setup.py meta + - python setup.py build sphinx: configuration: doc/src/conf.py diff --git a/.rtd-require b/.rtd-require index 2de815cd..742c7f5b 100644 --- a/.rtd-require +++ b/.rtd-require @@ -1,9 +1,8 @@ PyYAML +git-props lxml packaging setuptools -setuptools_scm suds -jinja2<3.1 -sphinx>=2,<3 -sphinx-rtd-theme>=0.5,<1 +sphinx-copybutton +sphinx_rtd_theme diff --git a/CHANGES.rst b/CHANGES.rst index 7a21a6cd..2941612e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,55 @@ Changelog ========= +.. _changes-1_3_0: + +1.3.0 (2024-03-21) +~~~~~~~~~~~~~~~~~~ + +New features +------------ + ++ `#143`_, `#144`_: Make it easier to configure XSLT files to use for + processing the input in custom versions of + :class:`icat.ingest.IngestReader`. + ++ `#148`_, `#149`_: Inject an additional element with environment + information into the input data in :class:`icat.ingest.IngestReader`. + ++ `#146`_, `#147`_, `#151`_: Better error handling in + :class:`icat.ingest.IngestReader`. + +Incompatible changes +-------------------- + ++ `#144`_: Drop class attribute + :attr:`icat.ingest.IngestReader.XSLT_name` in favour of + :attr:`icat.ingest.IngestReader.XSLT_Map`. + + Note that :mod:`icat.ingest` has been declared experimental for now. + +Bug fixes and minor changes +--------------------------- + ++ `#141`_, `#142`_, `#150`_: Review documentation. + ++ `#145`_: Review build tool chain. + +.. _#141: https://github.com/icatproject/python-icat/issues/141 +.. _#142: https://github.com/icatproject/python-icat/pull/142 +.. _#143: https://github.com/icatproject/python-icat/issues/143 +.. 
_#144: https://github.com/icatproject/python-icat/pull/144 +.. _#145: https://github.com/icatproject/python-icat/pull/145 +.. _#146: https://github.com/icatproject/python-icat/issues/146 +.. _#147: https://github.com/icatproject/python-icat/pull/147 +.. _#148: https://github.com/icatproject/python-icat/issues/148 +.. _#149: https://github.com/icatproject/python-icat/pull/149 +.. _#150: https://github.com/icatproject/python-icat/pull/150 +.. _#151: https://github.com/icatproject/python-icat/pull/151 + + +.. _changes-1_2_0: + 1.2.0 (2023-10-31) ~~~~~~~~~~~~~~~~~~ @@ -50,6 +99,8 @@ Bug fixes and minor changes .. _#140: https://github.com/icatproject/python-icat/pull/140 +.. _changes-1_1_0: + 1.1.0 (2023-06-30) ~~~~~~~~~~~~~~~~~~ @@ -105,6 +156,8 @@ Bug fixes and minor changes .. _#129: https://github.com/icatproject/python-icat/pull/129 +.. _changes-1_0_0: + 1.0.0 (2022-12-21) ~~~~~~~~~~~~~~~~~~ @@ -197,6 +250,8 @@ Bug fixes and minor changes .. _#106: https://github.com/icatproject/python-icat/pull/106 +.. _changes-0_21_0: + 0.21.0 (2022-01-28) ~~~~~~~~~~~~~~~~~~~ @@ -215,6 +270,8 @@ New features .. _#100: https://github.com/icatproject/python-icat/pull/100 +.. _changes-0_20_1: + 0.20.1 (2021-11-04) ~~~~~~~~~~~~~~~~~~~ @@ -226,6 +283,8 @@ Bug fixes and minor changes .. _#96: https://github.com/icatproject/python-icat/pull/96 +.. _changes-0_20_0: + 0.20.0 (2021-10-29) ~~~~~~~~~~~~~~~~~~~ @@ -262,6 +321,8 @@ Bug fixes and minor changes .. _#95: https://github.com/icatproject/python-icat/pull/95 +.. _changes-0_19_0: + 0.19.0 (2021-07-20) ~~~~~~~~~~~~~~~~~~~ @@ -290,6 +351,8 @@ Bug fixes and minor changes .. _#85: https://github.com/icatproject/python-icat/pull/85 +.. _changes-0_18_1: + 0.18.1 (2021-04-13) ~~~~~~~~~~~~~~~~~~~ @@ -307,6 +370,8 @@ Bug fixes and minor changes .. _#82: https://github.com/icatproject/python-icat/pull/82 +.. _changes-0_18_0: + 0.18.0 (2021-03-29) ~~~~~~~~~~~~~~~~~~~ @@ -343,6 +408,8 @@ Bug fixes and minor changes .. 
_#80: https://github.com/icatproject/python-icat/pull/80 +.. _changes-0_17_0: + 0.17.0 (2020-04-30) ~~~~~~~~~~~~~~~~~~~ @@ -434,6 +501,8 @@ Misc .. _#72: https://github.com/icatproject/python-icat/issues/72 +.. _changes-0_16_0: + 0.16.0 (2019-09-26) ~~~~~~~~~~~~~~~~~~~ @@ -458,6 +527,8 @@ Bug fixes and minor changes .. _#60: https://github.com/icatproject/python-icat/pull/60 +.. _changes-0_15_1: + 0.15.1 (2019-07-12) ~~~~~~~~~~~~~~~~~~~ @@ -479,6 +550,8 @@ Bug fixes and minor changes .. _#57: https://github.com/icatproject/python-icat/issues/57 +.. _changes-0_15_0: + 0.15.0 (2019-03-27) ~~~~~~~~~~~~~~~~~~~ @@ -517,6 +590,8 @@ Bug fixes and minor changes .. _#54: https://github.com/icatproject/python-icat/issues/54 +.. _changes-0_14_2: + 0.14.2 (2018-10-25) ~~~~~~~~~~~~~~~~~~~ @@ -529,6 +604,8 @@ Bug fixes and minor changes probably not need it. +.. _changes-0_14_1: + 0.14.1 (2018-06-05) ~~~~~~~~~~~~~~~~~~~ @@ -539,6 +616,8 @@ Bug fixes and minor changes for the Write API call. +.. _changes-0_14_0: + 0.14.0 (2018-06-01) ~~~~~~~~~~~~~~~~~~~ @@ -594,6 +673,8 @@ Bug fixes and minor changes .. _#48: https://github.com/icatproject/python-icat/issues/48 +.. _changes-0_13_1: + 0.13.1 (2017-07-12) ~~~~~~~~~~~~~~~~~~~ @@ -606,6 +687,8 @@ Bug fixes and minor changes .. _#38: https://github.com/icatproject/python-icat/issues/38 +.. _changes-0_13_0: + 0.13.0 (2017-06-09) ~~~~~~~~~~~~~~~~~~~ @@ -764,6 +847,8 @@ Bug fixes and minor changes .. _pytest-dependency: https://pypi.python.org/pypi/pytest_dependency/ +.. _changes-0_12_0: + 0.12.0 (2016-10-10) ~~~~~~~~~~~~~~~~~~~ @@ -803,6 +888,8 @@ Bug fixes and minor changes .. _#28: https://github.com/icatproject/python-icat/issues/28 +.. _changes-0_11_0: + 0.11.0 (2016-06-01) ~~~~~~~~~~~~~~~~~~~ @@ -862,6 +949,8 @@ Misc .. _distutils_pytest: https://github.com/RKrahl/distutils-pytest +.. _changes-0_10_0: + 0.10.0 (2015-12-06) ~~~~~~~~~~~~~~~~~~~ @@ -930,6 +1019,8 @@ Bug fixes and minor changes .. 
_#15: https://github.com/icatproject/python-icat/issues/15 +.. _changes-0_9_0: + 0.9.0 (2015-08-13) ~~~~~~~~~~~~~~~~~~ @@ -1033,6 +1124,8 @@ Bug fixes and minor changes .. _#10: https://github.com/icatproject/python-icat/issues/10 +.. _changes-0_8_0: + 0.8.0 (2015-05-08) ~~~~~~~~~~~~~~~~~~ @@ -1122,6 +1215,8 @@ Bug fixes and minor changes :meth:`icat.query.Query.__repr__`. +.. _changes-0_7_0: + 0.7.0 (2015-02-11) ~~~~~~~~~~~~~~~~~~ @@ -1153,6 +1248,8 @@ New features :meth:`icat.ids.IDSClient.getLink` method. +.. _changes-0_6_0: + 0.6.0 (2014-12-15) ~~~~~~~~~~~~~~~~~~ @@ -1280,6 +1377,8 @@ Minor changes and fixes + Add comparison operators to class :class:`icat.listproxy.ListProxy`. +.. _changes-0_5_1: + 0.5.1 (2014-07-07) ~~~~~~~~~~~~~~~~~~ @@ -1323,6 +1422,8 @@ Minor changes and fixes modifications, such as running 2to3 on them. +.. _changes-0_5_0: + 0.5.0 (2014-06-24) ~~~~~~~~~~~~~~~~~~ @@ -1365,6 +1466,8 @@ Minor changes and fixes .. __: https://github.com/icatproject/icat.server/issues/112 +.. _changes-0_4_0: + 0.4.0 (2014-02-11) ~~~~~~~~~~~~~~~~~~ @@ -1412,6 +1515,8 @@ Minor changes and fixes :ref:`icatrestore <icatingest>`. +.. _changes-0_3_0: + 0.3.0 (2014-01-10) ~~~~~~~~~~~~~~~~~~ @@ -1458,6 +1563,8 @@ Minor changes and fixes + Add example scripts :ref:`icatdump` and :ref:`icatrestore <icatingest>`. +.. _changes-0_2_0: + 0.2.0 (2013-11-18) ~~~~~~~~~~~~~~~~~~ @@ -1498,6 +1605,8 @@ Minor changes and fixes import :mod:`icat` and :mod:`icat.config`. +.. 
_changes-0_1_0: + 0.1.0 (2013-11-01) ~~~~~~~~~~~~~~~~~~ diff --git a/MANIFEST.in b/MANIFEST.in index fe080b6d..1e4d72c2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,15 +10,19 @@ include doc/examples/icatdump-*.xml include doc/examples/icatdump-*.yaml include doc/examples/ingest-*.xml include doc/examples/metadata-*.xml +include doc/examples/metadata.xml include doc/icatdata*.xsd include doc/man/* include doc/tutorial/*.py include etc/ingest-*.xsd include etc/ingest.xslt include tests/conftest.py +include tests/data/ingest-env.xslt include tests/data/legacy-icatdump-*.xml include tests/data/legacy-icatdump-*.yaml -include tests/data/metadata-5.0-badref.xml +include tests/data/metadata-*.xml +include tests/data/myingest.xsd +include tests/data/myingest.xslt include tests/data/ref-icatdump-*.xml include tests/data/ref-icatdump-*.yaml include tests/data/summary* diff --git a/Makefile b/Makefile index 41ac4b47..bdce49b9 100644 --- a/Makefile +++ b/Makefile @@ -10,24 +10,25 @@ test: sdist: doc-man $(PYTHON) setup.py sdist -doc-html: meta - $(MAKE) -C doc html PYTHONPATH=$(CURDIR) +doc-html: build + $(MAKE) -C doc html -doc-man: meta - $(MAKE) -C doc man PYTHONPATH=$(CURDIR) +doc-man: build + $(MAKE) -C doc man clean: rm -rf build - rm -rf __pycache__ icat/__pycache__ + rm -rf __pycache__ rm -rf tests/data/example_data.yaml + rm -rf tests/data/icatdata-*.xsd rm -rf tests/data/icatdump-* tests/data/ingest-*.xml rm -rf tests/data/ingest-*.xsd tests/data/ingest.xslt rm -rf tests/data/metadata-*-inl.xml tests/data/metadata-*-sep.xml + rm -rf tests/data/metadata-sample.xml rm -rf tests/scripts distclean: clean rm -f MANIFEST _meta.py - rm -f icat/__init__.py rm -rf dist rm -rf tests/.pytest_cache $(MAKE) -C doc distclean diff --git a/README.rst b/README.rst index 1d1b19ca..54cacb19 100644 --- a/README.rst +++ b/README.rst @@ -21,10 +21,10 @@ is based on Suds and extends it with ICAT specific features. 
Download -------- -The latest release version can be found in the -`Python Package Index (PyPI)`__. +The latest release version can be found at the +`release page on GitHub`__. -.. __: `PyPI site`_ +.. __: `GitHub release`_ Documentation @@ -50,7 +50,7 @@ the reason why the example scripts require PyYAML. Copyright and License --------------------- -Copyright 2013–2023 +Copyright 2013–2024 Helmholtz-Zentrum Berlin für Materialien und Energie GmbH Licensed under the `Apache License`_, Version 2.0 (the "License"); you @@ -64,6 +64,6 @@ permissions and limitations under the License. .. _ICAT: https://icatproject.org/ -.. _PyPI site: https://pypi.org/project/python-icat/ +.. _GitHub release: https://github.com/icatproject/python-icat/releases/latest .. _Read the Docs site: https://python-icat.readthedocs.io/ .. _Apache License: https://www.apache.org/licenses/LICENSE-2.0 diff --git a/doc/.gitignore b/doc/.gitignore index e938dd2d..b6a292cd 100644 --- a/doc/.gitignore +++ b/doc/.gitignore @@ -1,3 +1,4 @@ +/src/_meta.rst /devhelp/ /dirhtml/ /doctest/ diff --git a/doc/Makefile b/doc/Makefile index 9cc7cebc..7358c71a 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -20,6 +20,7 @@ $(BUILDERS): $(STATIC_SOURCEDIRS) distclean: rm -rf doctrees $(BUILDERS) + rm -f src/_meta.rst $(STATIC_SOURCEDIRS): mkdir $@ diff --git a/doc/examples/add-investigation-data.py b/doc/examples/add-investigation-data.py old mode 100755 new mode 100644 diff --git a/doc/examples/add-job.py b/doc/examples/add-job.py old mode 100755 new mode 100644 diff --git a/doc/examples/addfile.py b/doc/examples/addfile.py old mode 100755 new mode 100644 diff --git a/doc/examples/create-datafile.py b/doc/examples/create-datafile.py old mode 100755 new mode 100644 diff --git a/doc/examples/create-investigation.py b/doc/examples/create-investigation.py old mode 100755 new mode 100644 diff --git a/doc/examples/create-parametertypes.py b/doc/examples/create-parametertypes.py old mode 100755 new mode 100644 diff --git 
a/doc/examples/create-sampletype.py b/doc/examples/create-sampletype.py old mode 100755 new mode 100644 diff --git a/doc/examples/downloaddata.py b/doc/examples/downloaddata.py old mode 100755 new mode 100644 diff --git a/doc/examples/dumpinvestigation.py b/doc/examples/dumpinvestigation.py old mode 100755 new mode 100644 diff --git a/doc/examples/dumprules.py b/doc/examples/dumprules.py old mode 100755 new mode 100644 diff --git a/doc/examples/getversion.py b/doc/examples/getversion.py old mode 100755 new mode 100644 diff --git a/doc/examples/icatdump-simple.xml b/doc/examples/icatdump-simple.xml new file mode 100644 index 00000000..63dc689d --- /dev/null +++ b/doc/examples/icatdump-simple.xml @@ -0,0 +1,94 @@ + + + + 2024-01-03T13:21:15+00:00 + https://icat.example.com:8181/ICATService/ICAT?wsdl + 6.0.0 + icatdump (python-icat 1.2.0) + + + + Goethe University Frankfurt, Faculty of Philosophy and History + ahau@example.org + Hau + Arnold Hau + Arnold + db/ahau + 0000-0002-3263 + + + Université Paul-Valéry Montpellier 3 + jbotu@example.org + Botul + Jean-Baptiste Botul + Jean-Baptiste + db/jbotu + 0000-0002-3264 + + + jdoe@example.org + Doe + John Doe + John + db/jdoe + + + University of Nancago + nbour@example.org + Bourbaki + Nicolas Bourbaki + Nicolas + db/nbour + 0000-0002-3266 + + + investigation_10100601-ST_owner + + + + + + investigation_10100601-ST_reader + + + + + + + + + + + + investigation_10100601-ST_writer + + + + + + + + DOI:00.0815/inv-00601 + 2010-10-12T15:00:00+00:00 + 4 + 127125 + 10100601-ST + 2010-09-30T10:27:24+00:00 + Ni-Mn-Ga flat cone + 1.1-N + + + owner + + + + reader + + + + writer + + + + + diff --git a/doc/examples/icatdump-simple.yaml b/doc/examples/icatdump-simple.yaml new file mode 100644 index 00000000..26648f3b --- /dev/null +++ b/doc/examples/icatdump-simple.yaml @@ -0,0 +1,71 @@ +%YAML 1.1 +# Date: Wed, 03 Jan 2024 13:24:51 +0000 +# Service: https://icat.example.com:8181/ICATService/ICAT?wsdl +# ICAT-API: 6.0.0 +# Generator: 
icatdump (python-icat 1.2.0) +--- +grouping: + Grouping_name-investigation=5F10100601=2DST=5Fowner: + name: investigation_10100601-ST_owner + userGroups: + - user: User_name-db=2Fahau + Grouping_name-investigation=5F10100601=2DST=5Freader: + name: investigation_10100601-ST_reader + userGroups: + - user: User_name-db=2Fjbotu + - user: User_name-db=2Fjdoe + - user: User_name-db=2Fnbour + Grouping_name-investigation=5F10100601=2DST=5Fwriter: + name: investigation_10100601-ST_writer + userGroups: + - user: User_name-db=2Fahau +user: + User_name-db=2Fahau: + affiliation: Goethe University Frankfurt, Faculty of Philosophy and History + email: ahau@example.org + familyName: Hau + fullName: Arnold Hau + givenName: Arnold + name: db/ahau + orcidId: 0000-0002-3263 + User_name-db=2Fjbotu: + affiliation: "Universit\xE9 Paul-Val\xE9ry Montpellier 3" + email: jbotu@example.org + familyName: Botul + fullName: Jean-Baptiste Botul + givenName: Jean-Baptiste + name: db/jbotu + orcidId: 0000-0002-3264 + User_name-db=2Fjdoe: + email: jdoe@example.org + familyName: Doe + fullName: John Doe + givenName: John + name: db/jdoe + User_name-db=2Fnbour: + affiliation: University of Nancago + email: nbour@example.org + familyName: Bourbaki + fullName: Nicolas Bourbaki + givenName: Nicolas + name: db/nbour + orcidId: 0000-0002-3266 +--- +investigation: + Investigation_facility-(name-ESNF)_name-10100601=2DST_visitId-1=2E1=2DN: + doi: DOI:00.0815/inv-00601 + endDate: '2010-10-12T15:00:00+00:00' + facility: Facility_name-ESNF + fileCount: 4 + fileSize: 127125 + investigationGroups: + - grouping: Grouping_name-investigation=5F10100601=2DST=5Fowner + role: owner + - grouping: Grouping_name-investigation=5F10100601=2DST=5Freader + role: reader + - grouping: Grouping_name-investigation=5F10100601=2DST=5Fwriter + role: writer + name: 10100601-ST + startDate: '2010-09-30T10:27:24+00:00' + title: Ni-Mn-Ga flat cone + visitId: 1.1-N diff --git a/doc/examples/icatexport.py b/doc/examples/icatexport.py old 
mode 100755 new mode 100644 diff --git a/doc/examples/icatimport.py b/doc/examples/icatimport.py old mode 100755 new mode 100644 diff --git a/doc/examples/icatsummary.py b/doc/examples/icatsummary.py old mode 100755 new mode 100644 diff --git a/doc/examples/ingest.py b/doc/examples/ingest.py old mode 100755 new mode 100644 diff --git a/doc/examples/init-icat.py b/doc/examples/init-icat.py old mode 100755 new mode 100644 diff --git a/doc/examples/login.py b/doc/examples/login.py old mode 100755 new mode 100644 diff --git a/doc/examples/metadata.xml b/doc/examples/metadata.xml new file mode 100644 index 00000000..121b0432 --- /dev/null +++ b/doc/examples/metadata.xml @@ -0,0 +1,94 @@ + + + + 2024-02-02T12:52:00+01:00 + metadata-writer 0.28 + + + + e202553 + Dy01Cp02 at 2.7 K + 2020-09-30T18:02:17+02:00 + 2020-09-30T20:18:36+02:00 + + + + + + + + + + e202554 + Dy01Cp02 at 5.1 K + 2020-09-30T20:29:19+02:00 + 2020-09-30T21:23:49+02:00 + + + + + + + + + + e202555 + Dy01Cp02 at 2.7 K + 2020-09-30T21:35:16+02:00 + 2020-09-30T23:04:27+02:00 + + + + + + + + + + e202556 + reference + 2020-09-30T23:04:31+02:00 + 2020-10-01T01:26:07+02:00 + + + + + + + + + neutron + + + + + 5.3 + + + + + 2.74103 + 2.7408 + 2.7414 + + + + + neutron + + + + + 5.3 + + + + + 5.1239 + 5.1045 + 5.1823 + + + + + diff --git a/doc/src/83F336432C7FCC91.pub b/doc/src/83F336432C7FCC91.pub new file mode 100644 index 00000000..330f2f80 --- /dev/null +++ b/doc/src/83F336432C7FCC91.pub @@ -0,0 +1,44 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQENBFE3WkEBCADM4jKAQMsVlnU5NxbJ5JmpqhPRj54eSkDcvIjPcEQLkMmQjCDT +HHwN5ZjzHNTj7nXkvmjjWMgyzjpNmdUAofsh6MBp1etXNzYNkoEs+urRlw1wuRaU +NMK4Pf0G35THrQ0nJdmmCGkzxiTgQTitLVA52zZclq3Vqo/ZsO26gkLB2ErhZJZE +2q+TL6BBr98m+1zXpG5kqF/IE4pF4Yl1Oysp8imAAbodr+6X1DGfOM2h1NwMSbAo +Uw49hR4PIwxKP5Sluv6GNUVgyPaOrk8LVE4c+H0lswmz6nZOlxhhbtplN0KViqki +6pqyrOuwv3ZgzUXO4bjEexScyWe2PxKUzjFFABEBAAG0K1JvbGYgS3JhaGwgPHJv +bGYua3JhaGxAaGVsbWhvbHR6LWJlcmxpbi5kZT6JATkEEwECACMFAlE3WkECGwMH 
+CwkIBwMCAQYVCAIJCgsEFgIDAQIeAQIXgAAKCRCD8zZDLH/Mkcj5CAC0x2GU88xD +eBR1MyGq9nUTDgjO/EkiztDZirBg1FLGwCVtXY3yZc0nSriEj4oF8lNiGU539rU1 +R+z76UCDTlq/xq2/a1BazStkHuv+OuUfoA/Hl5/Tvp+dwk7BXG6dlyr6joT3i9Pz +RgH/kFe1RAJNnT/oy5LTRsydcWb/mCey/O/ON47zlKzNbbGvL6YPwmsyaO22vUmO +JsH4JZM36BDu3Wt2LPB+A51ZanzlxkfA3Mcc0cIe9PsSqufvnV/kG4cQxJedgXes +lVniggXbtsudl8EqmUpq/yS+/X3BLBfidTA2Yicx6udmR5ZFQHoCrOlcTfylW0mz +x5rhClZPgrgaiEYEExECAAYFAlQVluoACgkQUcvGPyCdlGaaRgCg0s2cWgUXWeb7 +noexGZNxnmQIMrgAoJqBXBVVrWfd7bwdWT1IEnyGMiCeiEYEExECAAYFAlQVlyYA +CgkQO0qCjX1HQDs8HACfduvRjIu+wmrvyN+ikPXHN6ZJYOAAni4k+F5m7P9RkUK/ +MPW34JrqaIg8iQIcBBMBAgAGBQJYRG+HAAoJEAihJkF1ND5uePgP/3okgaIQOwcy +7lN2SiP1k/UxjmqynrdrsTWdGRm+wyJ9Er9WlHgMQavaxk2XOpTQ8DcAuczpNyOb +qaYI6l+xd8mDvdJ7lbYZboiZj62nb/yUwRAyN3TJ7PRjuWXqLZjVnywQzYN66Z2v +kuxewEqZUeLVlUcg7IEwwCOErAmHFfYmIER7Q0Hyvc8gdkbFzgQ5UNHyLUngMe+6 +VGLlkoyRykF9DDCmqMQO06Ork78gsTVTHr0LEMG3HyKiQ8rLZouSQS9tiw7RVIji +nbf1EWRvVwgSXPSsx545uVwUOSyXlozK7AzFxjlFJU8G9+h1fXYlkviFPrsU2vwa +6q8GiVnaLpwa2QC9iznPTzSnUFh9Eqg8aO4DqpH28L+o5PTClmWUGncqigmYGipm +2s0AKdtRFVXcz7fmH8JKi9u9dBtJPIbdA3Kq/D6+1GkiS5V0aELWI+0424RJ5qlO +MHukVUxg0QH/MJnzfRT3MAV5gBpJC5KrijwS7FN8m+CQN35+OMoiBbpOKt/+wQgF +K31D/M55CZoaeVtkcLiTRjUig2Dwr/16IMd5IcpetNoIcUILDENcWh0mYo02kaJt +nldsZIAi77goxdgKu41AIIhEv0FmlXp6OB/QoEJRiDOVtxSW7bG1F+JbularecE2 +t5PehBq5k35vxo8tteL1xQIP+8nnOtUJuQENBFE3WkEBCADB84pLmmsdFjV5R+0e +zL2COBZBUxUPSIuKOdEfHkR5M5AxbXdg9GwxDMZE1TLAdX8sn1ymwUlZt6dSUFO0 +hg0LdZAOMvjvFb6dF+RE7gfeOsH0usTN32NUzW0/S1E2V8LRlplGIXtHa9YZArQw +k97gpFATheh4K/QHvrIyneVam+B+6WH8zJtBfGmWtjfBLwSiWohQPQAvYBW6hi86 ++I3z0yCrOhgM/N9uylgWu+BQzoQ8/Jv2g22bzSa1mbCP1OVp587HpJy9WbX/aKH4 +7I/vp0qLysWekbuX5OOjsiItW2Yv7oK/S7OtoagTUqX3KG1KRTJZHTTS03dy3DME +fqNtABEBAAGJAR8EGAECAAkFAlE3WkECGwwACgkQg/M2Qyx/zJEJcAgAsE8NNJYX +/3Vdd9WQih4Xg2Pvz66Z9jwTyS9Rb3boB0gtZMgqsHQBdF9iYNVxREpiVDPA0YKR +x1iTjFblt9Ryq7MZVPhRI1cfDfHKCw6bMz1hZDBRr1BSZVjiru74OCebreeOMhzI +zmyP7GSi0q5edZO0zpYkOlme3dQBatSkEAnSDOA9ct6EEMG3ZsQda1YXa9BMKj7e 
+B+UdFUdGb5SB8buW5RKLMTD485gKpvxWpYptP5DD3r3mThc2m5uWdiAM+jqm9Flc +NlD0bZ8tdZpbPOgxnbAuy7HEPaS/VnGZHouwZWpb484dynCO7+Oi1f2y2tPx0uXV +DRFDDLLR3oBEag== +=+2H3 +-----END PGP PUBLIC KEY BLOCK----- diff --git a/doc/src/_static/css/captions.css b/doc/src/_static/css/captions.css new file mode 100644 index 00000000..8321eee7 --- /dev/null +++ b/doc/src/_static/css/captions.css @@ -0,0 +1,14 @@ +.rst-content div.figure p.caption, .rst-content table.docutils caption, .rst-content div.code-block-caption{ + color: #404040; + font-style: italic; + font-size: 90%; + line-height: normal; + text-align: left; +} +.rst-content div.figure p.caption span.caption-number, .rst-content table.docutils caption span.caption-number, .rst-content div.code-block-caption span.caption-number{ + font-weight: bold; +} +.rst-content div.code-block-caption a.headerlink, .rst-content table.docutils caption a.headerlink{ + display: none; + visibility: hidden; +} diff --git a/doc/src/client.rst b/doc/src/client.rst index 0f0b99dc..e32dc2df 100644 --- a/doc/src/client.rst +++ b/doc/src/client.rst @@ -29,7 +29,7 @@ manages the interaction with an ICAT service as a client. Version of the ICAT server this client connects to. - .. versionchanged:: 1.0.0 + .. versionchanged:: 1.0.0 changed type to :class:`icat.helper.Version` .. 
attribute:: autoLogout diff --git a/doc/src/conf.py b/doc/src/conf.py index f41f3773..bd8fdc6f 100644 --- a/doc/src/conf.py +++ b/doc/src/conf.py @@ -9,24 +9,52 @@ from pathlib import Path import sys -maindir = Path(__file__).resolve().parent.parent.parent -sys.path[0] = str(maindir) - -import _meta +docsrcdir = Path(__file__).resolve().parent +maindir = docsrcdir.parent.parent +buildlib = maindir / "build" / "lib" +sys.path[0] = str(buildlib) +sys.dont_write_bytecode = True +import icat._meta # -- Project information ----------------------------------------------------- project = 'python-icat' -copyright = ('2013–2023, ' +copyright = ('2013–2024, ' 'Helmholtz-Zentrum Berlin für Materialien und Energie GmbH') author = 'Rolf Krahl' # The full version, including alpha/beta/rc tags -release = _meta.__version__ +release = icat._meta.version # The short X.Y version version = ".".join(release.split(".")[0:2]) +# Write a _meta.rst that defines some custom substitutions +def make_meta_rst(last_release): + template = """:orphan: + +.. |distribution_source| replace:: %(dist_src_name)s +.. |distribution_signature| replace:: %(dist_sig_name)s +.. _distribution_source: %(dist_src_url)s +.. 
_distribution_signature: %(dist_sig_url)s +""" + github_repo = "https://github.com/icatproject/python-icat" + dist_src_name = "python-icat-%s.tar.gz" % last_release + dist_src_url = ("%s/releases/download/%s/%s" + % (github_repo, last_release, dist_src_name)) + dist_sig_name = "python-icat-%s.tar.gz.asc" % last_release + dist_sig_url = ("%s/releases/download/%s/%s" + % (github_repo, last_release, dist_sig_name)) + subst = { + 'dist_src_name': dist_src_name, + 'dist_src_url': dist_src_url, + 'dist_sig_name': dist_sig_name, + 'dist_sig_url': dist_sig_url, + } + with (docsrcdir / '_meta.rst').open('wt') as f: + print(template % subst, file=f) + +make_meta_rst(icat._meta.release) # -- General configuration --------------------------------------------------- @@ -41,6 +69,11 @@ 'sphinx.ext.autodoc', 'sphinx.ext.intersphinx', ] +try: + import sphinx_copybutton + extensions.append('sphinx_copybutton') +except ImportError: + pass # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -51,12 +84,23 @@ # The master toctree document. master_doc = 'index' +# Enable automatic numbering of figures, tables and code-blocks +numfig = True + +# Strings to format figure, table, code-block, and section numbers +numfig_format = { + 'figure': "Figure %s", + 'table': "Table %s", + 'code-block': "Snippet %s", + 'section': "Section %s", +} + # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
@@ -73,7 +117,10 @@ # -- Options for intersphinx extension --------------------------------------- -intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None), + 'lxml': ('https://lxml.de/apidoc/', None), +} # -- Options for HTML output ------------------------------------------------- @@ -109,6 +156,7 @@ html_favicon = "images/favicon-32x32.png" html_css_files = [ + 'css/captions.css', 'css/spacing.css', ] diff --git a/doc/src/config.rst b/doc/src/config.rst index 0f5c42bd..14688e5e 100644 --- a/doc/src/config.rst +++ b/doc/src/config.rst @@ -62,8 +62,8 @@ added. The main class that client programs interact with is .. attribute:: client The :class:`icat.client.Client` object initialized according to - the configuration. This is also the first element in the - return value from :meth:`getconfig`. + the configuration. This is also the first element in the + return value from :meth:`getconfig`. .. attribute:: client_kwargs @@ -138,40 +138,42 @@ A few derived variables are also set in (username and password if authenticator information is not available) suitable to be passed to :meth:`icat.client.Client.login`. 
-The command line arguments, environment variables, and default values -for the configuration variables are as follows: - -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| Name | Command line | Environment | Default | Mandatory | Notes | -+=================+=============================+=======================+================+===========+==============+ -| `configFile` | ``-c``, ``--configfile`` | ``ICAT_CFG`` | depends | no | \(1) | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `configSection` | ``-s``, ``--configsection`` | ``ICAT_CFG_SECTION`` | :const:`None` | no | | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `url` | ``-w``, ``--url`` | ``ICAT_SERVICE`` | | yes | | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `idsurl` | ``--idsurl`` | ``ICAT_DATA_SERVICE`` | :const:`None` | depends | \(2) | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `checkCert` | ``--check-certificate``, | | :const:`True` | no | | -| | ``--no-check-certificate`` | | | | | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `http_proxy` | ``--http-proxy`` | ``http_proxy`` | :const:`None` | no | | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `https_proxy` | ``--https-proxy`` | ``https_proxy`` | :const:`None` | no | | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `no_proxy` | ``--no-proxy`` | ``no_proxy`` | :const:`None` | no | | 
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `auth` | ``-a``, ``--auth`` | ``ICAT_AUTH`` | | yes | \(3) | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `username` | ``-u``, ``--user`` | ``ICAT_USER`` | | yes | \(3),(4) | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `password` | ``-p``, ``--pass`` | | interactive | yes | \(3),(4),(5) | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ -| `promptPass` | ``-P``, ``--prompt-pass`` | | :const:`False` | no | \(3),(4),(5) | -+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ - -Mandatory means that an error will be raised in -:meth:`icat.config.Config.getconfig` if no value is found for the +.. table:: Command line arguments, environment variables, and default values + for the configuration variables. 
+ :name: tab-config-vars + + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | Name | Command line | Environment | Default | Mandatory | Notes | + +=================+=============================+=======================+================+===========+==============+ + | `configFile` | ``-c``, ``--configfile`` | ``ICAT_CFG`` | depends | no | \(1) | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `configSection` | ``-s``, ``--configsection`` | ``ICAT_CFG_SECTION`` | :const:`None` | no | | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `url` | ``-w``, ``--url`` | ``ICAT_SERVICE`` | | yes | | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `idsurl` | ``--idsurl`` | ``ICAT_DATA_SERVICE`` | :const:`None` | depends | \(2) | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `checkCert` | ``--check-certificate``, | | :const:`True` | no | | + | | ``--no-check-certificate`` | | | | | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `http_proxy` | ``--http-proxy`` | ``http_proxy`` | :const:`None` | no | | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `https_proxy` | ``--https-proxy`` | ``https_proxy`` | :const:`None` | no | | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `no_proxy` | ``--no-proxy`` | ``no_proxy`` | :const:`None` | no | | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `auth` | ``-a``, 
``--auth`` | ``ICAT_AUTH`` | | yes | \(3) | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `username` | ``-u``, ``--user`` | ``ICAT_USER`` | | yes | \(3),(4) | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `password` | ``-p``, ``--pass`` | | interactive | yes | \(3),(4),(5) | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + | `promptPass` | ``-P``, ``--prompt-pass`` | | :const:`False` | no | \(3),(4),(5) | + +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+ + +See :numref:`tab-config-vars` for an overview of predefined +configuration variables. Mandatory means that an error will be raised +in :meth:`icat.config.Config.getconfig` if no value is found for the configuration variable in question. Notes: diff --git a/doc/src/dumpfile.rst b/doc/src/dumpfile.rst index 1fc44d6e..d87e8c9f 100644 --- a/doc/src/dumpfile.rst +++ b/doc/src/dumpfile.rst @@ -6,8 +6,8 @@ This module provides the base classes :class:`icat.dumpfile.DumpFileReader` and :class:`icat.dumpfile.DumpFileWriter` that define the API and the -logic for reading and writing ICAT data files. The actual work is -done in file format specific backend modules that should provide +logic for reading and writing :ref:`ICAT-data-files`. The actual work +is done in file format specific backend modules that should provide subclasses that must implement the abstract methods. .. autoclass:: icat.dumpfile.DumpFileReader @@ -23,63 +23,3 @@ subclasses that must implement the abstract methods. .. autofunction:: icat.dumpfile.register_backend .. autofunction:: icat.dumpfile.open_dumpfile - - -.. _ICAT-data-files: - -ICAT data files ---------------- - -ICAT data files provide a way to serialize ICAT content to a flat -file. 
This section describes the logical structure of ICAT data -files. The actual file format depends on the backend, python-icat -provides backends using XML and YAML. - -There is a one-to-one correspondence of the objects in the data -file and the corresponding object in ICAT according to the ICAT -schema, including all attributes and relations to other objects. -Special unique keys are used to encode the relations. -:meth:`icat.entity.Entity.getUniqueKey` may be used to get such a -unique key for an entity object and -:meth:`icat.client.Client.searchUniqueKey` may be used to search an -object by its key. Otherwise these keys should be considered as -opaque ids. - -Data files are partitioned in chunks. This is done to avoid having -the whole file, e.g. the complete inventory of the ICAT, at once in -memory. The problem is that objects contain references to other -objects (e.g. Datafiles refer to Datasets, the latter refer to -Investigations, and so forth). We keep an index of the objects in -order to resolve these references. But there is a memory versus time -tradeoff: we cannot keep all the objects in the index, that would -again mean the complete inventory of the ICAT. And we can't know -beforehand which object is going to be referenced later on, so we -don't know which one to keep and which one to discard from the index. -Fortunately we can query objects we discarded once back from the ICAT -server. But this is expensive. So the strategy is as follows: keep -all objects from the current chunk in the index and discard the -complete index each time a chunk has been processed. This will work -fine if objects are mostly referencing other objects from the same -chunk and only a few references go across chunk boundaries. - -Therefore, we want these chunks to be small enough to fit into memory, -but at the same time large enough to keep as many relations between -objects as possible local in a chunk. 
It is in the responsibility of -the writer of the data file to create the chunks in this manner. - -The objects that get written to the data file and how this file is -organized is controlled by lists of ICAT search expressions, see -:meth:`icat.dumpfile.DumpFileWriter.writeobjs`. There is some degree -of flexibility: an object may include related objects in an -one-to-many relation, just by including them in the search expression. -In this case, these related objects should not have a search -expression on their own again. For instance, the search expression -for Grouping may include UserGroup. The UserGroups will then be -embedded in their respective grouping in the data file. There should -not be a search expression for UserGroup then. - -Objects related in a many-to-one relation must always be included in -the search expression. This is also true if the object is -indirectly related to one of the included objects. In this case, -only a reference to the related object will be included in the data -file. The related object must have its own list entry. diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst new file mode 100644 index 00000000..c4894c03 --- /dev/null +++ b/doc/src/file-icatdata.rst @@ -0,0 +1,324 @@ +.. _ICAT-data-files: + +ICAT data files +=============== + +ICAT data files provide a way to serialize ICAT content to a flat +file. These files are read by the :ref:`icatingest` and written by +the :ref:`icatdump` command line scripts respectively. The program +logic for reading and writing the files is provided in the +:mod:`icat.dumpfile` module. + +The actual file format depends on the version of the ICAT schema and +on the backend: python-icat provides backends using XML and YAML. + +.. _ICAT-data-files-structure: + +Logical structure of ICAT data files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Data files are partitioned in chunks. This is done to avoid having +the whole file, e.g. the complete inventory of the ICAT, at once in +memory. 
The problem is that objects contain references to other +objects, e.g. Datafiles refer to Datasets, the latter refer to +Investigations, and so forth. We keep an index of the objects as +a cache in order to resolve these references. But there is a memory +versus time tradeoff: in order to avoid the index to grow beyond +bounds, objects need to be discarded from the index from time to time. +References to objects that can not be resolved from the index need to +be searched from the ICAT server, which is of course expensive. So +the strategy is as follows: keep all objects from the current chunk in +the index and discard the complete index each time a chunk has been +processed. [#dc]_ This will work fine if objects are mostly +referencing other objects from the same chunk and only a few +references go across chunk boundaries. + +Therefore, we want these chunks to be small enough to fit into memory, +but at the same time large enough to keep as many relations between +objects as possible local in a chunk. It is in the responsibility of +the writer of the data file to create the chunks in this manner. + +The data chunks contain ICAT object definitions, e.g. serializations +of individual ICAT objects, including all attribute values and +many-to-one relations. The many-to-one relations are provided as +references to other objects that must exist in the ICAT server at the +moment that this object definition is read. + +There is some degree of flexibility with respect to related objects in +one-to-many relations: object definitions for these related objects +may be included in the object definitions of the parent object. When +the parent is read, these related objects will be created along with +the parent in one single cascading call. Thus, the related objects +must not be included again as a separate object in the ICAT data file. +For instance, an ICAT data file may include User, Grouping, and +UserGroup as separate objects. 
In this case, the UserGroup entries +must properly reference User and Grouping as their related objects. +Alternatively the file may only contain User and Grouping objects, +with the UserGroups being included into the object definition of the +corresponding Grouping objects. + +.. _ICAT-data-files-references: + +References to ICAT objects and unique keys +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +References to ICAT objects may be encoded using reference keys. There +are two kinds of those keys, local keys and unique keys: + +When an ICAT object is defined in the file, it generally defines a +local key at the same time. Local keys are stored in the object index +and may be used to reference this object from other objects in the +same data chunk. + +Unique keys can be obtained from an object by calling +:meth:`icat.entity.Entity.getUniqueKey`. An object can be searched by +its unique key from the ICAT server by calling +:meth:`icat.client.Client.searchUniqueKey`. As a result, it is +possible to reference an object by its unique key even if the +reference is not in the object index. All references that go across +chunk boundaries must use unique keys. [#dc]_ + +Reference keys should be considered as opaque ids. + +.. _ICAT-data-xml-files: + +ICAT data XML files +~~~~~~~~~~~~~~~~~~~ + +The root element of ICAT data XML files is ``icatdata``. It may +optionally have one ``head`` subelement and one or more ``data`` +subelements. + +The ``head`` element will be ignored by :ref:`icatingest`. It serves +to provide some information on the context of the creation of the data +file, which may be useful for debugging in case of issues. + +The actual payload of an ICAT data XML file is in the ``data`` +elements. There can be any number of them and each is one chunk +according to the logical structure explained above. The subelements +of ``data`` may either be ICAT object references or ICAT object +definitions, both explained in detail below. 
Either of them may have +an ``id`` attribute that defines a local key that allows to reference +the corresponding object later on. + +:numref:`snip-file-icatdata-xml-1` shows a simple example for an ICAT +data XML file having one single ``data`` element that defines four +Datasets. + +.. code-block:: XML + :name: snip-file-icatdata-xml-1 + :caption: A simple example for an ICAT data XML file + :dedent: 2 + + + + + 2023-10-17T07:33:36Z + manual edit + + + + + false + 2012-07-30T01:10:08+00:00 + e209001 + 2012-07-26T15:44:24+00:00 + + + + + + false + 2012-08-06T01:10:08+00:00 + e209002 + 2012-08-02T05:30:00+00:00 + + + + + + false + 2012-07-16T14:30:17+00:00 + e209003 + 2012-07-16T11:42:05+00:00 + + + + + + false + 2012-07-31T22:52:23+00:00 + e209004 + 2012-07-31T20:20:37+00:00 + + + + + + +ICAT object references +...................... + +ICAT object references do not define an ICAT object to be created when +reading the ICAT data file but reference an already existing one. It +is either assumed to exist in ICAT before ingesting the file or it +must appear earlier in the ICAT data file, so that it will be created +before the referencing object is read. + +ICAT objects may either be referenced by reference key or by +attributes. A reference key should be included as a ``ref`` +attribute. + +When referencing the object by attributes, these attributes should be +included using the same name in the reference element. This may also +include attributes of related objects using the same dot notation as +for ICAT JPQL search expressions. Referencing by attributes may be +combined with referencing related objects by reference key, using +``ref`` in place of the related object's attribute names. In any +case, referenced objects must be uniquely defined by the attribute +values. 
+ +ICAT object references may be used in two locations in ICAT data XML +files: as direct subelements of ``data`` or to reference related +objects in many-to-one relations in ICAT object definitions, see +below. In the former case, the name of the object reference element +is the name of the corresponding ICAT entity type (the first letter in +lowercase) with a ``Ref`` suffix appended. In that case, the element +should have an ``id`` attribute that will define a local key that can +be used to reference that object in subsequent object references. +This is convenient to define a shortcut when the same object needs to +be referenced often, to avoid having to repeat the same set of +attributes each time. + +In any case, object reference elements only have attributes, but no +content or subelements. + +See :numref:`snip-file-icatdata-xml-1` for a few examples: the first +subelement of the ``data`` element in this case is +``investigationRef``. It references a (supposed to be existing) +Investigation by its attributes ``name`` and ``visitId``. It defines +a local key for that Investigation object in the ``id`` attribute. +The Dataset object definitions in that example each use that local key +to set their relation with the Investigation respectively. The +Dataset object definitions each also include a relation with their +``type``, referencing the related DatasetType by the ``name`` +attribute. Some of the Dataset object definitions also include a +relation with a Sample. The respective Sample object is referenced by +``name`` and the related Investigation. The latter is referenced by +the local key defined earlier in the ``investigation.ref`` attribute. + +ICAT object definitions +....................... + +ICAT object definitions define objects that will be created in ICAT +when ingesting the ICAT data file. As direct subelements of ``data``, +the name of the element must be the name of the corresponding entity +type in the ICAT schema (the first letter in lowercase). 
+ +The subelements of ICAT object definitions are the attributes and +object relations as defined in the ICAT schema using the same names. +Attributes must include the corresponding value as text content of the +element. All many-to-one relations must be provided as ICAT object +references, see above. + +The ICAT object definitions may include one-to-many relations as +subelements. In this case, these subelements must in turn be ICAT +object definitions for the related objects. These related objects +will be created along with the parent in one single cascading call. +The object definition for the related object must not include its +relation with the parent object as this is already implied by the +parent and child relationship. + +When appearing as direct subelements of ``data``, ICAT object +definitions may have an ``id`` attribute that will define a local key +that can be used to reference the defined object later on. + +.. literalinclude:: ../examples/icatdump-simple.xml + :language: xml + :name: snip-file-icatdata-xml-2 + :caption: An example for an ICAT data XML file + +Consider the example in :numref:`snip-file-icatdata-xml-2`. It +contains two chunks: the first chunk contains four User objects and +three Grouping objects. The Groupings include related UserGroups. +Note that these UserGroups include their relation to the User, but not +their relation with Grouping. The latter is implied by the parent +relation of the object in the file. The second chunk only contains +one Investigation, including related InvestigationGroups. + +Finally note that the file format also depends on the ICAT schema +version: the present example can only be ingested into ICAT server 5.0 +or newer, because the attributes fileCount and fileSize have been +added to Investigation in this version. With older ICAT versions, it +will fail because these attributes are not defined. + +You will find more extensive examples in the source distribution of +python-icat. 
The distribution also provides XML Schema Definition +files for the ICAT data XML file format corresponding to various ICAT +schema versions. Note that these XML Schema Definition +files are provided for reference only. The :ref:`icatingest` script +does not validate its input. + +.. _ICAT-data-yaml-files: + +ICAT data YAML files +~~~~~~~~~~~~~~~~~~~~ + +In this section we describe the ICAT data file format using the YAML +backend. Consider the example in :numref:`snip-file-icatdata-yaml`, +it corresponds to the same ICAT content as the XML in +:numref:`snip-file-icatdata-xml-2`: + +.. literalinclude:: ../examples/icatdump-simple.yaml + :language: yaml + :name: snip-file-icatdata-yaml + :caption: An example for an ICAT data YAML file + +ICAT data YAML files start with a head consisting of a few comment +lines, followed by one or more YAML documents. YAML documents are +separated by a line containing only ``---``. The comments in the head +provide some information on the context of the creation of the data +file, which may be useful for debugging in case of issues. + +Each YAML document defines one chunk of data according to the logical +structure explained above. It consists of a mapping having the name +of entity types in the ICAT schema (the first letter in lowercase) as +keys. The values are in turn mappings that map object ids as key to +ICAT object definitions as value. These object ids define local keys +that may be used to reference the respective object later on. In the +present example, the first chunk contains four User objects and three +Grouping objects. The Groupings include related UserGroups. The +second chunk only contains one Investigation, including related +InvestigationGroups. + +Each of the ICAT object definitions corresponds to an object in the +ICAT schema. It is again a mapping with the object's attribute and +relation names as keys and corresponding values. All many-to-one +relations must be provided and reference existing objects, e.g.
they +must either already have existed before starting the ingestion or +appear in the same or an earlier YAML document in the ICAT data file. +The values of many-to-one relations are reference keys, either local +keys defined in the same YAML document or unique keys. Unlike the XML +backend, the YAML backend does not support referencing objects by +attributes. + +The object definitions may include one-to-many relations. In this +case, the value for the relation name is a list of object definitions +for the related objects. These related objects will be created along +with the parent in one single cascading call. In the present example, +the Grouping objects include their related UserGroup objects. Note +that these UserGroups include their relation to the User, but not with +Grouping. The latter relationship is implied by the parent relation +of the object in the file. + +Note that the entries in the mappings in YAML have no inherent order. +The :ref:`icatingest` script uses a predefined order to read the ICAT +entity types in order to make sure that referenced objects are created +before any object that may reference them. + + +.. [#dc] There is one exception: DataCollections doesn't have a + uniqueness constraint and can't reliably be searched by + attributes. Therefore local keys for DataCollections are + always kept in the object index and may be used to reference + them across chunk boundaries. diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst new file mode 100644 index 00000000..e6e1c251 --- /dev/null +++ b/doc/src/file-icatingest.rst @@ -0,0 +1,102 @@ +.. _ICAT-ingest-files: + +Metadata ingest files +===================== + +Metadata ingest files are the input format for class +:class:`icat.ingest.IngestReader`. This class is intended to be used +in scripts that read the metadata created by experiments into ICAT. +The file format is basically a restricted version of +:ref:`ICAT-data-xml-files`. 
+ +The underlying idea is that ICAT data files are in principle suitable +to encode the metadata to be ingested from the experiment. The only +problem is that this file format is too powerful: it can encode any +ICAT content. We want the ingest files from the experiment to create +new Datasets and DatasetParameters, we certainly don't want these +files to create new Instruments or Users in ICAT. And we also want to +control to which Investigation newly created Datasets are going to be +added. It would be rather difficult to control the power of the input +format if we would use plain ICAT data files for this purpose. + +.. note:: + The metadata ingest file format is versioned. This version number + is independent from the python-icat version. It is incremented + only when the format changes. The latest version of the metadata + ingest file format is 1.1. + +.. versionchanged:: 1.2.0 + add metadata ingest file format version 1.1, adding support for + relating Datasets with Samples. + +Differences compared to ICAT data XML files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Class :class:`icat.ingest.IngestReader` takes an ``investigation`` +argument. We will refer to the Investigation given in this argument +as the *prescribed Investigation* in the following. The metadata +ingest file format restricts ICAT data XML files in the following +ways: + +* ingest files must contain one and only one ``data`` element, + e.g. one chunk according to the :ref:`ICAT-data-files-structure`. + +* the allowed object types are restricted to Dataset, + DatasetInstrument, DatasetTechnique, and DatasetParameter. + +* the attributes in the object definitions for Datasets are restricted + to name, description, startDate, and endDate. + +* object definitions for Datasets can not include references to the + related Investigation or DatasetType. These relations will be added + by :class:`icat.ingest.IngestReader`.
The relation to the + Investigation will be set to the prescribed Investigation. + +* object definitions for Datasets can reference a related Sample only + by name or by pid. A relation of the related Sample with the + prescribed Investigation will be implied. + +* references to the related Dataset in DatasetInstrument, + DatasetTechnique, and DatasetParameter definitions are restricted to + :ref:`local keys `. As a result, these + objects can only relate to Datasets defined in the same ingest file. + +* other object references are restricted to reference by attributes. + +These restrictions are enforced by validating the input against an XML +Schema Definition (XSD). + +Another change with respect to ICAT data XML files is that the name of +the root element is ``icatingest`` and that it must have a ``version`` +attribute. + +Example +~~~~~~~ + +Consider the following example: + +.. literalinclude:: ../examples/metadata.xml + :language: xml + +This file defines four Datasets with related objects. All datasets +have a ``name``, ``description``, ``startDate``, and ``endDate`` +attribute and include a relation with an Instrument and a Technique, +respectively. + +Note that the Datasets have no ``complete`` attribute and no relation +with Investigation or DatasetType respectively. All of these are +added with prescribed values by class +:class:`icat.ingest.IngestReader`. + +Some Datasets relate to Samples: the first two Datasets relate to the +same Sample, the third Dataset to another Sample, while the last +Dataset has no relation with any Sample. All Samples are referenced +by their name. Class :class:`icat.ingest.IngestReader` will add a +reference to the Investigation to this, so that only Samples that are +related to the prescribed Investigation can actually be referenced. + +Some DatasetParameter are added as separate objects in the file. 
They +respectively reference their related Datasets using local keys that +are defined in the ``id`` attribute of the corresponding Dataset +earlier in the file. Alternatively, the DatasetParameter could have +been included into the respective Datasets. diff --git a/doc/src/fileformats.rst b/doc/src/fileformats.rst new file mode 100644 index 00000000..c24d37eb --- /dev/null +++ b/doc/src/fileformats.rst @@ -0,0 +1,25 @@ +File formats +============ + +Some components of python-icat read input files or write output files: + +The :ref:`icatdump` command line script fetches content from an ICAT +server and writes it to a file. The :ref:`icatingest` command line +script reads those files and restores the content in an ICAT server. +The ICAT data file format written and read by these scripts +respectively corresponds directly to the ICAT schema. It is rather +generic and may encode any ICAT content. + +The metadata ingest file format is basically a restricted version of +the ICAT data file format. It is read by class +:class:`icat.ingest.IngestReader` for the purpose of ingesting +metadata created by experiments into ICAT. + +See the following sections for a detailed description of these file +formats: + +..
toctree:: + :maxdepth: 1 + + file-icatdata + file-icatingest diff --git a/doc/src/icatdump.rst b/doc/src/icatdump.rst index 6e7d0caf..0023fca3 100644 --- a/doc/src/icatdump.rst +++ b/doc/src/icatdump.rst @@ -7,7 +7,7 @@ icatdump Synopsis ~~~~~~~~ -**icatdump** [*standard options*] [-o FILE] [-f FORMAT] +| **icatdump** [*standard options*] [-o FILE] [-f FORMAT] Description diff --git a/doc/src/icatingest.rst b/doc/src/icatingest.rst index 7cba2199..c260d468 100644 --- a/doc/src/icatingest.rst +++ b/doc/src/icatingest.rst @@ -7,7 +7,8 @@ icatingest Synopsis ~~~~~~~~ -**icatingest** [*standard options*] [-i FILE] [-f FORMAT] [--upload-datafiles] [--datafile-dir DATADIR] [--duplicate OPTION] +| **icatingest** [*standard options*] [-i FILE] [-f FORMAT] +| [--upload-datafiles] [--datafile-dir DATADIR] [--duplicate OPTION] Description @@ -71,12 +72,12 @@ The following options are specific to icatingest: **CHECK** Compare all attributes from the input object with the already - existing object in ICAT. Throw an error of any attribute - differs. + existing object in ICAT. Throw an error if any attribute + differs. **OVERWRITE** Overwrite the existing object in ICAT, e.g. update it with all - attributes set to the values found in the input object. + attributes set to the values found in the input object. If :option:`--upload-datafiles` is set, this option will be ignored for Datafile objects which will then always raise an error diff --git a/doc/src/index.rst b/doc/src/index.rst index 1fdc3c09..a3d947c0 100644 --- a/doc/src/index.rst +++ b/doc/src/index.rst @@ -38,6 +38,7 @@ Parts of the documentation tutorial moduleref scripts + fileformats known-issues changelog diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst index 4fed8b7e..e0574221 100644 --- a/doc/src/ingest.rst +++ b/doc/src/ingest.rst @@ -11,7 +11,7 @@ even in minor releases of python-icat. This module provides class :class:`icat.ingest.IngestReader` that -reads metadata from an XML file to add them to ICAT.
It is designed +reads :ref:`ICAT-ingest-files` to add them to ICAT. It is designed for the use case of ingesting metadata for datasets created during experiments. @@ -21,22 +21,14 @@ that base class in restricting the vocabular of the input file: only objects that need to be created during ingestion from the experiment may appear in the input. This restriction is enforced by first validating the input against an XML Schema Definition (XSD). In a -second step, the input is transformed into generic XML :ref:`ICAT data -file ` format using an XSL Transformation (XSLT) and -then fed into :class:`~icat.dumpfile_xml.XMLDumpFileReader`. The -format of the input files may be customized to some extent by providing -custom versions of XSD and XSLT files, see :ref:`ingest-customize` -below. - -The input accepted by :class:`~icat.ingest.IngestReader` consists of -one or more ``Dataset`` objects that all need to relate to the same -``Investigation`` and any number of related ``DatasetTechnique``, -``DatasetInstrument``, and ``DatasetParameter`` objects. The -``Investigation`` must exist beforehand in ICAT. The relation from -the ``Dataset`` objects to the ``Investigation`` will be set by -:class:`~icat.ingest.IngestReader` accordingly. (Actually, the XSLT -will add that attribute to the datasets in the input.) The -``Dataset`` objects will not be created by +second step, the input is transformed into generic :ref:`ICAT data XML +file format ` using an XSL Transformation (XSLT) +and then fed into :class:`~icat.dumpfile_xml.XMLDumpFileReader`. The +format of the input files may be customized to some extent by +providing custom versions of XSD and XSLT files, see +:ref:`ingest-customize` below. 
+ +The ``Dataset`` objects in the input will not be created by :class:`~icat.ingest.IngestReader`, because it is assumed that a separate workflow in the caller will copy the content of datafiles to the storage managed by IDS and create the corresponding ``Dataset`` @@ -47,19 +39,62 @@ of the datasets will be read from the input file and set in the ``DatasetTechnique``, ``DatasetInstrument`` and ``DatasetParameter`` objects read from the input file in ICAT. -Using ingest file format 1.1, ``Dataset`` objects may also include a -reference to a ``Sample``. That ``Sample`` objects needs to exist -beforehand and needs to be related to the same ``Investigation`` as -the ``Dataset``. - -.. versionchanged:: 1.2.0 - add version 1.1 of the ingest file format, including references to samples - .. autoclass:: icat.ingest.IngestReader :members: :show-inheritance: +.. _ingest-process: + +Ingest process +-------------- + +The processing of the metadata during the instantiation of an +:class:`~icat.ingest.IngestReader` object may be summarized by the +following steps: + +1. Read the metadata and parse the :class:`lxml.etree._ElementTree`. + +2. Call :meth:`~icat.ingest.IngestReader.get_xsd` to get the + appropriate XSD file and validate the metadata against that schema. + +3. Inject an ``_environment`` element as first child of the root + element, see below. + +4. Call :meth:`~icat.ingest.IngestReader.get_xslt` to get the + appropriate XSLT file and transform the metadata into generic ICAT + data XML file format. + +5. Feed the result of the transformation into the parent class + :class:`~icat.dumpfile_xml.XMLDumpFileReader`. + +Once this initialization is done, +:meth:`~icat.ingest.IngestReader.ingest` may be called to read the +individual objects defined in the metadata. + + +.. _ingest-environment: + +The environment element +----------------------- + +During the processing of the metadata, an ``_environment`` element +will be injected as the first child of the root element. 
In the +current version of python-icat, this ``_environment`` element has the +following attributes: + + `icat_version` + Version of the ICAT server this client connects to, e.g. the + :attr:`icat.client.Client.apiversion` attribute of the `client` + object being used by this :class:`~icat.ingest.IngestReader`. + +More attributes may be added in future versions. This +``_environment`` element may be used by the XSLT in order to adapt the +result of the transformation to the environment, in particular to +adapt the output to the ICAT schema version it is supposed to conform +to. + + .. _ingest-example: Ingest example @@ -114,14 +149,14 @@ Customizing the input format The ingest input file format may be customized by providing custom XSD and XSLT files. The easiest way to do that is to subclass -:class:`~icat.ingest.IngestReader`, you'd only need to override some -class attributes as follows:: +:class:`~icat.ingest.IngestReader`. In most cases, you'd only need to +override some class attributes as follows:: from pathlib import Path import icat.ingest class MyFacilityIngestReader(icat.ingest.IngestReader): - + # Override the directory to search for XSD and XSLT files: SchemaDir = Path("/usr/share/icat/my-facility") @@ -132,18 +167,27 @@ class attributes as follows:: } # Override the XSLT file to use: - XSLT_name = "myingest.xslt" - -:attr:`~icat.ingest.IngestReader.XSD_Map` is a mapping with pairs of -root element name and version attribute as keys and XSD file names as -values. The method :meth:`~icat.ingest.IngestReader.get_xsd` inspects -the input file and selects the file name from -:attr:`~icat.ingest.IngestReader.XSD_Map` accordingly. (Note that -there is no such mapping for the XSLT file, because its is assumed -that it is fairly easy to formulate adaptations to the input version -directly in XSLT, so one single XSLT file would be sufficient to cover -all versions.) 
In the above example, `MyFacilityIngestReader` would -recognize input files like + XSLT_Map = { + 'legacyingest': "legacy-ingest.xslt", + 'myingest': "my-ingest.xslt", + } + +:attr:`~icat.ingest.IngestReader.XSD_Map` and +:attr:`~icat.ingest.IngestReader.XSLT_Map` are mappings with +properties of the root element of the input data as keys and file +names as values. The methods +:meth:`~icat.ingest.IngestReader.get_xsd` and +:meth:`~icat.ingest.IngestReader.get_xslt` respectively inspect the +input file and use these mappings to select the XSD and XSLT file +accordingly. Note that :attr:`~icat.ingest.IngestReader.XSD_Map` +takes tuples of root element name and version attribute as keys, while +:attr:`~icat.ingest.IngestReader.XSLT_Map` uses the name of the root +element alone. It is assumed that it is fairly easy to +formulate adaptations to the input version directly in XSLT, so one +single XSLT file would be sufficient to cover all versions. + +In the above example, `MyFacilityIngestReader` would recognize input +files like .. code-block:: xml diff --git a/doc/src/install.rst b/doc/src/install.rst index 7b200458..12d0bd41 100644 --- a/doc/src/install.rst +++ b/doc/src/install.rst @@ -1,11 +1,11 @@ +.. include:: _meta.rst + Install instructions ==================== -Release packages of python-icat are published in the `Python Package -Index (PyPI)`__. See :ref:`install-using-pip` for the short version -of the install instructions. +See :ref:`install-using-pip` for the short version of the install +instructions. -.. __: `PyPI site`_ System requirements @@ -52,13 +52,13 @@ are not required to install python-icat and use its core features: tests will be skipped in that case, so the results will not be very meaningful. -+ `setuptools_scm`_ ++ `git-props`_ - The version number is managed using this package. All source - distributions add a static text file with the version number and - fall back using that if setuptools_scm is not available.
So this - package is only needed to build out of the plain development source - tree as cloned from GitHub. + This package is used to extract some metadata such as the version + number out of git, the version control system. All releases embed + that metadata in the distribution. So this package is only needed + to build out of the plain development source tree as cloned from + GitHub, but not to build a release distribution. + `pytest`_ >= 3.1.0 @@ -114,26 +114,56 @@ Installation Installation using pip ...................... -You can install python-icat from PyPI using pip:: +You can install python-icat from the +`Python Package Index (PyPI) <PyPI site_>`_ using pip:: $ pip install python-icat +Note that while installing from PyPI is convenient, there is no way to +verify the integrity of the source distribution, which may be +considered a security risk. + Installation from the source distribution ......................................... -Steps to manually build from the source distribution: +Note that the manual build does not automatically check the +dependencies. So we assume that you have all the system requirements +installed. Steps to manually build from the source distribution: + +1. Download the sources. + + From the `Release Page <GitHub latest release_>`_ you may download + the source distribution file |distribution_source|_ and the + detached signature file |distribution_signature|_. + +2. Check the signature (optional). + + You may verify the integrity of the source distribution by checking + the signature (showing the output for version 1.2.0 as an example):: -1. Download the sources, unpack, and change into the source directory. + $ gpg --verify python-icat-1.2.0.tar.gz.asc + gpg: assuming signed data in 'python-icat-1.2.0.tar.gz' + gpg: Signature made Tue Oct 31 07:01:55 2023 CET + gpg: using RSA key 760465DAF652737A61EC0C9D83F336432C7FCC91 + gpg: Good signature from "Rolf Krahl " [full] -2.
Build:: + The signature should be made by the key + :download:`0x760465DAF652737A61EC0C9D83F336432C7FCC91 + <83F336432C7FCC91.pub>`. The fingerprint of that key is:: + + 7604 65DA F652 737A 61EC 0C9D 83F3 3643 2C7F CC91 + +3. Unpack and change into the source directory. + +4. Build (optional):: $ python setup.py build -3. Test (optional, see below):: +5. Test (optional, see below):: $ python setup.py test -4. Install:: +6. Install:: $ python setup.py install @@ -144,9 +174,9 @@ Building from development sources ................................. For production use, it is always recommended to use the latest release -version from PyPI, see above. If you need some not yet released -bleeding edge feature or if you want to participate in the -development, you may also clone the `source repository from GitHub`__. +version, see above. If you need some not yet released bleeding edge +feature or if you want to participate in the development, you may also +clone the `source repository from GitHub`__. Note that some source files are dynamically created and thus missing in the development sources. If you want to build from the development @@ -175,11 +205,10 @@ authentication plugin must also have these users configured. from the test server and replace it with example content. Do not configure the tests to access a production server! -You can safely run the tests without configuring any test server. You -will just get many skipped tests then. +You can safely run the tests without configuring any test server. But +most of the tests will be skipped then. -.. _PyPI site: https://pypi.org/project/python-icat/ .. _setuptools: https://github.com/pypa/setuptools/ .. _packaging: https://github.com/pypa/packaging/ .. _suds-jurko: https://pypi.org/project/suds-jurko/ @@ -187,9 +216,11 @@ will just get many skipped tests then. .. _PyYAML: https://github.com/yaml/pyyaml/ .. _lxml: https://lxml.de/ .. _Requests: https://requests.readthedocs.io/ -..
_setuptools_scm: https://github.com/pypa/setuptools_scm/ +.. _git-props: https://github.com/RKrahl/git-props/ .. _pytest: https://docs.pytest.org/en/latest/ .. _pytest-dependency: https://pypi.org/project/pytest-dependency/ .. _distutils-pytest: https://github.com/RKrahl/distutils-pytest/ +.. _PyPI site: https://pypi.org/project/python-icat/ +.. _GitHub latest release: https://github.com/icatproject/python-icat/releases/latest/ .. _GitHub repository: https://github.com/icatproject/python-icat/ .. _Issue #72: https://github.com/icatproject/python-icat/issues/72 diff --git a/doc/src/scripts.rst b/doc/src/scripts.rst index 82f57d75..f944efde 100644 --- a/doc/src/scripts.rst +++ b/doc/src/scripts.rst @@ -2,7 +2,7 @@ Command line scripts ==================== This section provides a reference for the command line scripts that -are alongside with python-icat. +are installed alongside with python-icat. .. toctree:: :maxdepth: 1 diff --git a/doc/src/tutorial-create.rst b/doc/src/tutorial-create.rst index c6c56ea8..07977db1 100644 --- a/doc/src/tutorial-create.rst +++ b/doc/src/tutorial-create.rst @@ -132,7 +132,8 @@ created together with the ``ParameterType`` object. We can verify this by searching for the newly created objects:: - >>> query = "SELECT pt FROM ParameterType pt INCLUDE pt.facility, pt.permissibleStringValues" + >>> query = ("SELECT pt FROM ParameterType pt " + ... 
"INCLUDE pt.facility, pt.permissibleStringValues") >>> client.search(query) [(parameterType){ createId = "simple/root" diff --git a/doc/src/tutorial-ids.rst b/doc/src/tutorial-ids.rst index 0ce2748c..c71d221e 100644 --- a/doc/src/tutorial-ids.rst +++ b/doc/src/tutorial-ids.rst @@ -54,10 +54,12 @@ We need a dataset in ICAT that the uploaded files should be put into, so let's create one:: >>> from icat.query import Query - >>> investigation = client.assertedSearch(Query(client, "Investigation", conditions={"name": "= '12100409-ST'"}))[0] + >>> query = Query(client, "Investigation", conditions={"name": "= '12100409-ST'"}) + >>> investigation = client.assertedSearch(query)[0] >>> dataset = client.new("Dataset") >>> dataset.investigation = investigation - >>> dataset.type = client.assertedSearch(Query(client, "DatasetType", conditions={"name": "= 'other'"}))[0] + >>> query = Query(client, "DatasetType", conditions={"name": "= 'other'"}) + >>> dataset.type = client.assertedSearch(query)[0] >>> dataset.name = "greetings" >>> dataset.complete = False >>> dataset.create() @@ -65,9 +67,13 @@ so let's create one:: For each of the files, we create a new datafile object and call the :meth:`~icat.client.Client.putData` method to upload it:: - >>> df_format = client.assertedSearch(Query(client, "DatafileFormat", conditions={"name": "= 'Text'"}))[0] + >>> query = Query(client, "DatafileFormat", conditions={"name": "= 'Text'"}) + >>> df_format = client.assertedSearch(query)[0] >>> for fname in ("greet-jdoe.txt", "greet-nbour.txt", "greet-rbeck.txt"): - ... datafile = client.new("Datafile", name=fname, dataset=dataset, datafileFormat=df_format) + ... datafile = client.new("Datafile", + ... name=fname, + ... dataset=dataset, + ... datafileFormat=df_format) ... client.putData(fname, datafile) ... 
(datafile){ @@ -125,7 +131,10 @@ Download files We can request a download of a set of data using the :meth:`~icat.client.Client.getData` method:: - >>> query = Query(client, "Datafile", conditions={"name": "= 'greet-jdoe.txt'", "dataset.name": "= 'greetings'"}) + >>> query = Query(client, "Datafile", conditions={ + ... "name": "= 'greet-jdoe.txt'", + ... "dataset.name": "= 'greetings'" + ... }) >>> df = client.assertedSearch(query)[0] >>> data = client.getData([df]) >>> type(data) diff --git a/doc/src/tutorial-search.rst b/doc/src/tutorial-search.rst index ed9843ae..9d1c5fec 100644 --- a/doc/src/tutorial-search.rst +++ b/doc/src/tutorial-search.rst @@ -122,7 +122,8 @@ appropriate condition. The `conditions` argument to :class:`~icat.query.Query` should be a mapping of attribute names to conditions on that attribute:: - >>> query = Query(client, "Investigation", conditions={"name": "= '10100601-ST'"}) + >>> query = Query(client, "Investigation", + ... conditions={"name": "= '10100601-ST'"}) >>> print(query) SELECT o FROM Investigation o WHERE o.name = '10100601-ST' >>> client.search(query) @@ -144,7 +145,9 @@ conditions on that attribute:: We may also include related objects in the search results:: - >>> query = Query(client, "Investigation", conditions={"name": "= '10100601-ST'"}, includes=["datasets"]) + >>> query = Query(client, "Investigation", + ... conditions={"name": "= '10100601-ST'"}, + ... includes=["datasets"]) >>> print(query) SELECT o FROM Investigation o WHERE o.name = '10100601-ST' INCLUDE o.datasets >>> client.search(query) @@ -208,7 +211,8 @@ python-icat supports the use of some JPQL functions when specifying which attribute a condition should be applied to. Consider the following query:: - >>> query = Query(client, "Investigation", conditions={"LENGTH(title)": "= 18"}) + >>> query = Query(client, "Investigation", + ... 
conditions={"LENGTH(title)": "= 18"}) >>> print(query) SELECT o FROM Investigation o WHERE LENGTH(o.title) = 18 >>> client.search(query) @@ -253,7 +257,8 @@ field larger then 5 Tesla and include its parameters in the result:: ... "parameters.type.units": "= 'T'", ... "parameters.numericValue": "> 5.0", ... } - >>> query = Query(client, "Dataset", conditions=conditions, includes=["parameters.type"]) + >>> query = Query(client, "Dataset", + ... conditions=conditions, includes=["parameters.type"]) >>> print(query) SELECT o FROM Dataset o JOIN o.investigation AS i JOIN o.parameters AS p JOIN p.type AS pt WHERE i.name = '10100601-ST' AND p.numericValue > 5.0 AND pt.name = 'Magnetic field' AND pt.units = 'T' INCLUDE o.parameters AS p, p.type >>> client.search(query) @@ -456,7 +461,9 @@ multiple attributes at once. The result will be a tuple of attribute values rather then a single value for each object found in the query. This requires an ICAT server version 4.11 or newer though:: - >>> query = Query(client, "Dataset", attributes=["investigation.name", "name", "complete", "type.name"]) + >>> query = Query(client, "Dataset", attributes=[ + ... "investigation.name", "name", "complete", "type.name" + ... ]) >>> print(query) SELECT i.name, o.name, o.complete, t.name FROM Dataset o JOIN o.investigation AS i JOIN o.type AS t >>> client.search(query) @@ -485,7 +492,8 @@ average magnetic field applied in the measurements:: ... "type.name": "= 'Magnetic field'", ... "type.units": "= 'T'", ... } - >>> query = Query(client, "DatasetParameter", conditions=conditions, attributes="numericValue") + >>> query = Query(client, "DatasetParameter", + ... 
conditions=conditions, attributes="numericValue") >>> print(query) SELECT o.numericValue FROM DatasetParameter o JOIN o.dataset AS ds JOIN ds.investigation AS i JOIN o.type AS t WHERE i.name = '10100601-ST' AND t.name = 'Magnetic field' AND t.units = 'T' >>> client.search(query) @@ -578,7 +586,8 @@ make sure not to count the same object more then once:: ... "datasets.parameters.type.name": "= 'Magnetic field'", ... "datasets.parameters.type.units": "= 'T'", ... } - >>> query = Query(client, "Investigation", conditions=conditions, aggregate="COUNT") + >>> query = Query(client, "Investigation", + ... conditions=conditions, aggregate="COUNT") >>> print(query) SELECT COUNT(o) FROM Investigation o JOIN o.datasets AS s1 JOIN s1.parameters AS s2 JOIN s2.type AS s3 WHERE s3.name = 'Magnetic field' AND s3.units = 'T' >>> client.search(query) @@ -761,7 +770,9 @@ in the `order` argument to :class:`~icat.query.Query`. Let's search for user sorted by the length of their name, from longest to shortest:: - >>> query = Query(client, "User", conditions={"fullName": "IS NOT NULL"}, order=[("LENGTH(fullName)", "DESC")]) + >>> query = Query(client, "User", conditions={ + ... "fullName": "IS NOT NULL" + ... }, order=[("LENGTH(fullName)", "DESC")]) >>> print(query) SELECT o FROM User o WHERE o.fullName IS NOT NULL ORDER BY LENGTH(o.fullName) DESC >>> for user in client.search(query): @@ -782,7 +793,8 @@ shortest:: We may limit the number of returned items. Search for the third to last dataset to have been finished:: - >>> query = Query(client, "Dataset", order=[("endDate", "DESC")], limit=(2, 1)) + >>> query = Query(client, "Dataset", + ... 
order=[("endDate", "DESC")], limit=(2, 1)) >>> print(query) SELECT o FROM Dataset o ORDER BY o.endDate DESC LIMIT 2, 1 >>> client.search(query) diff --git a/doc/src/wipeicat.rst b/doc/src/wipeicat.rst index 89567684..1c1ca4cd 100644 --- a/doc/src/wipeicat.rst +++ b/doc/src/wipeicat.rst @@ -7,7 +7,7 @@ wipeicat Synopsis ~~~~~~~~ -**wipeicat** [*options*] +| **wipeicat** [*options*] Description diff --git a/doc/tutorial/create.py b/doc/tutorial/create.py new file mode 100644 index 00000000..c6ad80f0 --- /dev/null +++ b/doc/tutorial/create.py @@ -0,0 +1,59 @@ +# Tutorial / Creating stuff in the ICAT server +# interactive code blocks + +# Creating simple objects + +f1 = client.new("Facility") +f1.name = "Fac1" +f1.fullName = "Facility 1" +f1.id = client.create(f1) +client.search("SELECT f FROM Facility f") + +# -------------------- + +f2 = client.new("Facility", name="Fac2", fullName="Facility 2") +f2.create() +client.search("SELECT f FROM Facility f") + +# Relationships to other objects + +f1 = client.get("Facility", 1) + +# -------------------- + +pt1 = client.new("ParameterType") +pt1.name = "Test parameter type 1" +pt1.units = "pct" +pt1.applicableToDataset = True +pt1.valueType = "NUMERIC" +pt1.facility = f1 +pt1.create() + +# -------------------- + +pt2 = client.new("ParameterType") +pt2.name = "Test parameter type 2" +pt2.units = "N/A" +pt2.applicableToDataset = True +pt2.valueType = "STRING" +pt2.facility = f1 +for v in ["buono", "brutto", "cattivo"]: + psv = client.new("PermissibleStringValue", value=v) + pt2.permissibleStringValues.append(psv) + +pt2.create() + +# -------------------- + +query = ("SELECT pt FROM ParameterType pt " + "INCLUDE pt.facility, pt.permissibleStringValues") +client.search(query) + +# Access rules + +publicTables = [ "Application", "DatafileFormat", "DatasetType", + "Facility", "FacilityCycle", "Instrument", + "InvestigationType", "ParameterType", + "PermissibleStringValue", "SampleType", ] +queries = [ "SELECT o FROM %s o" % t for t 
in publicTables ] +client.createRules("R", queries) diff --git a/doc/tutorial/edit.py b/doc/tutorial/edit.py new file mode 100644 index 00000000..ca2aacc4 --- /dev/null +++ b/doc/tutorial/edit.py @@ -0,0 +1,43 @@ +# Tutorial / Working with objects in the ICAT server +# interactive code blocks + +client.search("SELECT f FROM Facility f") + +# Editing the attributes of objects + +for facility in client.search("SELECT f FROM Facility f"): + facility.description = "An example facility" + facility.daysUntilRelease = 1826 + facility.fullName = "%s Facility" % facility.name + client.update(facility) + +client.search("SELECT f FROM Facility f") + +# -------------------- + +for facility in client.search("SELECT f FROM Facility f"): + facility.description = None + facility.update() + +client.search("SELECT f FROM Facility f") + +# Copying objects + +fac = client.get("Facility f INCLUDE f.parameterTypes", 1) +print(fac) + +# -------------------- + +facc = fac.copy() +print(facc.name) +print(facc.parameterTypes[0].name) +facc.name = "Fac0" +facc.parameterTypes[0].name = "Test parameter type 0" +print(fac.name) +print(fac.parameterTypes[0].name) + +# -------------------- + +fac.truncateRelations() +print(fac) +print(facc) diff --git a/doc/tutorial/ids.py b/doc/tutorial/ids.py new file mode 100644 index 00000000..f3156039 --- /dev/null +++ b/doc/tutorial/ids.py @@ -0,0 +1,92 @@ +# Tutorial / Upload and download files to and from IDS +# interactive code blocks + +client.ids.isReadOnly() + +# Upload files + +users = [("jdoe", "John"), ("nbour", "Nicolas"), ("rbeck", "Rudolph")] +for user, name in users: + with open("greet-%s.txt" % user, "wt") as f: + print("Hello %s!" 
% name, file=f) + +# -------------------- + +from icat.query import Query +query = Query(client, "Investigation", conditions={"name": "= '12100409-ST'"}) +investigation = client.assertedSearch(query)[0] +dataset = client.new("Dataset") +dataset.investigation = investigation +query = Query(client, "DatasetType", conditions={"name": "= 'other'"}) +dataset.type = client.assertedSearch(query)[0] +dataset.name = "greetings" +dataset.complete = False +dataset.create() + +# -------------------- + +query = Query(client, "DatafileFormat", conditions={"name": "= 'Text'"}) +df_format = client.assertedSearch(query)[0] +for fname in ("greet-jdoe.txt", "greet-nbour.txt", "greet-rbeck.txt"): + datafile = client.new("Datafile", + name=fname, + dataset=dataset, + datafileFormat=df_format) + client.putData(fname, datafile) + +# Download files + +query = Query(client, "Datafile", conditions={ + "name": "= 'greet-jdoe.txt'", + "dataset.name": "= 'greetings'" +}) +df = client.assertedSearch(query)[0] +data = client.getData([df]) +type(data) +data.read().decode('utf8') + +# -------------------- + +from io import BytesIO +from zipfile import ZipFile +query = Query(client, "Dataset", conditions={"name": "= 'greetings'"}) +ds = client.assertedSearch(query)[0] +data = client.getData([ds]) +buffer = BytesIO(data.read()) +with ZipFile(buffer) as zipfile: + for f in zipfile.namelist(): + print("file name: %s" % f) + print("content: %r" % zipfile.open(f).read().decode('utf8')) + +# -------------------- + +from icat.ids import DataSelection +selection = DataSelection([ds]) +client.ids.archive(selection) + +# -------------------- + +client.ids.getStatus(selection) + +# -------------------- + +data = client.getData([ds]) + +# -------------------- + +client.ids.getStatus(selection) +data = client.getData([ds]) +len(data.read()) + +# -------------------- + +preparedId = client.prepareData(selection) +preparedId + +# -------------------- + +client.isDataPrepared(preparedId) +data = 
client.getData(preparedId) +buffer = BytesIO(data.read()) +with ZipFile(buffer) as zipfile: + zipfile.namelist() diff --git a/doc/tutorial/search.py b/doc/tutorial/search.py new file mode 100644 index 00000000..a697581e --- /dev/null +++ b/doc/tutorial/search.py @@ -0,0 +1,212 @@ +# Tutorial / Working with objects in the ICAT server +# interactive code blocks + +client.search("SELECT f FROM Facility f INCLUDE f.parameterTypes LIMIT 1,1") + +# Building advanced queries + +from icat.query import Query + +# -------------------- + +query = Query(client, "Investigation") +print(query) +client.search(query) + +# -------------------- + +query = Query(client, "Investigation", + conditions={"name": "= '10100601-ST'"}) +print(query) +client.search(query) + +# -------------------- + +query = Query(client, "Investigation", + conditions={"name": "= '10100601-ST'"}, + includes=["datasets"]) +print(query) +client.search(query) + +# -------------------- + +query = Query(client, "Investigation", + conditions={"LENGTH(title)": "= 18"}) +print(query) +client.search(query) + +# -------------------- + +conditions = { + "investigation.name": "= '10100601-ST'", + "parameters.type.name": "= 'Magnetic field'", + "parameters.type.units": "= 'T'", + "parameters.numericValue": "> 5.0", +} +query = Query(client, "Dataset", + conditions=conditions, includes=["parameters.type"]) +print(query) +client.search(query) + +# -------------------- + +def get_investigation(client, name, visitId=None): + query = Query(client, "Investigation") + query.addConditions({"name": "= '%s'" % name}) + if visitId is not None: + query.addConditions({"visitId": "= '%s'" % visitId}) + print(query) + return client.assertedSearch(query)[0] + +get_investigation(client, "08100122-EF") +get_investigation(client, "12100409-ST", "1.1-P") + +# -------------------- + +conditions = { + "datafileCreateTime": [">= '2012-01-01'", "< '2013-01-01'"] +} +query = Query(client, "Datafile", conditions=conditions) +print(query) 
+client.search(query) + +# -------------------- + +query = Query(client, "Datafile") +query.addConditions({"datafileCreateTime": ">= '2012-01-01'"}) +query.addConditions({"datafileCreateTime": "< '2013-01-01'"}) +print(query) + +# -------------------- + +query = Query(client, "Dataset", attributes="name") +print(query) +client.search(query) + +# -------------------- + +query = Query(client, "Dataset", attributes=[ + "investigation.name", "name", "complete", "type.name" +]) +print(query) +client.search(query) + +# -------------------- + +query = Query(client, "Dataset", aggregate="COUNT") +print(query) +client.search(query) + +# -------------------- + +conditions = { + "dataset.investigation.name": "= '10100601-ST'", + "type.name": "= 'Magnetic field'", + "type.units": "= 'T'", +} +query = Query(client, "DatasetParameter", + conditions=conditions, attributes="numericValue") +print(query) +client.search(query) +query.setAggregate("MIN") +print(query) +client.search(query) +query.setAggregate("MAX") +print(query) +client.search(query) +query.setAggregate("AVG") +print(query) +client.search(query) + +# -------------------- + +conditions = { + "datasets.parameters.type.name": "= 'Magnetic field'", + "datasets.parameters.type.units": "= 'T'", +} +query = Query(client, "Investigation", conditions=conditions) +print(query) +client.search(query) + +# -------------------- + +query.setAggregate("DISTINCT") +print(query) +client.search(query) + +# -------------------- + +conditions = { + "datasets.parameters.type.name": "= 'Magnetic field'", + "datasets.parameters.type.units": "= 'T'", +} +query = Query(client, "Investigation", + conditions=conditions, aggregate="COUNT") +print(query) +client.search(query) +query.setAggregate("COUNT:DISTINCT") +print(query) +client.search(query) + +# -------------------- + +order = ["type.name", "type.units", ("numericValue", "DESC")] +query = Query(client, "DatasetParameter", includes=["type"], order=order) +print(query) +client.search(query) 
+ +# -------------------- + +query = Query(client, "User", conditions={ + "fullName": "IS NOT NULL" +}, order=[("LENGTH(fullName)", "DESC")]) +print(query) +for user in client.search(query): + print("%d: %s" % (len(user.fullName), user.fullName)) + +# -------------------- + +query = Query(client, "Dataset", + order=[("endDate", "DESC")], limit=(2, 1)) +print(query) +client.search(query) + +# Useful search methods + +res = client.search(Query(client, "Facility")) +if not res: + raise RuntimeError("Facility not found") +elif len(res) > 1: + raise RuntimeError("Facility not unique") + +facility = res[0] +facility = client.assertedSearch(Query(client, "Facility"))[0] + +# -------------------- + +for ds in client.searchChunked(Query(client, "Dataset")): + # do something useful with the dataset ds ... + print(ds.name) + +# -------------------- + +def get_dataset(client, inv_name, ds_name, ds_type="raw"): + """Get a dataset in an investigation. + If it already exists, search and return it, create it, if not. 
+ """ + try: + dataset = client.new("Dataset") + query = Query(client, "Investigation", conditions={ + "name": "= '%s'" % inv_name + }) + dataset.investigation = client.assertedSearch(query)[0] + query = Query(client, "DatasetType", conditions={ + "name": "= '%s'" % ds_type + }) + dataset.type = client.assertedSearch(query)[0] + dataset.complete = False + dataset.name = ds_name + dataset.create() + except icat.ICATObjectExistsError: + dataset = client.searchMatching(dataset) + return dataset diff --git a/etc/ingest.xslt b/etc/ingest.xslt index 6e1e5cee..ad14d715 100644 --- a/etc/ingest.xslt +++ b/etc/ingest.xslt @@ -10,6 +10,8 @@ + + diff --git a/python-icat.spec b/python-icat.spec index 6268ad8d..d0a39b81 100644 --- a/python-icat.spec +++ b/python-icat.spec @@ -1,3 +1,13 @@ +%if 0%{?sle_version} >= 150500 +%global pyversfx 311 +%global python %__python311 +%global python_sitelib %python311_sitelib +%else +%global pyversfx 3 +%global python %__python3 +%global python_sitelib %python3_sitelib +%endif + Name: python-icat Version: $version Release: 0 @@ -5,18 +15,19 @@ Url: $url Summary: $description License: Apache-2.0 Group: Development/Libraries/Python -Source: %{name}-%{version}.tar.gz -BuildRequires: python3-base >= 3.4 -BuildRequires: python3-setuptools +Source: https://github.com/icatproject/python-icat/releases/download/%{version}/python-icat-%{version}.tar.gz +BuildRequires: python%{pyversfx}-base >= 3.4 +BuildRequires: python%{pyversfx}-setuptools +BuildRequires: fdupes +BuildRequires: python-rpm-macros BuildArch: noarch -BuildRoot: %{_tmppath}/%{name}-%{version}-build %description $long_description %package examples -Summary: Python interface to ICAT and IDS +Summary: $description Group: Documentation/Other Requires: %{name} = %{version} @@ -27,7 +38,7 @@ This package contains example scripts. 
%package man -Summary: Python interface to ICAT and IDS +Summary: $description Group: Documentation/Other Requires: %{name} = %{version} Requires: man @@ -38,16 +49,16 @@ $long_description This package contains the manual pages for the command line scripts. -%package -n python3-icat -Summary: Python interface to ICAT and IDS +%package -n python%{pyversfx}-icat +Summary: $description Requires: %{name} = %{version} -Requires: python3-lxml -Requires: python3-packaging -Requires: python3-suds +Requires: python%{pyversfx}-lxml +Requires: python%{pyversfx}-packaging +Requires: python%{pyversfx}-suds Recommends: %{name}-man -Recommends: python3-PyYAML +Recommends: python%{pyversfx}-PyYAML -%description -n python3-icat +%description -n python%{pyversfx}-icat $long_description @@ -56,25 +67,23 @@ $long_description %build -python3 setup.py build +%{python} setup.py build %install -python3 setup.py install --optimize=1 --prefix=%{_prefix} --root=%{buildroot} +%{python} setup.py install --optimize=1 --prefix=%{_prefix} --root=%{buildroot} for f in `ls %{buildroot}%{_bindir}` do mv %{buildroot}%{_bindir}/$$f %{buildroot}%{_bindir}/$${f%%.py} done - %__install -d -m 755 %{buildroot}%{_datadir}/icat -%__cp -p etc/ingest-10.xsd etc/ingest.xslt %{buildroot}%{_datadir}/icat - +%__cp -p etc/ingest-*.xsd etc/ingest.xslt %{buildroot}%{_datadir}/icat %__install -d -m 755 %{buildroot}%{_mandir}/man1 %__cp -p doc/man/*.1 %{buildroot}%{_mandir}/man1 - %__install -d -m 755 %{buildroot}%{_docdir}/%{name} %__cp -pr README.rst CHANGES.rst doc/* %{buildroot}%{_docdir}/%{name} %__chmod -f a-x %{buildroot}%{_docdir}/%{name}/examples/*.py +%fdupes %{buildroot}%{python_sitelib} %files @@ -96,9 +105,9 @@ done %defattr(-,root,root) %{_mandir}/man1/* -%files -n python3-icat +%files -n python%{pyversfx}-icat %defattr(-,root,root) -%{python3_sitelib}/* +%{python_sitelib}/* %{_bindir}/* diff --git a/setup.py b/setup.py index d1b8f904..ee7173e1 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -#! 
/usr/bin/python """Python interface to ICAT and IDS This package provides a collection of modules for writing Python @@ -13,34 +12,23 @@ import setuptools.command.build_py import distutils.command.sdist from distutils import log -from glob import glob -import os -import os.path from pathlib import Path import string -import sys try: import distutils_pytest cmdclass = distutils_pytest.cmdclass except (ImportError, AttributeError): cmdclass = dict() try: - import setuptools_scm - version = setuptools_scm.get_version() + import gitprops + release = str(gitprops.get_last_release()) + version = str(gitprops.get_version()) except (ImportError, LookupError): try: - import _meta - version = _meta.__version__ + from _meta import release, version except ImportError: log.warn("warning: cannot determine version number") - version = "UNKNOWN" - - -if sys.version_info < (3, 4): - log.warn("warning: Python %d.%d is not supported! " - "This package requires Python 3.4 or newer." - % sys.version_info[:2]) - + release = version = "UNKNOWN" docstring = __doc__ @@ -49,47 +37,24 @@ class meta(setuptools.Command): description = "generate meta files" user_options = [] - init_template = '''"""%(doc)s""" - -__version__ = "%(version)s" - -# -# Default import -# - -from icat.client import * -from icat.exception import * -''' meta_template = ''' -__version__ = "%(version)s" +release = "%(release)s" +version = "%(version)s" ''' def initialize_options(self): - self.package_dir = None + pass def finalize_options(self): - self.package_dir = {} - if self.distribution.package_dir: - for name, path in self.distribution.package_dir.items(): - self.package_dir[name] = convert_path(path) + pass def run(self): version = self.distribution.get_version() log.info("version: %s", version) values = { + 'release': release, 'version': version, - 'doc': docstring, } - try: - pkgname = self.distribution.packages[0] - except IndexError: - log.warn("warning: no package defined") - else: - pkgdir = 
Path(self.package_dir.get(pkgname, pkgname)) - if not pkgdir.is_dir(): - pkgdir.mkdir() - with (pkgdir / "__init__.py").open("wt") as f: - print(self.init_template % values, file=f) with Path("_meta.py").open("wt") as f: print(self.meta_template % values, file=f) @@ -109,34 +74,36 @@ def run(self): self.copy_test_data() def copy_test_scripts(self): - destdir = os.path.join("tests", "scripts") - self.mkpath(destdir) + destdir = Path("tests", "scripts") + self.mkpath(str(destdir)) scripts = [] - scripts += glob(os.path.join("doc", "examples", "*.py")) - scripts += self.distribution.scripts + scripts += Path("doc", "examples").glob("*.py") + scripts += (Path(s) for s in self.distribution.scripts) for script in scripts: - dest = os.path.join(destdir, os.path.basename(script)) - self.copy_file(script, dest, preserve_mode=False) + dest = destdir / script.name + self.copy_file(str(script), str(dest), preserve_mode=False) def copy_test_data(self): - destdir = os.path.join("tests", "data") - self.mkpath(destdir) + destdir = Path("tests", "data") + self.mkpath(str(destdir)) + etc = Path("etc") + doc = Path("doc") + examples = doc / "examples" files = [] - files += [ os.path.join("doc", "examples", f) - for f in ["example_data.yaml", + files += ( examples / f + for f in ("example_data.yaml", "ingest-datafiles.xml", "ingest-ds-params.xml", - "ingest-sample-ds.xml"] ] - files += [ os.path.join("doc", "examples", - "icatdump-%s.%s" % (ver, ext)) + "ingest-sample-ds.xml") ) + files += ( examples / ("icatdump-%s.%s" % (ver, ext)) for ver in ("4.4", "4.7", "4.10", "5.0") - for ext in ("xml", "yaml") ] - files += glob(os.path.join("doc", "icatdata-*.xsd")) - files += glob(os.path.join("doc", "examples", "metadata-*.xml")) - files += [ os.path.join("etc", f) - for f in ["ingest-10.xsd", "ingest-11.xsd", "ingest.xslt"] ] + for ext in ("xml", "yaml") ) + files += doc.glob("icatdata-*.xsd") + files += examples.glob("metadata-*.xml") + files += ( etc / f + for f in ("ingest-10.xsd", 
"ingest-11.xsd", "ingest.xslt") ) for f in files: - dest = os.path.join(destdir, os.path.basename(f)) - self.copy_file(f, dest, preserve_mode=False) + dest = destdir / f.name + self.copy_file(str(f), str(dest), preserve_mode=False) # Note: Do not use setuptools for making the source distribution, @@ -152,8 +119,8 @@ def run(self): "description": docstring.split("\n")[0], "long_description": docstring.split("\n", maxsplit=2)[2].strip(), } - for spec in glob("*.spec"): - with Path(spec).open('rt') as inf: + for spec in Path().glob("*.spec"): + with spec.open('rt') as inf: with Path(self.dist_dir, spec).open('wt') as outf: outf.write(string.Template(inf.read()).substitute(subst)) @@ -162,6 +129,9 @@ class build_py(setuptools.command.build_py.build_py): def run(self): self.run_command('meta') super().run() + package = self.distribution.packages[0].split('.') + outfile = self.get_module_outfile(self.build_lib, package, "_meta") + self.copy_file("_meta.py", outfile, preserve_mode=0) # There are several forks of the original suds package around, most of @@ -171,7 +141,7 @@ def run(self): # one particular suds clone. Therefore, we first try if (any clone # of) suds is already installed and only add suds to install_requires # if not. 
-requires = ["lxml", "packaging"] +requires = ["setuptools", "lxml", "packaging"] try: import suds except ImportError: @@ -210,13 +180,20 @@ def run(self): project_urls = dict( Documentation="https://python-icat.readthedocs.io/", Source="https://github.com/icatproject/python-icat/", - Download="https://github.com/icatproject/python-icat/releases/latest", - Changes="https://python-icat.readthedocs.io/en/latest/changelog.html", + Download=("https://github.com/icatproject/python-icat/releases/%s/" + % release), + Changes=("https://python-icat.readthedocs.io/en/stable" + "/changelog.html#changes-%s" % release.replace('.', '-')), ), packages = ["icat"], + package_dir = {"": "src"}, python_requires = ">=3.4", install_requires = requires, - scripts = ["icatdump.py", "icatingest.py", "wipeicat.py"], + scripts = [ + "src/scripts/icatdump.py", + "src/scripts/icatingest.py", + "src/scripts/wipeicat.py" + ], cmdclass = dict(cmdclass, meta=meta, build_py=build_py, diff --git a/src/icat/__init__.py b/src/icat/__init__.py new file mode 100644 index 00000000..30475152 --- /dev/null +++ b/src/icat/__init__.py @@ -0,0 +1,13 @@ +"""Python interface to ICAT and IDS + +This package provides a collection of modules for writing Python +programs that access an `ICAT`_ service using the SOAP interface. It +is based on Suds and extends it with ICAT specific features. + +.. 
_ICAT: https://icatproject.org/ +""" + +from ._meta import version as __version__ +from .client import * +from .exception import * + diff --git a/icat/authinfo.py b/src/icat/authinfo.py similarity index 100% rename from icat/authinfo.py rename to src/icat/authinfo.py diff --git a/icat/chunkedhttp.py b/src/icat/chunkedhttp.py similarity index 100% rename from icat/chunkedhttp.py rename to src/icat/chunkedhttp.py diff --git a/icat/client.py b/src/icat/client.py similarity index 98% rename from icat/client.py rename to src/icat/client.py index 144b24a2..b18935d6 100644 --- a/icat/client.py +++ b/src/icat/client.py @@ -17,14 +17,14 @@ import suds.client import suds.sudsobject -from icat.entities import getTypeMap -from icat.entity import Entity -from icat.exception import * -from icat.helper import (Version, simpleqp_unquote, parse_attr_val, - ms_timestamp, disable_logger) -from icat.ids import * -from icat.query import Query -from icat.sslcontext import create_ssl_context, HTTPSTransport +from .entities import getTypeMap +from .entity import Entity +from .exception import * +from .helper import (Version, simpleqp_unquote, parse_attr_val, + ms_timestamp, disable_logger) +from .ids import * +from .query import Query +from .sslcontext import create_ssl_context, HTTPSTransport __all__ = ['Client'] @@ -691,6 +691,8 @@ def searchMatching(self, obj, includes=None): if a in obj.InstAttr: query.addConditions({a: "= '%s'" % v}) elif a in obj.InstRel: + if v.id is None: + raise ValueError("%s.id is not set" % a) query.addConditions({"%s.id" % a: "= %d" % v.id}) else: raise InternalError("Invalid constraint '%s' in %s." 
diff --git a/icat/config.py b/src/icat/config.py similarity index 99% rename from icat/config.py rename to src/icat/config.py index 9554c615..f3187df5 100644 --- a/icat/config.py +++ b/src/icat/config.py @@ -8,9 +8,10 @@ from pathlib import Path import sys import warnings -from icat.client import Client -from icat.authinfo import AuthenticatorInfo, LegacyAuthenticatorInfo -from icat.exception import ConfigError, VersionMethodError + +from .client import Client +from .authinfo import AuthenticatorInfo, LegacyAuthenticatorInfo +from .exception import ConfigError, VersionMethodError __all__ = ['boolean', 'flag', 'Configuration', 'Config'] diff --git a/icat/dump_queries.py b/src/icat/dump_queries.py similarity index 99% rename from icat/dump_queries.py rename to src/icat/dump_queries.py index 6bbf7ce2..7c8aa53b 100644 --- a/icat/dump_queries.py +++ b/src/icat/dump_queries.py @@ -32,8 +32,7 @@ into account and include the new entity types. """ -import icat -from icat.query import Query +from .query import Query __all__ = [ 'getAuthQueries', 'getStaticQueries', 'getFundingQueries', 'getInvestigationQueries', 'getDataCollectionQueries', diff --git a/icat/dumpfile.py b/src/icat/dumpfile.py similarity index 84% rename from icat/dumpfile.py rename to src/icat/dumpfile.py index ca77de96..a18832a3 100644 --- a/icat/dumpfile.py +++ b/src/icat/dumpfile.py @@ -5,53 +5,14 @@ writing ICAT data files. The actual work is done in file format specific modules that should provide subclasses that must implement the abstract methods. - -Data files are partitioned in chunks. This is done to avoid having -the whole file, e.g. the complete inventory of the ICAT, at once in -memory. The problem is that objects contain references to other -objects (e.g. Datafiles refer to Datasets, the latter refer to -Investigations, and so forth). We keep an index of the objects in -order to resolve these references. 
But there is a memory versus time -tradeoff: we cannot keep all the objects in the index, that would -again mean the complete inventory of the ICAT. And we can't know -beforehand which object is going to be referenced later on, so we -don't know which one to keep and which one to discard from the index. -Fortunately we can query objects we discarded once back from the ICAT -server with :meth:`icat.client.Client.searchUniqueKey`. But this is -expensive. So the strategy is as follows: keep all objects from the -current chunk in the index and discard the complete index each time a -chunk has been processed. This will work fine if objects are mostly -referencing other objects from the same chunk and only a few -references go across chunk boundaries. - -Therefore, we want these chunks to be small enough to fit into memory, -but at the same time large enough to keep as many relations between -objects as possible local in a chunk. It is in the responsibility of -the writer of the data file to create the chunks in this manner. - -The objects that get written to the data file and how this file is -organized is controlled by lists of ICAT search expressions, see -:meth:`icat.dumpfile.DumpFileWriter.writeobjs`. There is some degree -of flexibility: an object may include related objects in an -one-to-many relation, just by including them in the search expression. -In this case, these related objects should not have a search -expression on their own again. For instance, the search expression -for Grouping may include UserGroup. The UserGroups will then be -embedded in their respective grouping in the data file. There should -not be a search expression for UserGroup then. - -Objects related in a many-to-one relation must always be included in -the search expression. This is also true if the object is -indirectly related to one of the included objects. In this case, -only a reference to the related object will be included in the data -file. 
The related object must have its own list entry. """ from collections import ChainMap import os import sys -import icat -from icat.query import Query + +from .entity import Entity +from .query import Query def _get_retain_entities(client): @@ -138,6 +99,9 @@ def getdata(self): specific to the implementing backend and should be passed as the `data` argument to :meth:`~icat.dumpfile.DumpFileReader.getobjs_from_data`. + + This abstract method must be implemented in the file format + specific backend. """ raise NotImplementedError @@ -146,6 +110,9 @@ def getobjs_from_data(self, data, objindex): Yield a new entity object in each iteration. The object is initialized from the data, but not yet created at the client. + + This abstract method must be implemented in the file format + specific backend. """ raise NotImplementedError @@ -236,7 +203,11 @@ def __exit__(self, type, value, traceback): self.outfile.close() def head(self): - """Write a header with some meta information to the data file.""" + """Write a header with some meta information to the data file. + + This abstract method must be implemented in the file format + specific backend. + """ raise NotImplementedError def startdata(self): @@ -244,15 +215,26 @@ def startdata(self): If the current chunk contains any data, write it to the data file. + + This abstract method must be implemented in the file format + specific backend. """ raise NotImplementedError def writeobj(self, key, obj, keyindex): - """Add an entity object to the current data chunk.""" + """Add an entity object to the current data chunk. + + This abstract method must be implemented in the file format + specific backend. + """ raise NotImplementedError def finalize(self): - """Finalize the data file.""" + """Finalize the data file. + + This abstract method must be implemented in the file format + specific backend. 
+ """ raise NotImplementedError def writeobjs(self, objs, keyindex, chunksize=100): @@ -293,7 +275,7 @@ def writeobjs(self, objs, keyindex, chunksize=100): """ if isinstance(objs, Query) or isinstance(objs, str): objs = self.client.searchChunked(objs, chunksize=chunksize) - for obj in sorted(objs, key=icat.entity.Entity.__sortkey__): + for obj in sorted(objs, key=Entity.__sortkey__): # Entities without a constraint will use their id to form # the unique key as a last resort. But we want the keys # not to depend on volatile attributes such as the id. diff --git a/icat/dumpfile_xml.py b/src/icat/dumpfile_xml.py similarity index 92% rename from icat/dumpfile_xml.py rename to src/icat/dumpfile_xml.py index db676b13..2373c4ea 100644 --- a/icat/dumpfile_xml.py +++ b/src/icat/dumpfile_xml.py @@ -5,9 +5,12 @@ import os import sys from lxml import etree -import icat -import icat.dumpfile -from icat.query import Query + +from . import __version__ +from .dumpfile import DumpFileReader, DumpFileWriter, register_backend +from .entity import Entity +from .exception import SearchResultError +from .query import Query utc = datetime.timezone.utc @@ -16,7 +19,7 @@ # XMLDumpFileReader # ------------------------------------------------------------ -class XMLDumpFileReader(icat.dumpfile.DumpFileReader): +class XMLDumpFileReader(DumpFileReader): """Backend for reading ICAT data from a XML file. :param client: a client object configured to connect to the ICAT @@ -58,7 +61,10 @@ def _searchByReference(self, element, objtype, objindex): ref = element.get('ref') if ref: # object is referenced by key. - return self.client.searchUniqueKey(ref, objindex) + try: + return self.client.searchUniqueKey(ref, objindex) + except ValueError: + raise SearchResultError("invalid reference %s" % ref) else: # object is referenced by attributes. 
attrs = set(element.keys()) - {'id'} @@ -138,7 +144,7 @@ def getobjs_from_data(self, data, objindex): # XMLDumpFileWriter # ------------------------------------------------------------ -class XMLDumpFileWriter(icat.dumpfile.DumpFileWriter): +class XMLDumpFileWriter(DumpFileWriter): """Backend for writing ICAT data to a XML file. :param client: a client object configured to connect to the ICAT @@ -200,8 +206,7 @@ def _entity2elem(self, obj, tag, keyindex): k = o.getUniqueKey(keyindex=keyindex) etree.SubElement(d, attr, ref=k) for attr in sorted(obj.InstMRel): - for o in sorted(getattr(obj, attr), - key=icat.entity.Entity.__sortkey__): + for o in sorted(getattr(obj, attr), key=Entity.__sortkey__): d.append(self._entity2elem(o, tag=attr, keyindex=keyindex)) return d @@ -213,7 +218,7 @@ def head(self): etree.SubElement(head, "service").text = self.client.url etree.SubElement(head, "apiversion").text = str(self.client.apiversion) etree.SubElement(head, "generator").text = ("icatdump (python-icat %s)" - % icat.__version__) + % __version__) self.outfile.write(b""" """) @@ -241,4 +246,4 @@ def finalize(self): self.outfile.write(b"\n") -icat.dumpfile.register_backend("XML", XMLDumpFileReader, XMLDumpFileWriter) +register_backend("XML", XMLDumpFileReader, XMLDumpFileWriter) diff --git a/icat/dumpfile_yaml.py b/src/icat/dumpfile_yaml.py similarity index 91% rename from icat/dumpfile_yaml.py rename to src/icat/dumpfile_yaml.py index 091ae537..6203543f 100644 --- a/icat/dumpfile_yaml.py +++ b/src/icat/dumpfile_yaml.py @@ -3,8 +3,11 @@ import datetime import yaml -import icat -import icat.dumpfile + +from . 
import __version__ +from .dumpfile import DumpFileReader, DumpFileWriter, register_backend +from .entity import Entity +from .exception import SearchResultError utc = datetime.timezone.utc @@ -69,7 +72,7 @@ # YAMLDumpFileReader # ------------------------------------------------------------ -class YAMLDumpFileReader(icat.dumpfile.DumpFileReader): +class YAMLDumpFileReader(DumpFileReader): """Backend for reading ICAT data from a YAML file. :param client: a client object configured to connect to the ICAT @@ -98,7 +101,10 @@ def _dict2entity(self, d, objtype, objindex): if attr in obj.InstAttr: setattr(obj, attr, d[k]) elif attr in obj.InstRel: - robj = self.client.searchUniqueKey(d[k], objindex) + try: + robj = self.client.searchUniqueKey(d[k], objindex) + except ValueError: + raise SearchResultError("invalid reference %s" % d[k]) setattr(obj, attr, robj) elif attr in obj.InstMRel: rtype = self.insttypemap[obj.getAttrType(attr)] @@ -138,7 +144,7 @@ def getobjs_from_data(self, data, objindex): # YAMLDumpFileWriter # ------------------------------------------------------------ -class YAMLDumpFileWriter(icat.dumpfile.DumpFileWriter): +class YAMLDumpFileWriter(DumpFileWriter): """Backend for writing ICAT data to a YAML file. 
:param client: a client object configured to connect to the ICAT @@ -190,8 +196,7 @@ def _entity2dict(self, obj, keyindex): for attr in obj.InstMRel: if len(getattr(obj, attr)) > 0: d[attr] = [] - for o in sorted(getattr(obj, attr), - key=icat.entity.Entity.__sortkey__): + for o in sorted(getattr(obj, attr), key=Entity.__sortkey__): d[attr].append(self._entity2dict(o, keyindex=keyindex)) return d @@ -204,7 +209,7 @@ def head(self): # Service: %s # ICAT-API: %s # Generator: icatdump (python-icat %s) -""" % (date, self.client.url, self.client.apiversion, icat.__version__) +""" % (date, self.client.url, self.client.apiversion, __version__) self.outfile.write(head) def startdata(self): @@ -232,4 +237,4 @@ def finalize(self): self.startdata() -icat.dumpfile.register_backend("YAML", YAMLDumpFileReader, YAMLDumpFileWriter) +register_backend("YAML", YAMLDumpFileReader, YAMLDumpFileWriter) diff --git a/icat/entities.py b/src/icat/entities.py similarity index 99% rename from icat/entities.py rename to src/icat/entities.py index c6b90753..7761aeef 100644 --- a/icat/entities.py +++ b/src/icat/entities.py @@ -17,8 +17,9 @@ """ import itertools -from icat.entity import Entity -from icat.exception import InternalError + +from .entity import Entity +from .exception import InternalError class GroupingMixin: @@ -241,7 +242,6 @@ def getTypeMap(client): may be used as :attr:`icat.client.Client.typemap` for the client object. 
:rtype: :class:`dict` - """ def addType(typemap, cls): instanceName = cls.getInstanceName() diff --git a/icat/entity.py b/src/icat/entity.py similarity index 99% rename from icat/entity.py rename to src/icat/entity.py index 71dd894d..8a778fe1 100644 --- a/icat/entity.py +++ b/src/icat/entity.py @@ -4,9 +4,10 @@ import re from warnings import warn import suds.sudsobject -from icat.listproxy import ListProxy -from icat.exception import InternalError, EntityTypeError, DataConsistencyError -from icat.helper import simpleqp_quote + +from .listproxy import ListProxy +from .exception import InternalError, EntityTypeError, DataConsistencyError +from .helper import simpleqp_quote __all__ = ['Entity'] diff --git a/icat/eval.py b/src/icat/eval.py similarity index 91% rename from icat/eval.py rename to src/icat/eval.py index d21e9df4..b5acba43 100644 --- a/icat/eval.py +++ b/src/icat/eval.py @@ -12,14 +12,13 @@ """ import logging -import icat -import icat.config +from .config import Config if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - config = icat.config.Config(ids="optional") + config = Config(ids="optional") config.add_variable('expression', ("-e", "--eval"), dict(help="Python expression to evaluate")) client, conf = config.getconfig() diff --git a/icat/exception.py b/src/icat/exception.py similarity index 100% rename from icat/exception.py rename to src/icat/exception.py diff --git a/icat/helper.py b/src/icat/helper.py similarity index 100% rename from icat/helper.py rename to src/icat/helper.py diff --git a/icat/ids.py b/src/icat/ids.py similarity index 99% rename from icat/ids.py rename to src/icat/ids.py index 8df49d4a..42ee136f 100644 --- a/icat/ids.py +++ b/src/icat/ids.py @@ -20,16 +20,16 @@ from urllib.request import build_opener import zlib -from icat.entity import Entity -from icat.exception import * -from icat.helper import Version +from .entity import Entity +from .exception import * +from .helper import Version # For Python versions older 
than 3.6.0b1, the standard library does # not support sending the body using chunked transfer encoding. Need # to replace the HTTPHandler with our modified versions from # icat.chunkedhttp in this case. if sys.version_info < (3, 6, 0, 'beta'): - from icat.chunkedhttp import HTTPHandler, HTTPSHandler + from .chunkedhttp import HTTPHandler, HTTPSHandler else: from urllib.request import HTTPHandler, HTTPSHandler diff --git a/icat/ingest.py b/src/icat/ingest.py similarity index 56% rename from icat/ingest.py rename to src/icat/ingest.py index 8e62008a..0e513526 100644 --- a/icat/ingest.py +++ b/src/icat/ingest.py @@ -8,13 +8,46 @@ .. versionadded:: 1.1.0 """ +from collections import namedtuple from pathlib import Path from lxml import etree -import icat.dumpfile_xml -from icat.exception import InvalidIngestFileError +from .dumpfile_xml import XMLDumpFileReader +from .exception import InvalidIngestFileError -class IngestReader(icat.dumpfile_xml.XMLDumpFileReader): + +_ObjIdTuple = namedtuple('_ObjIdTuple', ['t', 'dsname', 'relid']) +class _ObjId(_ObjIdTuple): + _MsgTemplate = { + 'Dataset': + "Dataset, name:%(dsname)s", + 'DatasetInstrument': + "DatasetInstrument, Dataset:%(dsname)s, Instrument:%(relid)d", + 'DatasetTechnique': + "DatasetTechnique, Dataset:%(dsname)s, Technique:%(relid)d", + 'DatasetParameter': + "DatasetParameter, Dataset:%(dsname)s, ParameterType:%(relid)d", + } + def __new__(cls, obj): + kwargs = dict(t=obj.BeanName, relid=None) + if obj.BeanName == "Dataset": + kwargs['dsname'] = obj.name + else: + kwargs['dsname'] = obj.dataset.name + if obj.BeanName == "DatasetInstrument": + kwargs['relid'] = obj.instrument.id + elif obj.BeanName == "DatasetTechnique": + kwargs['relid'] = obj.technique.id + elif obj.BeanName == "DatasetParameter": + kwargs['relid'] = obj.type.id + else: + raise InvalidIngestFileError("Invalid %s object" + % (obj.BeanName)) + return super().__new__(cls, **kwargs) + def __str__(self): + return self._MsgTemplate[self.t] %
self._asdict() + +class IngestReader(XMLDumpFileReader): """Read metadata from XML ingest files into ICAT. The input file may contain one or more datasets and related @@ -36,6 +69,14 @@ class IngestReader(icat.dumpfile_xml.XMLDumpFileReader): :type investigation: :class:`icat.entity.Entity` :raise icat.exception.InvalidIngestFileError: if the input in metadata is not valid. + + .. versionchanged:: 1.3.0 + drop class attribute :attr:`~icat.ingest.IngestReader.XSLT_name` + in favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`. + + .. versionchanged:: 1.3.0 + inject an element ``_environment`` as first child of the root + element into the input data. """ SchemaDir = Path("/usr/share/icat") @@ -49,8 +90,14 @@ class IngestReader(icat.dumpfile_xml.XMLDumpFileReader): element name and version attribute, the values are the corresponding name of the XSD file. """ - XSLT_name = "ingest.xslt" - """The name of the XSLT file to use. + XSLT_Map = { + 'icatingest': "ingest.xslt", + } + """A mapping to select the XSLT file to use. Keys are the root + element name, the values are the corresponding name of the XSLT + file. + + .. versionadded:: 1.3.0 """ def __init__(self, client, metadata, investigation): @@ -65,8 +112,11 @@ def __init__(self, client, metadata, investigation): raise InvalidIngestFileError(e) with self.get_xsd(ingest_data).open("rb") as f: schema = etree.XMLSchema(etree.parse(f)) - if not schema.validate(ingest_data): - raise InvalidIngestFileError("validation failed") + try: + schema.assertValid(ingest_data) + except etree.DocumentInvalid as exc: + raise InvalidIngestFileError("DocumentInvalid: %s" % exc) + self.add_environment(client, ingest_data) with self.get_xslt(ingest_data).open("rb") as f: xslt = etree.XSLT(etree.parse(f)) super().__init__(client, xslt(ingest_data)) @@ -105,9 +155,11 @@ def get_xsd(self, ingest_data): def get_xslt(self, ingest_data): """Get the XSLT file. 
- Take :attr:`~icat.ingest.IngestReader.XSLT_name` as a file - name relative to :attr:`~icat.ingest.IngestReader.SchemaDir` - and return this path. + Inspect the root element in the input data and lookup the + element name in :attr:`~icat.ingest.IngestReader.XSLT_Map`. + The value is taken as a file name relative to + :attr:`~icat.ingest.IngestReader.SchemaDir` and this path is + returned. Subclasses may override this method to customize the XSLT file to use. These derived versions may inspect the input data to @@ -119,8 +171,68 @@ def get_xslt(self, ingest_data): :type ingest_data: :class:`lxml.etree._ElementTree` :return: path to the XSLT file. :rtype: :class:`~pathlib.Path` + :raise icat.exception.InvalidIngestFileError: if the root + element name could not be found in + :attr:`~icat.ingest.IngestReader.XSLT_Map`. + + .. versionchanged:: 1.3.0 + lookup the root element name in + :attr:`~icat.ingest.IngestReader.XSLT_Map` rather than + using a static file name. + """ + root = ingest_data.getroot() + try: + xslt = self.XSLT_Map[root.tag] + except KeyError: + raise InvalidIngestFileError("unknown format") + return self.SchemaDir / xslt + + def get_environment(self, client): + """Get the environment to be injected as an element into the input. + + Subclasses may override this method to control the attributes + set in the environment. + + :param client: the client object being used by this + IngestReader. + :type client: :class:`icat.client.Client` + :return: the environment. + :rtype: :class:`dict` + + .. versionadded:: 1.3.0 """ - return self.SchemaDir / self.XSLT_name + return dict(icat_version=str(client.apiversion)) + + def add_environment(self, client, ingest_data): + """Inject environment information into input data. + + The attributes set in the environment are determined by + calling :meth:`~icat.ingest.IngestReader.get_environment`. + Subclasses may override this method to fully control the + process of adding the environment element. 
+ + :param client: the client object being used by this + IngestReader. + :type client: :class:`icat.client.Client` + :param ingest_data: input data + :type ingest_data: :class:`lxml.etree._ElementTree` + + .. versionadded:: 1.3.0 + """ + env = self.get_environment(client) + env_elem = etree.Element("_environment", **env) + ingest_data.getroot().insert(0, env_elem) + + def getobjs_from_data(self, data, objindex): + typed_objindex = set() + for key, obj in super().getobjs_from_data(data, objindex): + if key in objindex: + raise InvalidIngestFileError("Duplicate id %s" % key) + objid = _ObjId(obj) + if objid in typed_objindex: + raise InvalidIngestFileError("Duplicate %s" % str(objid)) + typed_objindex.add(objid) + yield key, obj def getobjs(self): """Iterate over the objects in the ingest file. @@ -142,11 +254,16 @@ def ingest(self, datasets, dry_run=False, update_ds=False): created in ICAT. In this case, the `datasets` in the argument must already have been created in ICAT beforehand (e.g. the `id` attribute must be set). If `dry_run` is :const:`True`, - the `datasets` don't need to be created beforehand. + the objects in the metadata will be checked for conformance, + but nothing will be committed to ICAT. In this case, the + `datasets` don't need to be created beforehand. If `update_ds` is :const:`True`, the objects in the `datasets` argument will be updated: the attributes and the relations to other objects will be set to the values read from the input. + This is particularly useful in conjunction with `dry_run` in + order to update the `datasets` from the metadata prior to + creating them in ICAT. :param datasets: list of allowed datasets in the input. :type datasets: iterable of :class:`icat.entity.Entity` @@ -155,8 +272,11 @@ def ingest(self, datasets, dry_run=False, update_ds=False): :param update_ds: flag whether to update the `datasets` in the argument.
:type update_ds: :class:`bool` - :raise icat.exception.InvalidIngestFileError: if any unallowed - object is read from the input. + :raise icat.exception.InvalidIngestFileError: if the input is + not valid, for instance if there is any unallowed object + or duplicate objects. + :raise icat.exception.SearchResultError: if any object + references in the input could not be resolved. """ dataset_map = { ds.name: ds for ds in datasets } allowed_ds_related = { diff --git a/icat/listproxy.py b/src/icat/listproxy.py similarity index 100% rename from icat/listproxy.py rename to src/icat/listproxy.py diff --git a/icat/query.py b/src/icat/query.py similarity index 99% rename from icat/query.py rename to src/icat/query.py index ccbe7c1e..975cc2e8 100644 --- a/icat/query.py +++ b/src/icat/query.py @@ -5,8 +5,9 @@ import re from warnings import warn from collections.abc import Mapping -import icat.entity -from icat.exception import * + +from .entity import Entity +from .exception import * __all__ = ['Query'] @@ -119,7 +120,7 @@ def __init__(self, client, entity, if isinstance(entity, str): self.entity = self.client.getEntityClass(entity) - elif issubclass(entity, icat.entity.Entity): + elif issubclass(entity, Entity): if (entity in self.client.typemap.values() and entity.BeanName is not None): self.entity = entity diff --git a/icat/sslcontext.py b/src/icat/sslcontext.py similarity index 100% rename from icat/sslcontext.py rename to src/icat/sslcontext.py diff --git a/icatdump.py b/src/scripts/icatdump.py old mode 100755 new mode 100644 similarity index 100% rename from icatdump.py rename to src/scripts/icatdump.py diff --git a/icatingest.py b/src/scripts/icatingest.py old mode 100755 new mode 100644 similarity index 100% rename from icatingest.py rename to src/scripts/icatingest.py diff --git a/wipeicat.py b/src/scripts/wipeicat.py old mode 100755 new mode 100644 similarity index 100% rename from wipeicat.py rename to src/scripts/wipeicat.py diff --git a/tests/conftest.py 
b/tests/conftest.py index 25c01dcb..104901da 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -159,6 +159,20 @@ def require_dumpfile_backend(backend): _skip("need %s backend for icat.dumpfile" % (backend)) +def get_icatdata_schema(): + if icat_version < "4.4": + fname = "icatdata-4.3.xsd" + elif icat_version < "4.7": + fname = "icatdata-4.4.xsd" + elif icat_version < "4.10": + fname = "icatdata-4.7.xsd" + elif icat_version < "5.0": + fname = "icatdata-4.10.xsd" + else: + fname = "icatdata-5.0.xsd" + return gettestdata(fname) + + def get_reference_dumpfile(ext = "yaml"): require_icat_version("4.4.0", "oldest available set of test data") if icat_version < "4.7": diff --git a/tests/data/ingest-env.xslt b/tests/data/ingest-env.xslt new file mode 100644 index 00000000..8e0eb4e7 --- /dev/null +++ b/tests/data/ingest-env.xslt @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + 2024-01-22T14:30:51+01:00 + + + + ingest-env.xslt + + + + + + + + + + + + + false + + + + + + + + + + + + + + + + _Investigation + + + + + + + + + diff --git a/tests/data/metadata-5.0-badref.xml b/tests/data/metadata-5.0-badref.xml deleted file mode 100644 index 252a69cb..00000000 --- a/tests/data/metadata-5.0-badref.xml +++ /dev/null @@ -1,29 +0,0 @@ - - - - - 2023-06-16T11:01:15+02:00 - metadata-writer 0.27a - - - - e208339 - - - - - - - - - - - very evil - - - - - diff --git a/tests/data/metadata-custom-icatingest.xml b/tests/data/metadata-custom-icatingest.xml new file mode 100644 index 00000000..5e3c9b68 --- /dev/null +++ b/tests/data/metadata-custom-icatingest.xml @@ -0,0 +1,15 @@ + + + + 2023-06-16T11:01:15+02:00 + metadata-writer 0.29 + + + + testingest_custom_icatingest_1 + Dy01Cp02 at 2.7 K + 2022-02-03T15:40:12+01:00 + 2022-02-03T17:04:22+01:00 + + + diff --git a/tests/data/metadata-custom-myingest.xml b/tests/data/metadata-custom-myingest.xml new file mode 100644 index 00000000..e1c659f1 --- /dev/null +++ b/tests/data/metadata-custom-myingest.xml @@ -0,0 +1,15 @@ + + + + 
2023-06-16T11:01:15+02:00 + metadata-writer 0.29 + + + + testingest_custom_myingest_1 + Dy01Cp02 at 2.7 K + 2022-02-03T15:40:12+01:00 + 2022-02-03T17:04:22+01:00 + + + diff --git a/tests/data/myingest.xsd b/tests/data/myingest.xsd new file mode 100644 index 00000000..5a086142 --- /dev/null +++ b/tests/data/myingest.xsd @@ -0,0 +1,138 @@ + + + + + + + Schema definition for ingest files to ICAT. + Test schema for testing the use of a custom schema. + Version 1.0. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/myingest.xslt b/tests/data/myingest.xslt new file mode 100644 index 00000000..7b7f591b --- /dev/null +++ b/tests/data/myingest.xslt @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + false + + + + + + + + + + + x-ray + + + + + + + + + + + + + + + _Investigation + + + + + + + + + diff --git a/tests/pytest.ini b/tests/pytest.ini index b52d7707..a6d9be1f 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -1,4 +1,2 @@ [pytest] minversion = 3.1.0 -markers = - dependency: mark dependencies between tests. 
\ No newline at end of file diff --git a/tests/test_06_client.py b/tests/test_06_client.py index b36cefb1..8702316a 100644 --- a/tests/test_06_client.py +++ b/tests/test_06_client.py @@ -498,3 +498,37 @@ def test_searchMatching_include(client): assert obj.name == "e208945" assert len(obj.datafiles) > 0 +def test_searchMatching_error_attribute_missing(client): + """Test error handling with searchMatching(): + leaving out a required attribute + """ + facility = client.assertedSearch("Facility")[0] + # Neglect to set visitId + investigation = client.new("Investigation", + name="12100409-ST", + facility=facility) + with pytest.raises(ValueError): + obj = client.searchMatching(investigation) + +def test_searchMatching_error_relation_missing(client): + """Test error handling with searchMatching(): + leaving out a required many-to-one relation + """ + facility = client.assertedSearch("Facility")[0] + # Neglect to set facility + investigation = client.new("Investigation", + name="12100409-ST", visitId="1.1-P") + with pytest.raises(ValueError): + obj = client.searchMatching(investigation) + +def test_searchMatching_error_relation_id_missing(client): + """Test error handling with searchMatching(): + a required many-to-one relation has no id + """ + facility = client.assertedSearch("Facility")[0] + fac = client.new("Facility", name=str(facility.name)) + investigation = client.new("Investigation", + name="12100409-ST", visitId="1.1-P", + facility=fac) + with pytest.raises(ValueError): + obj = client.searchMatching(investigation) diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py index 03a84de4..baa0c73c 100644 --- a/tests/test_06_ingest.py +++ b/tests/test_06_ingest.py @@ -3,6 +3,8 @@ from collections import namedtuple import datetime +import io +import logging import pytest pytest.importorskip("lxml") from lxml import etree @@ -10,8 +12,10 @@ import icat.config from icat.ingest import IngestReader from icat.query import Query -from conftest import getConfig, 
gettestdata, icat_version, testdatadir +from conftest import (getConfig, gettestdata, icat_version, + get_icatdata_schema, testdatadir) +logger = logging.getLogger(__name__) def get_test_investigation(client): query = Query(client, "Investigation", conditions={ @@ -19,6 +23,11 @@ def get_test_investigation(client): }) return client.assertedSearch(query)[0] +class NamedBytesIO(io.BytesIO): + def __init__(self, initial_bytes, name): + super().__init__(initial_bytes) + self.name = name + @pytest.fixture(scope="module") def client(setupicat): client, conf = getConfig(confSection="ingest", ids=False) @@ -56,17 +65,31 @@ def investigation(client, cleanup_objs): def schemadir(monkeypatch): monkeypatch.setattr(IngestReader, "SchemaDir", testdatadir) + +class MyIngestReader(IngestReader): + """Testing a customized IngestReader + """ + XSD_Map = { + ('icatingest', '1.0'): "ingest-10.xsd", + ('icatingest', '1.1'): "ingest-11.xsd", + ('myingest', '1.0'): "myingest.xsd", + } + XSLT_Map = { + 'icatingest': "ingest.xslt", + 'myingest': "myingest.xslt", + } + + cet = datetime.timezone(datetime.timedelta(hours=1)) cest = datetime.timezone(datetime.timedelta(hours=2)) -Case = namedtuple('Case', ['data', 'metadata', 'schema', 'checks', 'marks']) +Case = namedtuple('Case', ['data', 'metadata', 'checks', 'marks']) # Try out different variants for the metadata input file cases = [ Case( data = ["testingest_inl_1", "testingest_inl_2"], metadata = gettestdata("metadata-4.4-inl.xml"), - schema = gettestdata("icatdata-4.4.xsd"), checks = { "testingest_inl_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -106,7 +129,6 @@ def schemadir(monkeypatch): Case( data = ["testingest_inl5_1", "testingest_inl5_2"], metadata = gettestdata("metadata-5.0-inl.xml"), - schema = gettestdata("icatdata-5.0.xsd"), checks = { "testingest_inl5_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -165,7 +187,6 @@ def schemadir(monkeypatch): Case( data = ["testingest_sep_1",
"testingest_sep_2"], metadata = gettestdata("metadata-4.4-sep.xml"), - schema = gettestdata("icatdata-4.4.xsd"), checks = { "testingest_sep_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -205,7 +226,6 @@ def schemadir(monkeypatch): Case( data = ["testingest_sep5_1", "testingest_sep5_2"], metadata = gettestdata("metadata-5.0-sep.xml"), - schema = gettestdata("icatdata-5.0.xsd"), checks = { "testingest_sep5_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -265,7 +285,6 @@ def schemadir(monkeypatch): data = [ "testingest_sample_1", "testingest_sample_2", "testingest_sample_3", "testingest_sample_4" ], metadata = gettestdata("metadata-sample.xml"), - schema = gettestdata("icatdata-4.4.xsd"), checks = { "testingest_sample_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -336,9 +355,9 @@ def test_ingest_schema(client, investigation, schemadir, case): for name in case.data: datasets.append(client.new("Dataset", name=name)) reader = IngestReader(client, case.metadata, investigation) - with case.schema.open("rb") as f: + with get_icatdata_schema().open("rb") as f: schema = etree.XMLSchema(etree.parse(f)) - assert schema.validate(reader.infile) + schema.assertValid(reader.infile) @pytest.mark.parametrize("case", [ pytest.param(c, id=c.metadata.name, marks=c.marks) for c in cases @@ -361,26 +380,362 @@ def test_ingest(client, investigation, samples, schemadir, case): for query, res in case.checks[name]: assert client.assertedSearch(query % ds.id)[0] == res +io_metadata = NamedBytesIO(""" + + + 2023-06-16T11:01:15+02:00 + metadata-writer 0.27a + + + + testingest_io_1 + Dy01Cp02 at 10.2 K + 2022-02-03T15:40:12+01:00 + 2022-02-03T17:04:22+01:00 + + neutron + + + + + +""".encode("utf8"), "io_metadata") +io_cases = [ + Case( + data = ["testingest_io_1"], + metadata = io_metadata, + checks = { + "testingest_io_1": [ + ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", + "Dy01Cp02 at 10.2 K"), + (("SELECT 
p.stringValue FROM DatasetParameter p " + "JOIN p.dataset AS ds JOIN p.type AS t " + "WHERE ds.id = %d AND t.name = 'Probe'"), + "neutron"), + ], + }, + marks = (), + ), +] + +@pytest.mark.parametrize("case", [ + pytest.param(c, id=c.metadata.name, marks=c.marks) for c in io_cases +]) +def test_ingest_fileobj(client, investigation, samples, schemadir, case): + """Test ingest reading from a file object rather than a Path + """ + datasets = [] + for name in case.data: + datasets.append(client.new("Dataset", name=name)) + reader = IngestReader(client, case.metadata, investigation) + reader.ingest(datasets, dry_run=True, update_ds=True) + for ds in datasets: + ds.create() + reader.ingest(datasets) + for name in case.checks.keys(): + query = Query(client, "Dataset", conditions={ + "name": "= '%s'" % name, + "investigation.id": "= %d" % investigation.id, + }) + ds = client.assertedSearch(query)[0] + for query, res in case.checks[name]: + assert client.assertedSearch(query % ds.id)[0] == res -badcases = [ + +invalid_root_metadata = NamedBytesIO(""" + + + 2023-06-16T11:01:15+02:00 + metadata-writer 0.27a + + + +""".encode("utf8"), "invalid_root") +invalid_ver_metadata = NamedBytesIO(""" + + + 2023-06-16T11:01:15+02:00 + metadata-writer 0.27a + + + +""".encode("utf8"), "invalid_version") +invalid_ref_metadata = NamedBytesIO(""" + + + 2023-06-16T11:01:15+02:00 + metadata-writer 0.27a + + + + testingest_err_invalid_ref + + + very evil + + + + + +""".encode("utf8"), "invalid_ref") +invalid_dup_metadata = NamedBytesIO(""" + + + 2023-06-16T11:01:15+02:00 + metadata-writer 0.27a + + + + testingest_err_invalid_dup + + + 10.0 + + + + + 17.0 + + + + + +""".encode("utf8"), "invalid_dup") +invalid_dup_id_metadata = NamedBytesIO(""" + + + 2023-06-16T11:01:15+02:00 + metadata-writer 0.27a + + + + testingest_err_invalid_dup_id_1 + + + testingest_err_invalid_dup_id_2 + + + 10.0 + + + + + +""".encode("utf8"), "invalid_dup_id") +invalid_cases = [ Case( - data = ["e208339"], - metadata = 
gettestdata("metadata-5.0-badref.xml"), - schema = gettestdata("icatdata-5.0.xsd"), + data = [], + metadata = invalid_root_metadata, checks = {}, - marks = ( - pytest.mark.skipif(icat_version < "5.0", - reason="Need ICAT schema 5.0 or newer"), - ), + marks = (), + ), + Case( + data = [], + metadata = invalid_ver_metadata, + checks = {}, + marks = (), + ), + Case( + data = ["testingest_err_invalid_ref"], + metadata = invalid_ref_metadata, + checks = {}, + marks = (), + ), + Case( + data = ["testingest_err_invalid_dup"], + metadata = invalid_dup_metadata, + checks = {}, + marks = (), + ), + Case( + data = ["testingest_err_invalid_dup_id_1", + "testingest_err_invalid_dup_id_2"], + metadata = invalid_dup_id_metadata, + checks = {}, + marks = (), + ), +] +@pytest.mark.parametrize("case", [ + pytest.param(c, id=c.metadata.name, marks=c.marks) for c in invalid_cases +]) +def test_ingest_error_invalid(client, investigation, schemadir, case): + datasets = [] + for name in case.data: + datasets.append(client.new("Dataset", name=name)) + with pytest.raises(icat.InvalidIngestFileError) as exc: + reader = IngestReader(client, case.metadata, investigation) + reader.ingest(datasets, dry_run=True, update_ds=True) + logger.info("Raised %s: %s", exc.type.__name__, exc.value) + +searcherr_attr_metadata = NamedBytesIO(""" + + + 2023-06-16T11:01:15+02:00 + metadata-writer 0.27a + + + + testingest_err_search_attr + + + 10.0 + + + + + +""".encode("utf8"), "search_attr") +searcherr_ref_metadata = NamedBytesIO(""" + + + 2023-06-16T11:01:15+02:00 + metadata-writer 0.27a + + + + testingest_err_search_ref + + + 10.0 + + + + + +""".encode("utf8"), "search_ref") +searcherr_cases = [ + Case( + data = ["testingest_err_search_attr"], + metadata = searcherr_attr_metadata, + checks = {}, + marks = (), + ), + Case( + data = ["testingest_err_search_ref"], + metadata = searcherr_ref_metadata, + checks = {}, + marks = (), ), ] @pytest.mark.parametrize("case", [ - pytest.param(c, id=c.metadata.name, 
marks=c.marks) for c in badcases + pytest.param(c, id=c.metadata.name, marks=c.marks) for c in searcherr_cases ]) -def test_badref_ingest(client, investigation, schemadir, case): +def test_ingest_error_searcherr(client, investigation, schemadir, case): datasets = [] for name in case.data: datasets.append(client.new("Dataset", name=name)) - with pytest.raises(icat.InvalidIngestFileError): + with pytest.raises(icat.SearchResultError) as exc: reader = IngestReader(client, case.metadata, investigation) reader.ingest(datasets, dry_run=True, update_ds=True) + logger.info("Raised %s: %s", exc.type.__name__, exc.value) + + +customcases = [ + Case( + data = ["testingest_custom_icatingest_1"], + metadata = gettestdata("metadata-custom-icatingest.xml"), + checks = { + "testingest_custom_icatingest_1": [ + ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", + "Dy01Cp02 at 2.7 K"), + ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", + datetime.datetime(2022, 2, 3, 15, 40, 12, tzinfo=cet)), + ("SELECT ds.endDate FROM Dataset ds WHERE ds.id = %d", + datetime.datetime(2022, 2, 3, 17, 4, 22, tzinfo=cet)), + (("SELECT COUNT(p) FROM DatasetParameter p " + "JOIN p.dataset AS ds " + "WHERE ds.id = %d"), + 0), + ], + }, + marks = (), + ), + Case( + data = ["testingest_custom_myingest_1"], + metadata = gettestdata("metadata-custom-myingest.xml"), + checks = { + "testingest_custom_myingest_1": [ + ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", + "My Ingest: Dy01Cp02 at 2.7 K"), + ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", + datetime.datetime(2022, 2, 3, 15, 40, 12, tzinfo=cet)), + ("SELECT ds.endDate FROM Dataset ds WHERE ds.id = %d", + datetime.datetime(2022, 2, 3, 17, 4, 22, tzinfo=cet)), + (("SELECT COUNT(p) FROM DatasetParameter p " + "JOIN p.dataset AS ds " + "WHERE ds.id = %d"), + 1), + (("SELECT p.stringValue FROM DatasetParameter p " + "JOIN p.dataset AS ds JOIN p.type AS t " + "WHERE ds.id = %d AND t.name = 'Probe'"), + "x-ray"), + ], + }, 
+ marks = (), + ), +] +@pytest.mark.parametrize("case", [ + pytest.param(c, id=c.metadata.name, marks=c.marks) for c in customcases +]) +def test_custom_ingest(client, investigation, samples, schemadir, case): + """Test a custom ingest reader MyIngestReader, defined above. + + MyIngestReader defines a custom ingest format by defining its own + set of XSD and XSLT files. But it still supports the vanilla + icatingest format. In the test, we define two cases, having + identical input data: the first one using icatingest format, the + second one the customized myingest format. Otherwise the input is + identical. But note that the transformation for the myingest case + alters the input on the fly, so we get different results. + """ + datasets = [] + for name in case.data: + datasets.append(client.new("Dataset", name=name)) + reader = MyIngestReader(client, case.metadata, investigation) + reader.ingest(datasets, dry_run=True, update_ds=True) + for ds in datasets: + ds.create() + reader.ingest(datasets) + for name in case.checks.keys(): + query = Query(client, "Dataset", conditions={ + "name": "= '%s'" % name, + "investigation.id": "= %d" % investigation.id, + }) + ds = client.assertedSearch(query)[0] + for query, res in case.checks[name]: + assert client.assertedSearch(query % ds.id)[0] == res + + +env_cases = [ + Case( + data = ["testingest_inl_1", "testingest_inl_2"], + metadata = gettestdata("metadata-4.4-inl.xml"), + checks = {}, + marks = (), + ), +] +@pytest.mark.parametrize("case", [ + pytest.param(c, id=c.metadata.name, marks=c.marks) for c in env_cases +]) +def test_ingest_env(monkeypatch, client, investigation, schemadir, case): + """Test using the _environment element. + + Applying a custom XSLT that extracts an attribute from the + _environment element that is injected by IngestReader into the + input data and puts that value into the head element of the + transformed input. 
This is to test that adding the _environment + element works and it is in principle possible to make use of the + values in the XSLT. + """ + monkeypatch.setattr(IngestReader, + "XSLT_Map", dict(icatingest="ingest-env.xslt")) + datasets = [] + for name in case.data: + datasets.append(client.new("Dataset", name=name)) + reader = IngestReader(client, case.metadata, investigation) + with get_icatdata_schema().open("rb") as f: + schema = etree.XMLSchema(etree.parse(f)) + schema.assertValid(reader.infile) + version_elem = reader.infile.xpath("/icatdata/head/apiversion") + assert version_elem + assert version_elem[0].text == str(client.apiversion)