From 023f4c07e08615eada044530c444f689bebd0ea5 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 15 Mar 2023 14:26:01 +0100
Subject: [PATCH 001/102] Client.searchMatching(): raise a somewhat more
 meaningful error if the id for a related object in the constraint is not set

---
 icat/client.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/icat/client.py b/icat/client.py
index a43d0db1..46f40493 100644
--- a/icat/client.py
+++ b/icat/client.py
@@ -679,6 +679,8 @@ def searchMatching(self, obj, includes=None):
             if a in obj.InstAttr:
                 query.addConditions({a: "= '%s'" % v})
             elif a in obj.InstRel:
+                if v.id is None:
+                    raise ValueError("%s.id is not set" % a)
                 query.addConditions({"%s.id" % a: "= %d" % v.id})
             else:
                 raise InternalError("Invalid constraint '%s' in %s."

From 71128fc6e27b4211a09a6423dc1bfd85c51ec9b0 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 15 Mar 2023 16:09:16 +0100
Subject: [PATCH 002/102] The implementations of DumpFileReader
 (XMLDumpFileReader and YAMLDumpFileReader) raise SearchResultError rather
 than ValueError if a reference to a related object is invalid

---
 icat/dumpfile_xml.py  | 6 +++++-
 icat/dumpfile_yaml.py | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/icat/dumpfile_xml.py b/icat/dumpfile_xml.py
index cde769ca..7251a75e 100644
--- a/icat/dumpfile_xml.py
+++ b/icat/dumpfile_xml.py
@@ -7,6 +7,7 @@
 from lxml import etree
 import icat
 import icat.dumpfile
+from icat.exception import SearchResultError
 from icat.query import Query
 try:
     utc = datetime.timezone.utc
@@ -64,7 +65,10 @@ def _searchByReference(self, element, objtype, objindex):
         ref = element.get('ref')
         if ref:
             # object is referenced by key.
-            return self.client.searchUniqueKey(ref, objindex)
+            try:
+                return self.client.searchUniqueKey(ref, objindex)
+            except ValueError:
+                raise SearchResultError("invalid reference %s" % ref)
         else:
             # object is referenced by attributes.
             attrs = set(element.keys()) - {'id'}
diff --git a/icat/dumpfile_yaml.py b/icat/dumpfile_yaml.py
index ce5a638f..d65859c2 100644
--- a/icat/dumpfile_yaml.py
+++ b/icat/dumpfile_yaml.py
@@ -5,6 +5,7 @@
 import yaml
 import icat
 import icat.dumpfile
+from icat.exception import SearchResultError
 try:
     utc = datetime.timezone.utc
 except AttributeError:
@@ -104,7 +105,10 @@ def _dict2entity(self, d, objtype, objindex):
             if attr in obj.InstAttr:
                 setattr(obj, attr, d[k])
             elif attr in obj.InstRel:
-                robj = self.client.searchUniqueKey(d[k], objindex)
+                try:
+                    robj = self.client.searchUniqueKey(d[k], objindex)
+                except ValueError:
+                    raise SearchResultError("invalid reference %s" % d[k])
                 setattr(obj, attr, robj)
             elif attr in obj.InstMRel:
                 rtype = self.insttypemap[obj.getAttrType(attr)]

From da284c7d122637f9705d8d88f6bef14e0d6e2751 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 31 Oct 2023 20:45:32 +0100
Subject: [PATCH 003/102] Link to stable version of Changes rather than latest
 in Project-URLs

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index d1b8f904..ac2f897d 100755
--- a/setup.py
+++ b/setup.py
@@ -211,7 +211,7 @@ def run(self):
         Documentation="https://python-icat.readthedocs.io/",
         Source="https://github.com/icatproject/python-icat/",
         Download="https://github.com/icatproject/python-icat/releases/latest",
-        Changes="https://python-icat.readthedocs.io/en/latest/changelog.html",
+        Changes="https://python-icat.readthedocs.io/en/stable/changelog.html",
     ),
     packages = ["icat"],
     python_requires = ">=3.4",

From 49aa9ee80a3008f3fd81cc37bcb1afa8cc0da6be Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 10 Nov 2023 15:46:21 +0100
Subject: [PATCH 004/102] Review spec file, build for Python 3.11 on openSUSE
 15.5

---
 python-icat.spec | 47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/python-icat.spec b/python-icat.spec
index 6268ad8d..b0f4292c 100644
--- a/python-icat.spec
+++ b/python-icat.spec
@@ -1,3 +1,13 @@
+%if 0%{?sle_version} >= 150500
+%global pyversfx 311
+%global python %__python311
+%global python_sitelib %python311_sitelib
+%else
+%global pyversfx 3
+%global python %__python3
+%global python_sitelib %python3_sitelib
+%endif
+
 Name:		python-icat
 Version:	$version
 Release:	0
@@ -6,17 +16,18 @@ Summary:	$description
 License:	Apache-2.0
 Group:		Development/Libraries/Python
 Source:		%{name}-%{version}.tar.gz
-BuildRequires:	python3-base >= 3.4
-BuildRequires:	python3-setuptools
+BuildRequires:	python%{pyversfx}-base >= 3.4
+BuildRequires:	python%{pyversfx}-setuptools
+BuildRequires:	fdupes
+BuildRequires:	python-rpm-macros
 BuildArch:	noarch
-BuildRoot:	%{_tmppath}/%{name}-%{version}-build
 
 %description
 $long_description
 
 
 %package examples
-Summary:	Python interface to ICAT and IDS
+Summary:	$description
 Group:		Documentation/Other
 Requires:	%{name} = %{version}
 
@@ -27,7 +38,7 @@ This package contains example scripts.
 
 
 %package man
-Summary:	Python interface to ICAT and IDS
+Summary:	$description
 Group:		Documentation/Other
 Requires:	%{name} = %{version}
 Requires:	man
@@ -38,16 +49,16 @@ $long_description
 This package contains the manual pages for the command line scripts.
 
 
-%package -n python3-icat
-Summary:	Python interface to ICAT and IDS
+%package -n python%{pyversfx}-icat
+Summary:	$description
 Requires:	%{name} = %{version}
-Requires:	python3-lxml
-Requires:	python3-packaging
-Requires:	python3-suds
+Requires:	python%{pyversfx}-lxml
+Requires:	python%{pyversfx}-packaging
+Requires:	python%{pyversfx}-suds
 Recommends:	%{name}-man
-Recommends:	python3-PyYAML
+Recommends:	python%{pyversfx}-PyYAML
 
-%description -n python3-icat
+%description -n python%{pyversfx}-icat
 $long_description
 
 
@@ -56,25 +67,23 @@ $long_description
 
 
 %build
-python3 setup.py build
+%{python} setup.py build
 
 
 %install
-python3 setup.py install --optimize=1 --prefix=%{_prefix} --root=%{buildroot}
+%{python} setup.py install --optimize=1 --prefix=%{_prefix} --root=%{buildroot}
 for f in `ls %{buildroot}%{_bindir}`
 do
     mv %{buildroot}%{_bindir}/$$f %{buildroot}%{_bindir}/$${f%%.py}
 done
-
 %__install -d -m 755 %{buildroot}%{_datadir}/icat
 %__cp -p etc/ingest-10.xsd etc/ingest.xslt %{buildroot}%{_datadir}/icat
-
 %__install -d -m 755 %{buildroot}%{_mandir}/man1
 %__cp -p doc/man/*.1 %{buildroot}%{_mandir}/man1
-
 %__install -d -m 755 %{buildroot}%{_docdir}/%{name}
 %__cp -pr README.rst CHANGES.rst doc/* %{buildroot}%{_docdir}/%{name}
 %__chmod -f a-x %{buildroot}%{_docdir}/%{name}/examples/*.py
+%fdupes %{buildroot}%{python_sitelib}
 
 
 %files
@@ -96,9 +105,9 @@ done
 %defattr(-,root,root)
 %{_mandir}/man1/*
 
-%files -n python3-icat
+%files -n python%{pyversfx}-icat
 %defattr(-,root,root)
-%{python3_sitelib}/*
+%{python_sitelib}/*
 %{_bindir}/*
 
 

From bba8f5f948821c5afb9355ee596ff644f7d24615 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 22 Nov 2023 09:33:37 +0100
Subject: [PATCH 005/102] Point to the release page on GitHub instead of PyPI
 for the download link in the README

---
 README.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index 1d1b19ca..e32e8ce3 100644
--- a/README.rst
+++ b/README.rst
@@ -21,10 +21,10 @@ is based on Suds and extends it with ICAT specific features.
 Download
 --------
 
-The latest release version can be found in the
-`Python Package Index (PyPI)`__.
+The latest release version can be found at the
+`release page on GitHub`__.
 
-.. __: `PyPI site`_
+.. __: `GitHub release`_
 
 
 Documentation
@@ -64,6 +64,6 @@ permissions and limitations under the License.
 
 
 .. _ICAT: https://icatproject.org/
-.. _PyPI site: https://pypi.org/project/python-icat/
+.. _GitHub release: https://github.com/icatproject/python-icat/releases/latest
 .. _Read the Docs site: https://python-icat.readthedocs.io/
 .. _Apache License: https://www.apache.org/licenses/LICENSE-2.0

From 9be8303b0dd96ac5ef991a2fecab34b3830303fd Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 22 Nov 2023 09:48:27 +0100
Subject: [PATCH 006/102] Put a full download URL in the spec file

---
 python-icat.spec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-icat.spec b/python-icat.spec
index b0f4292c..2401ab9f 100644
--- a/python-icat.spec
+++ b/python-icat.spec
@@ -15,7 +15,7 @@ Url:		$url
 Summary:	$description
 License:	Apache-2.0
 Group:		Development/Libraries/Python
-Source:		%{name}-%{version}.tar.gz
+Source:		https://github.com/icatproject/python-icat/releases/latest/download/python-icat-%{version}.tar.gz
 BuildRequires:	python%{pyversfx}-base >= 3.4
 BuildRequires:	python%{pyversfx}-setuptools
 BuildRequires:	fdupes

From 97e1ba21569f6717249bc02ceb81b8c66de75c23 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 8 Dec 2023 16:45:03 +0100
Subject: [PATCH 007/102] Fix spec file: did not install ingest-11.xsd

---
 python-icat.spec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-icat.spec b/python-icat.spec
index b0f4292c..57f9896b 100644
--- a/python-icat.spec
+++ b/python-icat.spec
@@ -77,7 +77,7 @@ do
     mv %{buildroot}%{_bindir}/$$f %{buildroot}%{_bindir}/$${f%%.py}
 done
 %__install -d -m 755 %{buildroot}%{_datadir}/icat
-%__cp -p etc/ingest-10.xsd etc/ingest.xslt %{buildroot}%{_datadir}/icat
+%__cp -p etc/ingest-*.xsd etc/ingest.xslt %{buildroot}%{_datadir}/icat
 %__install -d -m 755 %{buildroot}%{_mandir}/man1
 %__cp -p doc/man/*.1 %{buildroot}%{_mandir}/man1
 %__install -d -m 755 %{buildroot}%{_docdir}/%{name}

From 0c1d28f969088f6c9f81060f7d74a15bd514e346 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 11 Dec 2023 12:47:39 +0100
Subject: [PATCH 008/102] Drop class attribute XSLT_name in favour of
 XSLT_Map in icat.ingest.IngestReader

---
 doc/src/ingest.rst | 41 +++++++++++++++++++++++++++--------------
 icat/ingest.py     | 30 ++++++++++++++++++++++++------
 2 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst
index 4fed8b7e..37785613 100644
--- a/doc/src/ingest.rst
+++ b/doc/src/ingest.rst
@@ -55,6 +55,10 @@ the ``Dataset``.
 .. versionchanged:: 1.2.0
    add version 1.1 of the ingest file format, including references to samples
 
+.. versionchanged:: 1.3.0
+   drop class attribute :attr:`~icat.ingest.IngestReader.XSLT_name` in
+   favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`.
+
 .. autoclass:: icat.ingest.IngestReader
     :members:
     :show-inheritance:
@@ -114,8 +118,8 @@ Customizing the input format
 
 The ingest input file format may be customized by providing custom XSD
 and XSLT files.  The easiest way to do that is to subclass
-:class:`~icat.ingest.IngestReader`, you'd only need to override some
-class attributes as follows::
+:class:`~icat.ingest.IngestReader`.  In most cases, you'd only need to
+override some class attributes as follows::
 
   from pathlib import Path
   import icat.ingest
@@ -132,18 +136,27 @@ class attributes as follows::
       }
 
       # Override the XSLT file to use:
-      XSLT_name = "myingest.xslt"
-
-:attr:`~icat.ingest.IngestReader.XSD_Map` is a mapping with pairs of
-root element name and version attribute as keys and XSD file names as
-values.  The method :meth:`~icat.ingest.IngestReader.get_xsd` inspects
-the input file and selects the file name from
-:attr:`~icat.ingest.IngestReader.XSD_Map` accordingly.  (Note that
-there is no such mapping for the XSLT file, because its is assumed
-that it is fairly easy to formulate adaptations to the input version
-directly in XSLT, so one single XSLT file would be sufficient to cover
-all versions.)  In the above example, `MyFacilityIngestReader` would
-recognize input files like
+      XSLT_Map = {
+          'legacyingest': "legacy-ingest.xslt",
+          'myingest': "my-ingest.xslt",
+      }
+
+:attr:`~icat.ingest.IngestReader.XSD_Map` and
+:attr:`~icat.ingest.IngestReader.XSLT_Map` are mappings with
+properties of the root element of the input data as keys and file
+names as values.  The methods
+:meth:`~icat.ingest.IngestReader.get_xsd` and
+:meth:`~icat.ingest.IngestReader.get_xslt` respectively inspect the
+input file and use these mappings to select the XSD and XSLT file
+accordingly.  Note that :attr:`~icat.ingest.IngestReader.XSD_Map`
+takes tuples of root element name and version attribute as keys, while
+:attr:`~icat.ingest.IngestReader.XSLT_Map` uses the root element
+name alone.  It is assumed that it is fairly easy to
+formulate adaptations to the input version directly in XSLT, so one
+single XSLT file would be sufficient to cover all versions.
+
+In the above example, `MyFacilityIngestReader` would recognize input
+files like
 
 .. code-block:: xml
 
diff --git a/icat/ingest.py b/icat/ingest.py
index 8e62008a..9cc230f9 100644
--- a/icat/ingest.py
+++ b/icat/ingest.py
@@ -49,8 +49,14 @@ class IngestReader(icat.dumpfile_xml.XMLDumpFileReader):
     element name and version attribute, the values are the
     corresponding name of the XSD file.
     """
-    XSLT_name = "ingest.xslt"
-    """The name of the XSLT file to use.
+    XSLT_Map = {
+        'icatingest': "ingest.xslt",
+    }
+    """A mapping to select the XSLT file to use.  Keys are the root
+    element name, the values are the corresponding name of the XSLT
+    file.
+
+    .. versionadded:: 1.3.0
     """
 
     def __init__(self, client, metadata, investigation):
@@ -105,9 +111,11 @@ def get_xsd(self, ingest_data):
     def get_xslt(self, ingest_data):
         """Get the XSLT file.
 
-        Take :attr:`~icat.ingest.IngestReader.XSLT_name` as a file
-        name relative to :attr:`~icat.ingest.IngestReader.SchemaDir`
-        and return this path.
+        Inspect the root element in the input data and lookup the
+        element name in :attr:`~icat.ingest.IngestReader.XSLT_Map`.
+        The value is taken as a file name relative to
+        :attr:`~icat.ingest.IngestReader.SchemaDir` and this path is
+        returned.
 
         Subclasses may override this method to customize the XSLT file
         to use.  These derived versions may inspect the input data to
@@ -119,8 +127,18 @@ def get_xslt(self, ingest_data):
         :type ingest_data: :class:`lxml.etree._ElementTree`
         :return: path to the XSLT file.
         :rtype: :class:`~pathlib.Path`
+
+        .. versionchanged:: 1.3.0
+            Lookup the root element name in
+            :attr:`~icat.ingest.IngestReader.XSLT_Map` rather than
+            using a static file name.
         """
-        return self.SchemaDir / self.XSLT_name
+        root = ingest_data.getroot()
+        try:
+            xslt = self.XSLT_Map[root.tag]
+        except KeyError:
+            raise InvalidIngestFileError("unknown format")
+        return self.SchemaDir / xslt
 
     def getobjs(self):
         """Iterate over the objects in the ingest file.

From 4f9f57555b28b380692f4815a03b9190f6ddebb5 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 11 Dec 2023 17:44:03 +0100
Subject: [PATCH 009/102] Add tests for a customized ingest reader

---
 MANIFEST.in                               |   4 +-
 tests/data/metadata-custom-icatingest.xml |  15 +++
 tests/data/metadata-custom-myingest.xml   |  15 +++
 tests/data/myingest.xsd                   | 138 ++++++++++++++++++++++
 tests/data/myingest.xslt                  |  59 +++++++++
 tests/test_06_ingest.py                   |  93 +++++++++++++++
 6 files changed, 323 insertions(+), 1 deletion(-)
 create mode 100644 tests/data/metadata-custom-icatingest.xml
 create mode 100644 tests/data/metadata-custom-myingest.xml
 create mode 100644 tests/data/myingest.xsd
 create mode 100644 tests/data/myingest.xslt

diff --git a/MANIFEST.in b/MANIFEST.in
index fe080b6d..a7c92f8b 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -18,7 +18,9 @@ include etc/ingest.xslt
 include tests/conftest.py
 include tests/data/legacy-icatdump-*.xml
 include tests/data/legacy-icatdump-*.yaml
-include tests/data/metadata-5.0-badref.xml
+include tests/data/metadata-*.xml
+include tests/data/myingest.xsd
+include tests/data/myingest.xslt
 include tests/data/ref-icatdump-*.xml
 include tests/data/ref-icatdump-*.yaml
 include tests/data/summary*
diff --git a/tests/data/metadata-custom-icatingest.xml b/tests/data/metadata-custom-icatingest.xml
new file mode 100644
index 00000000..5e3c9b68
--- /dev/null
+++ b/tests/data/metadata-custom-icatingest.xml
@@ -0,0 +1,15 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<icatingest version="1.0">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.29</generator>
+  </head>
+  <data>
+    <dataset id="Dataset_1">
+      <name>testingest_custom_icatingest_1</name>
+      <description>Dy01Cp02 at 2.7 K</description>
+      <startDate>2022-02-03T15:40:12+01:00</startDate>
+      <endDate>2022-02-03T17:04:22+01:00</endDate>
+    </dataset>
+  </data>
+</icatingest>
diff --git a/tests/data/metadata-custom-myingest.xml b/tests/data/metadata-custom-myingest.xml
new file mode 100644
index 00000000..e1c659f1
--- /dev/null
+++ b/tests/data/metadata-custom-myingest.xml
@@ -0,0 +1,15 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<myingest version="1.0">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.29</generator>
+  </head>
+  <data>
+    <dataset id="Dataset_1">
+      <name>testingest_custom_myingest_1</name>
+      <description>Dy01Cp02 at 2.7 K</description>
+      <startDate>2022-02-03T15:40:12+01:00</startDate>
+      <endDate>2022-02-03T17:04:22+01:00</endDate>
+    </dataset>
+  </data>
+</myingest>
diff --git a/tests/data/myingest.xsd b/tests/data/myingest.xsd
new file mode 100644
index 00000000..5a086142
--- /dev/null
+++ b/tests/data/myingest.xsd
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="utf-8"?>
+
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+
+<xsd:annotation>
+  <xsd:documentation>
+    Schema definition for ingest files to ICAT.
+    Test schema for testing the use of a custom schema.
+    Version 1.0.
+  </xsd:documentation>
+</xsd:annotation>
+
+<xsd:element name="myingest" type="myingest"/>
+
+<xsd:complexType name="myingest">
+  <xsd:sequence>
+    <xsd:element name="head" type="head" minOccurs="0"/>
+    <xsd:element name="data" type="data"/>
+  </xsd:sequence>
+  <xsd:attribute name="version" type="version_10" use="required"/>
+</xsd:complexType>
+
+<xsd:complexType name="head">
+  <xsd:sequence>
+    <xsd:element name="date" type="xsd:dateTime"/>
+    <xsd:element name="generator" type="xsd:string"/>
+  </xsd:sequence>
+</xsd:complexType>
+
+<xsd:complexType name="data">
+  <xsd:sequence>
+    <xsd:element name="dataset" type="dataset"
+		 minOccurs="0" maxOccurs="unbounded"/>
+    <xsd:element name="datasetTechnique" type="datasetTechnique"
+		 minOccurs="0" maxOccurs="unbounded"/>
+    <xsd:element name="datasetInstrument" type="datasetInstrument"
+		 minOccurs="0" maxOccurs="unbounded"/>
+    <xsd:element name="datasetParameter" type="datasetParameter"
+		 minOccurs="0" maxOccurs="unbounded"/>
+  </xsd:sequence>
+</xsd:complexType>
+
+
+<xsd:complexType name="entityBase">
+  <xsd:attribute name="id" type="identifier"/>
+</xsd:complexType>
+
+<xsd:complexType name="entityReference">
+  <xsd:attribute name="ref" type="identifier" use="required"/>
+</xsd:complexType>
+
+
+<xsd:complexType name="dataset">
+  <xsd:complexContent>
+    <xsd:extension base="entityBase">
+      <xsd:sequence>
+	<xsd:element name="name" type="xsd:string"/>
+	<xsd:element name="description" type="xsd:string" minOccurs="0"/>
+	<xsd:element name="startDate" type="xsd:dateTime" minOccurs="0"/>
+	<xsd:element name="endDate" type="xsd:dateTime" minOccurs="0"/>
+	<xsd:element name="sample" type="nameRef" minOccurs="0"/>
+	<xsd:element name="datasetInstruments" type="datasetInstrument"
+		     minOccurs="0" maxOccurs="unbounded"/>
+	<xsd:element name="datasetTechniques" type="datasetTechnique"
+		     minOccurs="0" maxOccurs="unbounded"/>
+	<xsd:element name="parameters" type="datasetParameter"
+		     minOccurs="0" maxOccurs="unbounded"/>
+      </xsd:sequence>
+    </xsd:extension>
+  </xsd:complexContent>
+</xsd:complexType>
+
+<xsd:complexType name="datasetTechnique">
+  <xsd:complexContent>
+    <xsd:extension base="entityBase">
+      <xsd:sequence>
+	<xsd:element name="dataset" type="entityReference" minOccurs="0"/>
+	<xsd:element name="technique" type="nameRef"/>
+      </xsd:sequence>
+    </xsd:extension>
+  </xsd:complexContent>
+</xsd:complexType>
+
+<xsd:complexType name="datasetInstrument">
+  <xsd:complexContent>
+    <xsd:extension base="entityBase">
+      <xsd:sequence>
+	<xsd:element name="dataset" type="entityReference" minOccurs="0"/>
+	<xsd:element name="instrument" type="nameRef"/>
+      </xsd:sequence>
+    </xsd:extension>
+  </xsd:complexContent>
+</xsd:complexType>
+
+<xsd:complexType name="datasetParameter">
+  <xsd:complexContent>
+    <xsd:extension base="entityBase">
+      <xsd:sequence>
+	<xsd:element name="dateTimeValue" type="xsd:dateTime" minOccurs="0"/>
+	<xsd:element name="error" type="xsd:double" minOccurs="0"/>
+	<xsd:element name="numericValue" type="xsd:double" minOccurs="0"/>
+	<xsd:element name="rangeBottom" type="xsd:double" minOccurs="0"/>
+	<xsd:element name="rangeTop" type="xsd:double" minOccurs="0"/>
+	<xsd:element name="stringValue" type="xsd:string" minOccurs="0"/>
+	<xsd:element name="dataset" type="entityReference" minOccurs="0"/>
+	<xsd:element name="type" type="parameterTypeRef"/>
+      </xsd:sequence>
+    </xsd:extension>
+  </xsd:complexContent>
+</xsd:complexType>
+
+
+<xsd:complexType name="nameRef">
+  <xsd:attribute name="name" type="xsd:string"/>
+  <xsd:attribute name="pid" type="xsd:string"/>
+</xsd:complexType>
+
+<xsd:complexType name="parameterTypeRef">
+  <xsd:attribute name="name" type="xsd:string"/>
+  <xsd:attribute name="units" type="xsd:string"/>
+  <xsd:attribute name="pid" type="xsd:string"/>
+</xsd:complexType>
+
+
+<xsd:simpleType name="identifier">
+  <xsd:restriction base="xsd:string">
+    <xsd:pattern value="[A-Za-z][A-Za-z0-9_]*"/>
+  </xsd:restriction>
+</xsd:simpleType>
+
+<xsd:simpleType name="version_10">
+  <xsd:restriction base="xsd:string">
+    <xsd:enumeration value="1.0"/>
+  </xsd:restriction>
+</xsd:simpleType>
+
+
+</xsd:schema>
diff --git a/tests/data/myingest.xslt b/tests/data/myingest.xslt
new file mode 100644
index 00000000..4016a60e
--- /dev/null
+++ b/tests/data/myingest.xslt
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet version="1.0"
+		xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+    <xsl:output method="xml"/>
+
+    <xsl:template match="/myingest">
+	<icatdata>
+	    <xsl:apply-templates/>
+	</icatdata>
+    </xsl:template>
+
+    <xsl:template match="/myingest/head"/>
+
+    <xsl:template match="/myingest/data">
+	<data>
+	    <xsl:apply-templates/>
+	</data>
+    </xsl:template>
+
+    <xsl:template match="/myingest/data/dataset">
+	<dataset>
+	    <xsl:copy-of select="@id"/>
+	    <complete>false</complete>
+	    <xsl:apply-templates select="description"/>
+	    <xsl:copy-of select="endDate"/>
+	    <xsl:copy-of select="name"/>
+	    <xsl:copy-of select="startDate"/>
+	    <investigation ref="_Investigation"/>
+	    <xsl:apply-templates select="sample"/>
+	    <type name="raw"/>
+	    <xsl:copy-of select="datasetInstruments"/>
+	    <xsl:copy-of select="datasetTechniques"/>
+	    <parameters>
+		<stringValue>x-ray</stringValue>
+		<type name="Probe"/>
+	    </parameters>
+	    <xsl:copy-of select="parameters"/>
+	</dataset>
+    </xsl:template>
+
+    <xsl:template match="/myingest/data/dataset/description">
+	<xsl:copy>
+	    <xsl:value-of select="concat('My Ingest: ', .)"/>
+	</xsl:copy>
+    </xsl:template>
+
+    <xsl:template match="/myingest/data/dataset/sample">
+	<xsl:copy>
+	    <xsl:attribute name="investigation.ref">_Investigation</xsl:attribute>
+	    <xsl:copy-of select="@*"/>
+	</xsl:copy>
+    </xsl:template>
+
+    <xsl:template match="*">
+	<xsl:copy-of select="."/>
+    </xsl:template>
+
+</xsl:stylesheet>
diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index 03a84de4..bdd08f1e 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -56,6 +56,21 @@ def investigation(client, cleanup_objs):
 def schemadir(monkeypatch):
     monkeypatch.setattr(IngestReader, "SchemaDir", testdatadir)
 
+
+class MyIngestReader(IngestReader):
+    """Testing a customized IngestReader
+    """
+    XSD_Map = {
+        ('icatingest', '1.0'): "ingest-10.xsd",
+        ('icatingest', '1.1'): "ingest-11.xsd",
+        ('myingest', '1.0'): "myingest.xsd",
+    }
+    XSLT_Map = {
+        'icatingest': "ingest.xslt",
+        'myingest': "myingest.xslt",
+    }
+
+
 cet = datetime.timezone(datetime.timedelta(hours=1))
 cest = datetime.timezone(datetime.timedelta(hours=2))
 
@@ -384,3 +399,81 @@ def test_badref_ingest(client, investigation, schemadir, case):
     with pytest.raises(icat.InvalidIngestFileError):
         reader = IngestReader(client, case.metadata, investigation)
         reader.ingest(datasets, dry_run=True, update_ds=True)
+
+
+customcases = [
+    Case(
+        data = ["testingest_custom_icatingest_1"],
+        metadata = gettestdata("metadata-custom-icatingest.xml"),
+        schema = gettestdata("icatdata-4.4.xsd"),
+        checks = {
+            "testingest_custom_icatingest_1": [
+                ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
+                 "Dy01Cp02 at 2.7 K"),
+                ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
+                 datetime.datetime(2022, 2, 3, 15, 40, 12, tzinfo=cet)),
+                ("SELECT ds.endDate FROM Dataset ds WHERE ds.id = %d",
+                 datetime.datetime(2022, 2, 3, 17, 4, 22, tzinfo=cet)),
+                (("SELECT COUNT(p) FROM DatasetParameter p "
+                  "JOIN p.dataset AS ds "
+                  "WHERE ds.id = %d"),
+                 0),
+            ],
+        },
+        marks = (),
+    ),
+    Case(
+        data = ["testingest_custom_myingest_1"],
+        metadata = gettestdata("metadata-custom-myingest.xml"),
+        schema = gettestdata("icatdata-4.4.xsd"),
+        checks = {
+            "testingest_custom_myingest_1": [
+                ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
+                 "My Ingest: Dy01Cp02 at 2.7 K"),
+                ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
+                 datetime.datetime(2022, 2, 3, 15, 40, 12, tzinfo=cet)),
+                ("SELECT ds.endDate FROM Dataset ds WHERE ds.id = %d",
+                 datetime.datetime(2022, 2, 3, 17, 4, 22, tzinfo=cet)),
+                (("SELECT COUNT(p) FROM DatasetParameter p "
+                  "JOIN p.dataset AS ds "
+                  "WHERE ds.id = %d"),
+                 1),
+                (("SELECT p.stringValue FROM DatasetParameter p "
+                  "JOIN p.dataset AS ds JOIN p.type AS t "
+                  "WHERE ds.id = %d AND t.name = 'Probe'"),
+                 "x-ray"),
+            ],
+        },
+        marks = (),
+    ),
+]
+@pytest.mark.parametrize("case", [
+    pytest.param(c, id=c.metadata.name, marks=c.marks) for c in customcases
+])
+def test_custom_ingest(client, investigation, samples, schemadir, case):
+    """Test a custom ingest reader MyIngestReader, defined above.
+
+    MyIngestReader defines a custom ingest format by defining its own
+    set of XSD and XSLT files.  But it still supports the vanilla
+    icatingest format.  In the test, we define two cases, having
+    identical input data: the first one using icatingest format, the
+    second one the customized myingest format.  Otherwise the input is
+    identical.  But note that the transformation for the myingest case
+    alters the input on the fly, so we get different results.
+    """
+    datasets = []
+    for name in case.data:
+        datasets.append(client.new("Dataset", name=name))
+    reader = MyIngestReader(client, case.metadata, investigation)
+    reader.ingest(datasets, dry_run=True, update_ds=True)
+    for ds in datasets:
+        ds.create()
+    reader.ingest(datasets)
+    for name in case.checks.keys():
+        query = Query(client, "Dataset", conditions={
+            "name": "= '%s'" % name,
+            "investigation.id": "= %d" % investigation.id,
+        })
+        ds = client.assertedSearch(query)[0]
+        for query, res in case.checks[name]:
+            assert client.assertedSearch(query % ds.id)[0] == res

From 03dc2d35c09821c3a85dacb9bfaf39cb97844c2b Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 11 Dec 2023 18:45:09 +0100
Subject: [PATCH 010/102] Update changelog

---
 CHANGES.rst | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index 7a21a6cd..c735619f 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,6 +2,29 @@ Changelog
 =========
 
 
+1.3.0 (not yet released)
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+New features
+------------
+
++ `#143`_, `#144`_: Make it easier to configure XSLT files to use for
+  processing the input in custom versions of
+  :class:`icat.ingest.IngestReader`.
+
+Incompatible changes
+--------------------
+
++ `#144`_: Drop class attribute
+  :attr:`icat.ingest.IngestReader.XSLT_name` in favour of
+  :attr:`icat.ingest.IngestReader.XSLT_Map`.
+
+  Note that :mod:`icat.ingest` has been declared experimental for now.
+
+.. _#143: https://github.com/icatproject/python-icat/issues/143
+.. _#144: https://github.com/icatproject/python-icat/pull/144
+
+
 1.2.0 (2023-10-31)
 ~~~~~~~~~~~~~~~~~~
 

From 1cf1cc208ac213606dc8758b18345af1992bc924 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 11 Dec 2023 18:47:16 +0100
Subject: [PATCH 011/102] Documentation fixes

---
 icat/ingest.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/icat/ingest.py b/icat/ingest.py
index 9cc230f9..9c7ddbf1 100644
--- a/icat/ingest.py
+++ b/icat/ingest.py
@@ -127,9 +127,12 @@ def get_xslt(self, ingest_data):
         :type ingest_data: :class:`lxml.etree._ElementTree`
         :return: path to the XSLT file.
         :rtype: :class:`~pathlib.Path`
+        :raise icat.exception.InvalidIngestFileError: if the root
+            element name could not be found in
+            :attr:`~icat.ingest.IngestReader.XSLT_Map`.
 
         .. versionchanged:: 1.3.0
-            Lookup the root element name in
+            lookup the root element name in
             :attr:`~icat.ingest.IngestReader.XSLT_Map` rather than
             using a static file name.
         """

From fb63ed7a15e8e4db309cb2aba93b561970e36f5f Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 13 Dec 2023 12:09:48 +0100
Subject: [PATCH 012/102] Fix: .gitignore some more files copied by setup.py
 build_test into tests/data and remove them on make clean

---
 .gitignore | 2 ++
 Makefile   | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/.gitignore b/.gitignore
index 2e60c739..d9103d85 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@ __pycache__/
 /icat/__init__.py
 /tests/data/example_data.yaml
 /tests/data/icat.cfg
+/tests/data/icatdata-*.xsd
 /tests/data/icatdump-*.xml
 /tests/data/icatdump-*.yaml
 /tests/data/ingest-*.xml
@@ -13,4 +14,5 @@ __pycache__/
 /tests/data/ingest.xslt
 /tests/data/metadata-*-inl.xml
 /tests/data/metadata-*-sep.xml
+/tests/data/metadata-sample.xml
 /tests/scripts/
diff --git a/Makefile b/Makefile
index 41ac4b47..c087a875 100644
--- a/Makefile
+++ b/Makefile
@@ -20,9 +20,11 @@ clean:
 	rm -rf build
 	rm -rf __pycache__ icat/__pycache__
 	rm -rf tests/data/example_data.yaml
+	rm -rf tests/data/icatdata-*.xsd
 	rm -rf tests/data/icatdump-* tests/data/ingest-*.xml
 	rm -rf tests/data/ingest-*.xsd tests/data/ingest.xslt
 	rm -rf tests/data/metadata-*-inl.xml tests/data/metadata-*-sep.xml
+	rm -rf tests/data/metadata-sample.xml
 	rm -rf tests/scripts
 
 distclean: clean

From 3999092882b89d43295a4c6dfff1f60779ce5db3 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Thu, 21 Dec 2023 10:24:58 +0100
Subject: [PATCH 013/102] Set the table of predefined configuration variables
 in a table directive

---
 doc/src/_static/css/captions.css | 14 +++++++
 doc/src/conf.py                  |  1 +
 doc/src/config.rst               | 66 ++++++++++++++++----------------
 3 files changed, 49 insertions(+), 32 deletions(-)
 create mode 100644 doc/src/_static/css/captions.css

diff --git a/doc/src/_static/css/captions.css b/doc/src/_static/css/captions.css
new file mode 100644
index 00000000..8321eee7
--- /dev/null
+++ b/doc/src/_static/css/captions.css
@@ -0,0 +1,14 @@
+.rst-content div.figure p.caption, .rst-content table.docutils caption, .rst-content div.code-block-caption{
+    color: #404040;
+    font-style: italic;
+    font-size: 90%;
+    line-height: normal;
+    text-align: left;
+}
+.rst-content div.figure p.caption span.caption-number, .rst-content table.docutils caption span.caption-number, .rst-content div.code-block-caption span.caption-number{
+    font-weight: bold;
+}
+.rst-content div.code-block-caption a.headerlink, .rst-content table.docutils caption a.headerlink{
+    display: none;
+    visibility: hidden;
+}
diff --git a/doc/src/conf.py b/doc/src/conf.py
index f41f3773..a5fca0c9 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -109,6 +109,7 @@
 html_favicon = "images/favicon-32x32.png"
 
 html_css_files = [
+    'css/captions.css',
     'css/spacing.css',
 ]
 
diff --git a/doc/src/config.rst b/doc/src/config.rst
index 0f5c42bd..ff706eed 100644
--- a/doc/src/config.rst
+++ b/doc/src/config.rst
@@ -138,38 +138,40 @@ A few derived variables are also set in
     (username and password if authenticator information is not
     available) suitable to be passed to :meth:`icat.client.Client.login`.
 
-The command line arguments, environment variables, and default values
-for the configuration variables are as follows:
-
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| Name            | Command line                | Environment           | Default        | Mandatory | Notes        |
-+=================+=============================+=======================+================+===========+==============+
-| `configFile`    | ``-c``, ``--configfile``    | ``ICAT_CFG``          | depends        | no        | \(1)         |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `configSection` | ``-s``, ``--configsection`` | ``ICAT_CFG_SECTION``  | :const:`None`  | no        |              |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `url`           | ``-w``, ``--url``           | ``ICAT_SERVICE``      |                | yes       |              |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `idsurl`        | ``--idsurl``                | ``ICAT_DATA_SERVICE`` | :const:`None`  | depends   | \(2)         |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `checkCert`     | ``--check-certificate``,    |                       | :const:`True`  | no        |              |
-|                 | ``--no-check-certificate``  |                       |                |           |              |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `http_proxy`    | ``--http-proxy``            | ``http_proxy``        | :const:`None`  | no        |              |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `https_proxy`   | ``--https-proxy``           | ``https_proxy``       | :const:`None`  | no        |              |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `no_proxy`      | ``--no-proxy``              | ``no_proxy``          | :const:`None`  | no        |              |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `auth`          | ``-a``, ``--auth``          | ``ICAT_AUTH``         |                | yes       | \(3)         |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `username`      | ``-u``, ``--user``          | ``ICAT_USER``         |                | yes       | \(3),(4)     |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `password`      | ``-p``, ``--pass``          |                       | interactive    | yes       | \(3),(4),(5) |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-| `promptPass`    | ``-P``, ``--prompt-pass``   |                       | :const:`False` | no        | \(3),(4),(5) |
-+-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
-
+.. table:: Command line arguments, environment variables, and default values
+	   for the configuration variables.
+    :name: tab-config-vars
+
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | Name            | Command line                | Environment           | Default        | Mandatory | Notes        |
+    +=================+=============================+=======================+================+===========+==============+
+    | `configFile`    | ``-c``, ``--configfile``    | ``ICAT_CFG``          | depends        | no        | \(1)         |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `configSection` | ``-s``, ``--configsection`` | ``ICAT_CFG_SECTION``  | :const:`None`  | no        |              |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `url`           | ``-w``, ``--url``           | ``ICAT_SERVICE``      |                | yes       |              |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `idsurl`        | ``--idsurl``                | ``ICAT_DATA_SERVICE`` | :const:`None`  | depends   | \(2)         |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `checkCert`     | ``--check-certificate``,    |                       | :const:`True`  | no        |              |
+    |                 | ``--no-check-certificate``  |                       |                |           |              |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `http_proxy`    | ``--http-proxy``            | ``http_proxy``        | :const:`None`  | no        |              |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `https_proxy`   | ``--https-proxy``           | ``https_proxy``       | :const:`None`  | no        |              |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `no_proxy`      | ``--no-proxy``              | ``no_proxy``          | :const:`None`  | no        |              |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `auth`          | ``-a``, ``--auth``          | ``ICAT_AUTH``         |                | yes       | \(3)         |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `username`      | ``-u``, ``--user``          | ``ICAT_USER``         |                | yes       | \(3),(4)     |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `password`      | ``-p``, ``--pass``          |                       | interactive    | yes       | \(3),(4),(5) |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+    | `promptPass`    | ``-P``, ``--prompt-pass``   |                       | :const:`False` | no        | \(3),(4),(5) |
+    +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
+
+See the table for an overview of predefined configuration variables.
 Mandatory means that an error will be raised in
 :meth:`icat.config.Config.getconfig` if no value is found for the
 configuration variable in question.

From 299c82bc817dc673e7a1ca981b410a9b9d8f7f8c Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Thu, 21 Dec 2023 11:21:14 +0100
Subject: [PATCH 014/102] ReST style fixes: - fix tabulation used for
 indentation - remove trailing white space

---
 doc/src/client.rst     | 2 +-
 doc/src/config.rst     | 6 +++---
 doc/src/icatingest.rst | 6 +++---
 doc/src/ingest.rst     | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/doc/src/client.rst b/doc/src/client.rst
index 0f0b99dc..e32dc2df 100644
--- a/doc/src/client.rst
+++ b/doc/src/client.rst
@@ -29,7 +29,7 @@ manages the interaction with an ICAT service as a client.
 
         Version of the ICAT server this client connects to.
 
-	.. versionchanged:: 1.0.0
+        .. versionchanged:: 1.0.0
             changed type to :class:`icat.helper.Version`
 
     .. attribute:: autoLogout
diff --git a/doc/src/config.rst b/doc/src/config.rst
index ff706eed..af597211 100644
--- a/doc/src/config.rst
+++ b/doc/src/config.rst
@@ -62,8 +62,8 @@ added.  The main class that client programs interact with is
     .. attribute:: client
 
         The :class:`icat.client.Client` object initialized according to
-	the configuration.  This is also the first element in the
-	return value from :meth:`getconfig`.
+        the configuration.  This is also the first element in the
+        return value from :meth:`getconfig`.
 
     .. attribute:: client_kwargs
 
@@ -139,7 +139,7 @@ A few derived variables are also set in
     available) suitable to be passed to :meth:`icat.client.Client.login`.
 
 .. table:: Command line arguments, environment variables, and default values
-	   for the configuration variables.
+           for the configuration variables.
     :name: tab-config-vars
 
     +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
diff --git a/doc/src/icatingest.rst b/doc/src/icatingest.rst
index 7cba2199..62e571cb 100644
--- a/doc/src/icatingest.rst
+++ b/doc/src/icatingest.rst
@@ -71,12 +71,12 @@ The following options are specific to icatingest:
 
     **CHECK**
         Compare all attributes from the input object with the already
-	existing object in ICAT.  Throw an error of any attribute
-	differs.
+        existing object in ICAT.  Throw an error of any attribute
+        differs.
 
     **OVERWRITE**
         Overwrite the existing object in ICAT, e.g. update it with all
-	attributes set to the values found in the input object.
+        attributes set to the values found in the input object.
 
     If :option:`--upload-datafiles` is set, this option will be
     ignored for Datafile objects which will then always raise an error
diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst
index 4fed8b7e..a2880030 100644
--- a/doc/src/ingest.rst
+++ b/doc/src/ingest.rst
@@ -121,7 +121,7 @@ class attributes as follows::
   import icat.ingest
 
   class MyFacilityIngestReader(icat.ingest.IngestReader):
-  
+
       # Override the directory to search for XSD and XSLT files:
       SchemaDir = Path("/usr/share/icat/my-facility")
 

From 1b27f82fe3607d917bed830b7a92fdb2a8d49a72 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Thu, 21 Dec 2023 11:24:13 +0100
Subject: [PATCH 015/102] Set the Synopsis section in man pages as line blocks

---
 doc/src/icatdump.rst   | 2 +-
 doc/src/icatingest.rst | 3 ++-
 doc/src/wipeicat.rst   | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/src/icatdump.rst b/doc/src/icatdump.rst
index 6e7d0caf..0023fca3 100644
--- a/doc/src/icatdump.rst
+++ b/doc/src/icatdump.rst
@@ -7,7 +7,7 @@ icatdump
 Synopsis
 ~~~~~~~~
 
-**icatdump** [*standard options*] [-o FILE] [-f FORMAT]
+| **icatdump** [*standard options*] [-o FILE] [-f FORMAT]
 
 
 Description
diff --git a/doc/src/icatingest.rst b/doc/src/icatingest.rst
index 62e571cb..c260d468 100644
--- a/doc/src/icatingest.rst
+++ b/doc/src/icatingest.rst
@@ -7,7 +7,8 @@ icatingest
 Synopsis
 ~~~~~~~~
 
-**icatingest** [*standard options*] [-i FILE] [-f FORMAT] [--upload-datafiles] [--datafile-dir DATADIR] [--duplicate OPTION]
+| **icatingest** [*standard options*] [-i FILE] [-f FORMAT]
+|     [--upload-datafiles] [--datafile-dir DATADIR] [--duplicate OPTION]
 
 
 Description
diff --git a/doc/src/wipeicat.rst b/doc/src/wipeicat.rst
index 89567684..1c1ca4cd 100644
--- a/doc/src/wipeicat.rst
+++ b/doc/src/wipeicat.rst
@@ -7,7 +7,7 @@ wipeicat
 Synopsis
 ~~~~~~~~
 
-**wipeicat** [*options*]
+| **wipeicat** [*options*]
 
 
 Description

From d1d0f385f7d64f05c70534d3de14c8af1d987287 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Thu, 21 Dec 2023 11:47:07 +0100
Subject: [PATCH 016/102] Add GitHub action to check ReST input files

---
 .github/workflows/rst-lint.yaml | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 .github/workflows/rst-lint.yaml

diff --git a/.github/workflows/rst-lint.yaml b/.github/workflows/rst-lint.yaml
new file mode 100644
index 00000000..b5e7c2fe
--- /dev/null
+++ b/.github/workflows/rst-lint.yaml
@@ -0,0 +1,12 @@
+name: Check ReST input files
+on: [push, pull_request]
+jobs:
+  doc8:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v4
+      - name: doc8-check
+        uses: deep-entertainment/doc8-action@v4
+        with:
+          scanPaths: "doc/src"

From cec25957e021d341bff3a460156748c581d77771 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 22 Dec 2023 11:09:57 +0100
Subject: [PATCH 017/102] Drop version constraint on Sphinx in RtD
 requirements, i.e. essentially update the Sphinx version used for building
 the documentation

---
 .rtd-require | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.rtd-require b/.rtd-require
index 2de815cd..2972d516 100644
--- a/.rtd-require
+++ b/.rtd-require
@@ -4,6 +4,4 @@ packaging
 setuptools
 setuptools_scm
 suds
-jinja2<3.1
-sphinx>=2,<3
-sphinx-rtd-theme>=0.5,<1
+sphinx_rtd_theme

From 4306390a8f27b96a4ae33fba0cb93c2bae7ea271 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 22 Dec 2023 11:44:28 +0100
Subject: [PATCH 018/102] Add sphinx_copybutton extension

---
 .rtd-require    | 1 +
 doc/src/conf.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.rtd-require b/.rtd-require
index 2972d516..99c132bb 100644
--- a/.rtd-require
+++ b/.rtd-require
@@ -4,4 +4,5 @@ packaging
 setuptools
 setuptools_scm
 suds
+sphinx-copybutton
 sphinx_rtd_theme
diff --git a/doc/src/conf.py b/doc/src/conf.py
index a5fca0c9..a75c5c52 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -40,6 +40,7 @@
 extensions = [
     'sphinx.ext.autodoc',
     'sphinx.ext.intersphinx',
+    'sphinx_copybutton',
 ]
 
 # Add any paths that contain templates here, relative to this directory.

From fafaa64ce9a89f6f50a5b4a0257fe0ed70bf27ef Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 22 Dec 2023 13:54:12 +0100
Subject: [PATCH 019/102] Add python scripts to contain the interactive code
 blocks from the tutorial

---
 doc/tutorial/create.py |  58 ++++++++++++
 doc/tutorial/edit.py   |  43 +++++++++
 doc/tutorial/ids.py    |  83 +++++++++++++++++
 doc/tutorial/search.py | 200 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 384 insertions(+)
 create mode 100644 doc/tutorial/create.py
 create mode 100644 doc/tutorial/edit.py
 create mode 100644 doc/tutorial/ids.py
 create mode 100644 doc/tutorial/search.py

diff --git a/doc/tutorial/create.py b/doc/tutorial/create.py
new file mode 100644
index 00000000..9a2fc841
--- /dev/null
+++ b/doc/tutorial/create.py
@@ -0,0 +1,58 @@
+# Tutorial / Creating stuff in the ICAT server
+# interactive code blocks
+
+# Creating simple objects
+
+f1 = client.new("Facility")
+f1.name = "Fac1"
+f1.fullName = "Facility 1"
+f1.id = client.create(f1)
+client.search("SELECT f FROM Facility f")
+
+# --------------------
+
+f2 = client.new("Facility", name="Fac2", fullName="Facility 2")
+f2.create()
+client.search("SELECT f FROM Facility f")
+
+# Relationships to other objects
+
+f1 = client.get("Facility", 1)
+
+# --------------------
+
+pt1 = client.new("ParameterType")
+pt1.name = "Test parameter type 1"
+pt1.units = "pct"
+pt1.applicableToDataset = True
+pt1.valueType = "NUMERIC"
+pt1.facility = f1
+pt1.create()
+
+# --------------------
+
+pt2 = client.new("ParameterType")
+pt2.name = "Test parameter type 2"
+pt2.units = "N/A"
+pt2.applicableToDataset = True
+pt2.valueType = "STRING"
+pt2.facility = f1
+for v in ["buono", "brutto", "cattivo"]:
+    psv = client.new("PermissibleStringValue", value=v)
+    pt2.permissibleStringValues.append(psv)
+
+pt2.create()
+
+# --------------------
+
+query = "SELECT pt FROM ParameterType pt INCLUDE pt.facility, pt.permissibleStringValues"
+client.search(query)
+
+# Access rules
+
+publicTables = [ "Application", "DatafileFormat", "DatasetType",
+                 "Facility", "FacilityCycle", "Instrument",
+                 "InvestigationType", "ParameterType",
+                 "PermissibleStringValue", "SampleType", ]
+queries = [ "SELECT o FROM %s o" % t for t in publicTables ]
+client.createRules("R", queries)
diff --git a/doc/tutorial/edit.py b/doc/tutorial/edit.py
new file mode 100644
index 00000000..ca2aacc4
--- /dev/null
+++ b/doc/tutorial/edit.py
@@ -0,0 +1,43 @@
+# Tutorial / Working with objects in the ICAT server
+# interactive code blocks
+
+client.search("SELECT f FROM Facility f")
+
+# Editing the attributes of objects
+
+for facility in client.search("SELECT f FROM Facility f"):
+    facility.description = "An example facility"
+    facility.daysUntilRelease = 1826
+    facility.fullName = "%s Facility" % facility.name
+    client.update(facility)
+
+client.search("SELECT f FROM Facility f")
+
+# --------------------
+
+for facility in client.search("SELECT f FROM Facility f"):
+    facility.description = None
+    facility.update()
+
+client.search("SELECT f FROM Facility f")
+
+# Copying objects
+
+fac = client.get("Facility f INCLUDE f.parameterTypes", 1)
+print(fac)
+
+# --------------------
+
+facc = fac.copy()
+print(facc.name)
+print(facc.parameterTypes[0].name)
+facc.name = "Fac0"
+facc.parameterTypes[0].name = "Test parameter type 0"
+print(fac.name)
+print(fac.parameterTypes[0].name)
+
+# --------------------
+
+fac.truncateRelations()
+print(fac)
+print(facc)
diff --git a/doc/tutorial/ids.py b/doc/tutorial/ids.py
new file mode 100644
index 00000000..84bc12c1
--- /dev/null
+++ b/doc/tutorial/ids.py
@@ -0,0 +1,83 @@
+# Tutorial / Upload and download files to and from IDS
+# interactive code blocks
+
+client.ids.isReadOnly()
+
+# Upload files
+
+users = [("jdoe", "John"), ("nbour", "Nicolas"), ("rbeck", "Rudolph")]
+for user, name in users:
+    with open("greet-%s.txt" % user, "wt") as f:
+        print("Hello %s!" % name, file=f)
+
+# --------------------
+
+from icat.query import Query
+investigation = client.assertedSearch(Query(client, "Investigation", conditions={"name": "= '12100409-ST'"}))[0]
+dataset = client.new("Dataset")
+dataset.investigation = investigation
+dataset.type = client.assertedSearch(Query(client, "DatasetType", conditions={"name": "= 'other'"}))[0]
+dataset.name = "greetings"
+dataset.complete = False
+dataset.create()
+
+# --------------------
+
+df_format = client.assertedSearch(Query(client, "DatafileFormat", conditions={"name": "= 'Text'"}))[0]
+for fname in ("greet-jdoe.txt", "greet-nbour.txt", "greet-rbeck.txt"):
+    datafile = client.new("Datafile", name=fname, dataset=dataset, datafileFormat=df_format)
+    client.putData(fname, datafile)
+
+# Download files
+
+query = Query(client, "Datafile", conditions={"name": "= 'greet-jdoe.txt'", "dataset.name": "= 'greetings'"})
+df = client.assertedSearch(query)[0]
+data = client.getData([df])
+type(data)
+data.read().decode('utf8')
+
+# --------------------
+
+from io import BytesIO
+from zipfile import ZipFile
+query = Query(client, "Dataset", conditions={"name": "= 'greetings'"})
+ds = client.assertedSearch(query)[0]
+data = client.getData([ds])
+buffer = BytesIO(data.read())
+with ZipFile(buffer) as zipfile:
+    for f in zipfile.namelist():
+        print("file name: %s" % f)
+        print("content: %r" % zipfile.open(f).read().decode('utf8'))
+
+# --------------------
+
+from icat.ids import DataSelection
+selection = DataSelection([ds])
+client.ids.archive(selection)
+
+# --------------------
+
+client.ids.getStatus(selection)
+
+# --------------------
+
+data = client.getData([ds])
+
+# --------------------
+
+client.ids.getStatus(selection)
+data = client.getData([ds])
+len(data.read())
+
+# --------------------
+
+preparedId = client.prepareData(selection)
+preparedId
+
+# --------------------
+
+client.isDataPrepared(preparedId)
+data = client.getData(preparedId)
+buffer = BytesIO(data.read())
+with ZipFile(buffer) as zipfile:
+    zipfile.namelist()
diff --git a/doc/tutorial/search.py b/doc/tutorial/search.py
new file mode 100644
index 00000000..4d2d12f4
--- /dev/null
+++ b/doc/tutorial/search.py
@@ -0,0 +1,200 @@
+# Tutorial / Working with objects in the ICAT server
+# interactive code blocks
+
+client.search("SELECT f FROM Facility f INCLUDE f.parameterTypes LIMIT 1,1")
+
+# Building advanced queries
+
+from icat.query import Query
+
+# --------------------
+
+query = Query(client, "Investigation")
+print(query)
+client.search(query)
+
+# --------------------
+
+query = Query(client, "Investigation", conditions={"name": "= '10100601-ST'"})
+print(query)
+client.search(query)
+
+# --------------------
+
+query = Query(client, "Investigation", conditions={"name": "= '10100601-ST'"}, includes=["datasets"])
+print(query)
+client.search(query)
+
+# --------------------
+
+query = Query(client, "Investigation", conditions={"LENGTH(title)": "= 18"})
+print(query)
+client.search(query)
+
+# --------------------
+
+conditions = {
+    "investigation.name": "= '10100601-ST'",
+    "parameters.type.name": "= 'Magnetic field'",
+    "parameters.type.units": "= 'T'",
+    "parameters.numericValue": "> 5.0",
+}
+query = Query(client, "Dataset", conditions=conditions, includes=["parameters.type"])
+print(query)
+client.search(query)
+
+# --------------------
+
+def get_investigation(client, name, visitId=None):
+    query = Query(client, "Investigation")
+    query.addConditions({"name": "= '%s'" % name})
+    if visitId is not None:
+        query.addConditions({"visitId": "= '%s'" % visitId})
+    print(query)
+    return client.assertedSearch(query)[0]
+
+get_investigation(client, "08100122-EF")
+get_investigation(client, "12100409-ST", "1.1-P")
+
+# --------------------
+
+conditions = {
+    "datafileCreateTime": [">= '2012-01-01'", "< '2013-01-01'"]
+}
+query = Query(client, "Datafile", conditions=conditions)
+print(query)
+client.search(query)
+
+# --------------------
+
+query = Query(client, "Datafile")
+query.addConditions({"datafileCreateTime": ">= '2012-01-01'"})
+query.addConditions({"datafileCreateTime": "< '2013-01-01'"})
+print(query)
+
+# --------------------
+
+query = Query(client, "Dataset", attributes="name")
+print(query)
+client.search(query)
+
+# --------------------
+
+query = Query(client, "Dataset", attributes=["investigation.name", "name", "complete", "type.name"])
+print(query)
+client.search(query)
+
+# --------------------
+
+query = Query(client, "Dataset", aggregate="COUNT")
+print(query)
+client.search(query)
+
+# --------------------
+
+conditions = {
+    "dataset.investigation.name": "= '10100601-ST'",
+    "type.name": "= 'Magnetic field'",
+    "type.units": "= 'T'",
+}
+query = Query(client, "DatasetParameter", conditions=conditions, attributes="numericValue")
+print(query)
+client.search(query)
+query.setAggregate("MIN")
+print(query)
+client.search(query)
+query.setAggregate("MAX")
+print(query)
+client.search(query)
+query.setAggregate("AVG")
+print(query)
+client.search(query)
+
+# --------------------
+
+conditions = {
+    "datasets.parameters.type.name": "= 'Magnetic field'",
+    "datasets.parameters.type.units": "= 'T'",
+}
+query = Query(client, "Investigation", conditions=conditions)
+print(query)
+client.search(query)
+
+# --------------------
+
+query.setAggregate("DISTINCT")
+print(query)
+client.search(query)
+
+# --------------------
+
+conditions = {
+    "datasets.parameters.type.name": "= 'Magnetic field'",
+    "datasets.parameters.type.units": "= 'T'",
+}
+query = Query(client, "Investigation", conditions=conditions, aggregate="COUNT")
+print(query)
+client.search(query)
+query.setAggregate("COUNT:DISTINCT")
+print(query)
+client.search(query)
+
+# --------------------
+
+order = ["type.name", "type.units", ("numericValue", "DESC")]
+query = Query(client, "DatasetParameter", includes=["type"], order=order)
+print(query)
+client.search(query)
+
+# --------------------
+
+query = Query(client, "User", conditions={"fullName": "IS NOT NULL"}, order=[("LENGTH(fullName)", "DESC")])
+print(query)
+for user in client.search(query):
+    print("%d: %s" % (len(user.fullName), user.fullName))
+
+# --------------------
+
+query = Query(client, "Dataset", order=[("endDate", "DESC")], limit=(2, 1))
+print(query)
+client.search(query)
+
+# Useful search methods
+
+res = client.search(Query(client, "Facility"))
+if not res:
+    raise RuntimeError("Facility not found")
+elif len(res) > 1:
+    raise RuntimeError("Facility not unique")
+
+facility = res[0]
+facility = client.assertedSearch(Query(client, "Facility"))[0]
+
+# --------------------
+
+for ds in client.searchChunked(Query(client, "Dataset")):
+    # do something useful with the dataset ds ...
+    print(ds.name)
+
+# --------------------
+
+def get_dataset(client, inv_name, ds_name, ds_type="raw"):
+    """Get a dataset in an investigation.
+    If it already exists, search and return it, create it, if not.
+    """
+    try:
+        dataset = client.new("Dataset")
+        query = Query(client, "Investigation", conditions={
+            "name": "= '%s'" % inv_name
+        })
+        dataset.investigation = client.assertedSearch(query)[0]
+        query = Query(client, "DatasetType", conditions={
+            "name": "= '%s'" % ds_type
+        })
+        dataset.type = client.assertedSearch(query)[0]
+        dataset.complete = False
+        dataset.name = ds_name
+        dataset.create()
+    except icat.ICATObjectExistsError:
+        dataset = client.searchMatching(dataset)
+    return dataset

From 4e3bbe39a03abbde2ec3970285f397b27a34deb7 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 22 Dec 2023 14:52:12 +0100
Subject: [PATCH 020/102] Fix overly long lines in interactive tutorial
 examples

---
 doc/src/tutorial-create.rst |  3 ++-
 doc/src/tutorial-ids.rst    | 19 ++++++++++++++-----
 doc/src/tutorial-search.rst | 30 +++++++++++++++++++++---------
 doc/tutorial/create.py      |  3 ++-
 doc/tutorial/ids.py         | 19 ++++++++++++++-----
 doc/tutorial/search.py      | 30 +++++++++++++++++++++---------
 6 files changed, 74 insertions(+), 30 deletions(-)

diff --git a/doc/src/tutorial-create.rst b/doc/src/tutorial-create.rst
index c6c56ea8..07977db1 100644
--- a/doc/src/tutorial-create.rst
+++ b/doc/src/tutorial-create.rst
@@ -132,7 +132,8 @@ created together with the ``ParameterType`` object.
 
 We can verify this by searching for the newly created objects::
 
-  >>> query = "SELECT pt FROM ParameterType pt INCLUDE pt.facility, pt.permissibleStringValues"
+  >>> query = ("SELECT pt FROM ParameterType pt "
+  ...          "INCLUDE pt.facility, pt.permissibleStringValues")
   >>> client.search(query)
   [(parameterType){
      createId = "simple/root"
diff --git a/doc/src/tutorial-ids.rst b/doc/src/tutorial-ids.rst
index 0ce2748c..c71d221e 100644
--- a/doc/src/tutorial-ids.rst
+++ b/doc/src/tutorial-ids.rst
@@ -54,10 +54,12 @@ We need a dataset in ICAT that the uploaded files should be put into,
 so let's create one::
 
   >>> from icat.query import Query
-  >>> investigation = client.assertedSearch(Query(client, "Investigation", conditions={"name": "= '12100409-ST'"}))[0]
+  >>> query = Query(client, "Investigation", conditions={"name": "= '12100409-ST'"})
+  >>> investigation = client.assertedSearch(query)[0]
   >>> dataset = client.new("Dataset")
   >>> dataset.investigation = investigation
-  >>> dataset.type = client.assertedSearch(Query(client, "DatasetType", conditions={"name": "= 'other'"}))[0]
+  >>> query = Query(client, "DatasetType", conditions={"name": "= 'other'"})
+  >>> dataset.type = client.assertedSearch(query)[0]
   >>> dataset.name = "greetings"
   >>> dataset.complete = False
   >>> dataset.create()
@@ -65,9 +67,13 @@ so let's create one::
 For each of the files, we create a new datafile object and call the
 :meth:`~icat.client.Client.putData` method to upload it::
 
-  >>> df_format = client.assertedSearch(Query(client, "DatafileFormat", conditions={"name": "= 'Text'"}))[0]
+  >>> query = Query(client, "DatafileFormat", conditions={"name": "= 'Text'"})
+  >>> df_format = client.assertedSearch(query)[0]
   >>> for fname in ("greet-jdoe.txt", "greet-nbour.txt", "greet-rbeck.txt"):
-  ...     datafile = client.new("Datafile", name=fname, dataset=dataset, datafileFormat=df_format)
+  ...     datafile = client.new("Datafile",
+  ...                           name=fname,
+  ...                           dataset=dataset,
+  ...                           datafileFormat=df_format)
   ...     client.putData(fname, datafile)
   ...
   (datafile){
@@ -125,7 +131,10 @@ Download files
 We can request a download of a set of data using the
 :meth:`~icat.client.Client.getData` method::
 
-  >>> query = Query(client, "Datafile", conditions={"name": "= 'greet-jdoe.txt'", "dataset.name": "= 'greetings'"})
+  >>> query = Query(client, "Datafile", conditions={
+  ...     "name": "= 'greet-jdoe.txt'",
+  ...     "dataset.name": "= 'greetings'"
+  ... })
   >>> df = client.assertedSearch(query)[0]
   >>> data = client.getData([df])
   >>> type(data)
diff --git a/doc/src/tutorial-search.rst b/doc/src/tutorial-search.rst
index ed9843ae..9d1c5fec 100644
--- a/doc/src/tutorial-search.rst
+++ b/doc/src/tutorial-search.rst
@@ -122,7 +122,8 @@ appropriate condition.  The `conditions` argument to
 :class:`~icat.query.Query` should be a mapping of attribute names to
 conditions on that attribute::
 
-  >>> query = Query(client, "Investigation", conditions={"name": "= '10100601-ST'"})
+  >>> query = Query(client, "Investigation",
+  ...               conditions={"name": "= '10100601-ST'"})
   >>> print(query)
   SELECT o FROM Investigation o WHERE o.name = '10100601-ST'
   >>> client.search(query)
@@ -144,7 +145,9 @@ conditions on that attribute::
 
 We may also include related objects in the search results::
 
-  >>> query = Query(client, "Investigation", conditions={"name": "= '10100601-ST'"}, includes=["datasets"])
+  >>> query = Query(client, "Investigation",
+  ...               conditions={"name": "= '10100601-ST'"},
+  ...               includes=["datasets"])
   >>> print(query)
   SELECT o FROM Investigation o WHERE o.name = '10100601-ST' INCLUDE o.datasets
   >>> client.search(query)
@@ -208,7 +211,8 @@ python-icat supports the use of some JPQL functions when specifying
 which attribute a condition should be applied to.  Consider the
 following query::
 
-  >>> query = Query(client, "Investigation", conditions={"LENGTH(title)": "= 18"})
+  >>> query = Query(client, "Investigation",
+  ...               conditions={"LENGTH(title)": "= 18"})
   >>> print(query)
   SELECT o FROM Investigation o WHERE LENGTH(o.title) = 18
   >>> client.search(query)
@@ -253,7 +257,8 @@ field larger then 5 Tesla and include its parameters in the result::
   ...     "parameters.type.units": "= 'T'",
   ...     "parameters.numericValue": "> 5.0",
   ... }
-  >>> query = Query(client, "Dataset", conditions=conditions, includes=["parameters.type"])
+  >>> query = Query(client, "Dataset",
+  ...               conditions=conditions, includes=["parameters.type"])
   >>> print(query)
   SELECT o FROM Dataset o JOIN o.investigation AS i JOIN o.parameters AS p JOIN p.type AS pt WHERE i.name = '10100601-ST' AND p.numericValue > 5.0 AND pt.name = 'Magnetic field' AND pt.units = 'T' INCLUDE o.parameters AS p, p.type
   >>> client.search(query)
@@ -456,7 +461,9 @@ multiple attributes at once.  The result will be a tuple of attribute
 values rather then a single value for each object found in the query.
 This requires an ICAT server version 4.11 or newer though::
 
-  >>> query = Query(client, "Dataset", attributes=["investigation.name", "name", "complete", "type.name"])
+  >>> query = Query(client, "Dataset", attributes=[
+  ...     "investigation.name", "name", "complete", "type.name"
+  ... ])
   >>> print(query)
   SELECT i.name, o.name, o.complete, t.name FROM Dataset o JOIN o.investigation AS i JOIN o.type AS t
   >>> client.search(query)
@@ -485,7 +492,8 @@ average magnetic field applied in the measurements::
   ...     "type.name": "= 'Magnetic field'",
   ...     "type.units": "= 'T'",
   ... }
-  >>> query = Query(client, "DatasetParameter", conditions=conditions, attributes="numericValue")
+  >>> query = Query(client, "DatasetParameter",
+  ...               conditions=conditions, attributes="numericValue")
   >>> print(query)
   SELECT o.numericValue FROM DatasetParameter o JOIN o.dataset AS ds JOIN ds.investigation AS i JOIN o.type AS t WHERE i.name = '10100601-ST' AND t.name = 'Magnetic field' AND t.units = 'T'
   >>> client.search(query)
@@ -578,7 +586,8 @@ make sure not to count the same object more then once::
   ...     "datasets.parameters.type.name": "= 'Magnetic field'",
   ...     "datasets.parameters.type.units": "= 'T'",
   ... }
-  >>> query = Query(client, "Investigation", conditions=conditions, aggregate="COUNT")
+  >>> query = Query(client, "Investigation",
+  ...               conditions=conditions, aggregate="COUNT")
   >>> print(query)
   SELECT COUNT(o) FROM Investigation o JOIN o.datasets AS s1 JOIN s1.parameters AS s2 JOIN s2.type AS s3 WHERE s3.name = 'Magnetic field' AND s3.units = 'T'
   >>> client.search(query)
@@ -761,7 +770,9 @@ in the `order` argument to :class:`~icat.query.Query`.  Let's search
 for user sorted by the length of their name, from longest to
 shortest::
 
-  >>> query = Query(client, "User", conditions={"fullName": "IS NOT NULL"}, order=[("LENGTH(fullName)", "DESC")])
+  >>> query = Query(client, "User", conditions={
+  ...     "fullName": "IS NOT NULL"
+  ... }, order=[("LENGTH(fullName)", "DESC")])
   >>> print(query)
   SELECT o FROM User o WHERE o.fullName IS NOT NULL ORDER BY LENGTH(o.fullName) DESC
   >>> for user in client.search(query):
@@ -782,7 +793,8 @@ shortest::
 We may limit the number of returned items.  Search for the third to
 last dataset to have been finished::
 
-  >>> query = Query(client, "Dataset", order=[("endDate", "DESC")], limit=(2, 1))
+  >>> query = Query(client, "Dataset",
+  ...               order=[("endDate", "DESC")], limit=(2, 1))
   >>> print(query)
   SELECT o FROM Dataset o ORDER BY o.endDate DESC LIMIT 2, 1
   >>> client.search(query)
diff --git a/doc/tutorial/create.py b/doc/tutorial/create.py
index 9a2fc841..c6ad80f0 100644
--- a/doc/tutorial/create.py
+++ b/doc/tutorial/create.py
@@ -45,7 +45,8 @@
 
 # --------------------
 
-query = "SELECT pt FROM ParameterType pt INCLUDE pt.facility, pt.permissibleStringValues"
+query = ("SELECT pt FROM ParameterType pt "
+         "INCLUDE pt.facility, pt.permissibleStringValues")
 client.search(query)
 
 # Access rules
diff --git a/doc/tutorial/ids.py b/doc/tutorial/ids.py
index 84bc12c1..f3156039 100644
--- a/doc/tutorial/ids.py
+++ b/doc/tutorial/ids.py
@@ -13,24 +13,33 @@
 # --------------------
 
 from icat.query import Query
-investigation = client.assertedSearch(Query(client, "Investigation", conditions={"name": "= '12100409-ST'"}))[0]
+query = Query(client, "Investigation", conditions={"name": "= '12100409-ST'"})
+investigation = client.assertedSearch(query)[0]
 dataset = client.new("Dataset")
 dataset.investigation = investigation
-dataset.type = client.assertedSearch(Query(client, "DatasetType", conditions={"name": "= 'other'"}))[0]
+query = Query(client, "DatasetType", conditions={"name": "= 'other'"})
+dataset.type = client.assertedSearch(query)[0]
 dataset.name = "greetings"
 dataset.complete = False
 dataset.create()
 
 # --------------------
 
-df_format = client.assertedSearch(Query(client, "DatafileFormat", conditions={"name": "= 'Text'"}))[0]
+query = Query(client, "DatafileFormat", conditions={"name": "= 'Text'"})
+df_format = client.assertedSearch(query)[0]
 for fname in ("greet-jdoe.txt", "greet-nbour.txt", "greet-rbeck.txt"):
-    datafile = client.new("Datafile", name=fname, dataset=dataset, datafileFormat=df_format)
+    datafile = client.new("Datafile",
+                          name=fname,
+                          dataset=dataset,
+                          datafileFormat=df_format)
     client.putData(fname, datafile)
 
 # Download files
 
-query = Query(client, "Datafile", conditions={"name": "= 'greet-jdoe.txt'", "dataset.name": "= 'greetings'"})
+query = Query(client, "Datafile", conditions={
+    "name": "= 'greet-jdoe.txt'",
+    "dataset.name": "= 'greetings'"
+})
 df = client.assertedSearch(query)[0]
 data = client.getData([df])
 type(data)
diff --git a/doc/tutorial/search.py b/doc/tutorial/search.py
index 4d2d12f4..a697581e 100644
--- a/doc/tutorial/search.py
+++ b/doc/tutorial/search.py
@@ -15,19 +15,23 @@
 
 # --------------------
 
-query = Query(client, "Investigation", conditions={"name": "= '10100601-ST'"})
+query = Query(client, "Investigation",
+              conditions={"name": "= '10100601-ST'"})
 print(query)
 client.search(query)
 
 # --------------------
 
-query = Query(client, "Investigation", conditions={"name": "= '10100601-ST'"}, includes=["datasets"])
+query = Query(client, "Investigation",
+              conditions={"name": "= '10100601-ST'"},
+              includes=["datasets"])
 print(query)
 client.search(query)
 
 # --------------------
 
-query = Query(client, "Investigation", conditions={"LENGTH(title)": "= 18"})
+query = Query(client, "Investigation",
+              conditions={"LENGTH(title)": "= 18"})
 print(query)
 client.search(query)
 
@@ -39,7 +43,8 @@
     "parameters.type.units": "= 'T'",
     "parameters.numericValue": "> 5.0",
 }
-query = Query(client, "Dataset", conditions=conditions, includes=["parameters.type"])
+query = Query(client, "Dataset",
+              conditions=conditions, includes=["parameters.type"])
 print(query)
 client.search(query)
 
@@ -80,7 +85,9 @@ def get_investigation(client, name, visitId=None):
 
 # --------------------
 
-query = Query(client, "Dataset", attributes=["investigation.name", "name", "complete", "type.name"])
+query = Query(client, "Dataset", attributes=[
+    "investigation.name", "name", "complete", "type.name"
+])
 print(query)
 client.search(query)
 
@@ -97,7 +104,8 @@ def get_investigation(client, name, visitId=None):
     "type.name": "= 'Magnetic field'",
     "type.units": "= 'T'",
 }
-query = Query(client, "DatasetParameter", conditions=conditions, attributes="numericValue")
+query = Query(client, "DatasetParameter",
+              conditions=conditions, attributes="numericValue")
 print(query)
 client.search(query)
 query.setAggregate("MIN")
@@ -132,7 +140,8 @@ def get_investigation(client, name, visitId=None):
     "datasets.parameters.type.name": "= 'Magnetic field'",
     "datasets.parameters.type.units": "= 'T'",
 }
-query = Query(client, "Investigation", conditions=conditions, aggregate="COUNT")
+query = Query(client, "Investigation",
+              conditions=conditions, aggregate="COUNT")
 print(query)
 client.search(query)
 query.setAggregate("COUNT:DISTINCT")
@@ -148,14 +157,17 @@ def get_investigation(client, name, visitId=None):
 
 # --------------------
 
-query = Query(client, "User", conditions={"fullName": "IS NOT NULL"}, order=[("LENGTH(fullName)", "DESC")])
+query = Query(client, "User", conditions={
+    "fullName": "IS NOT NULL"
+}, order=[("LENGTH(fullName)", "DESC")])
 print(query)
 for user in client.search(query):
     print("%d: %s" % (len(user.fullName), user.fullName))
 
 # --------------------
 
-query = Query(client, "Dataset", order=[("endDate", "DESC")], limit=(2, 1))
+query = Query(client, "Dataset",
+              order=[("endDate", "DESC")], limit=(2, 1))
 print(query)
 client.search(query)
 

From 62aec073186c7db909cd2c4342dd34300bc95539 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 11:02:06 +0100
Subject: [PATCH 021/102] Drop spurious markers setting in pytest.ini

---
 tests/pytest.ini | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/pytest.ini b/tests/pytest.ini
index b52d7707..a6d9be1f 100644
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
@@ -1,4 +1,2 @@
 [pytest]
 minversion = 3.1.0
-markers =
-    dependency: mark dependencies between tests.
\ No newline at end of file

From 3725120c9b079092916087a92db7caffe8352bdd Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 11:29:05 +0100
Subject: [PATCH 022/102] Rename the '__version__' attribute in _meta.py to
 'version'

---
 doc/src/conf.py | 2 +-
 setup.py        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/src/conf.py b/doc/src/conf.py
index f41f3773..a7603db4 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -23,7 +23,7 @@
 author = 'Rolf Krahl'
 
 # The full version, including alpha/beta/rc tags
-release = _meta.__version__
+release = _meta.version
 # The short X.Y version
 version = ".".join(release.split(".")[0:2])
 
diff --git a/setup.py b/setup.py
index ac2f897d..ef5133b9 100755
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@
 except (ImportError, LookupError):
     try:
         import _meta
-        version = _meta.__version__
+        version = _meta.version
     except ImportError:
         log.warn("warning: cannot determine version number")
         version = "UNKNOWN"
@@ -61,7 +61,7 @@ class meta(setuptools.Command):
 from icat.exception import *
 '''
     meta_template = '''
-__version__ = "%(version)s"
+version = "%(version)s"
 '''
 
     def initialize_options(self):

From cf5381322350337b34240d5df16092f613f1dd86 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 11:47:24 +0100
Subject: [PATCH 023/102] - Drop setuptools_scm in favour of git-props -
 Explicitly require setuptools - Explicitly point to the latest release
 rather than to "latest" in   Download and Changes project url

---
 doc/src/install.rst | 14 +++++++-------
 setup.py            | 20 ++++++++++++--------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/doc/src/install.rst b/doc/src/install.rst
index 7b200458..78fd935d 100644
--- a/doc/src/install.rst
+++ b/doc/src/install.rst
@@ -52,13 +52,13 @@ are not required to install python-icat and use its core features:
   tests will be skipped in that case, so the results will not be very
   meaningful.
 
-+ `setuptools_scm`_
++ `git-props`_
 
-  The version number is managed using this package.  All source
-  distributions add a static text file with the version number and
-  fall back using that if setuptools_scm is not available.  So this
-  package is only needed to build out of the plain development source
-  tree as cloned from GitHub.
+  This package is used to extract some metadata such as the version
+  number out of git, the version control system.  All releases embed
+  that metadata in the distribution.  So this package is only needed
+  to build out of the plain development source tree as cloned from
+  GitHub, but not to build a release distribution.
 
 + `pytest`_ >= 3.1.0
 
@@ -187,7 +187,7 @@ will just get many skipped tests then.
 .. _PyYAML: https://github.com/yaml/pyyaml/
 .. _lxml: https://lxml.de/
 .. _Requests: https://requests.readthedocs.io/
-.. _setuptools_scm: https://github.com/pypa/setuptools_scm/
+.. _git-props: https://github.com/RKrahl/git-props/
 .. _pytest: https://docs.pytest.org/en/latest/
 .. _pytest-dependency: https://pypi.org/project/pytest-dependency/
 .. _distutils-pytest: https://github.com/RKrahl/distutils-pytest/
diff --git a/setup.py b/setup.py
index ef5133b9..3158cab7 100755
--- a/setup.py
+++ b/setup.py
@@ -25,15 +25,15 @@
 except (ImportError, AttributeError):
     cmdclass = dict()
 try:
-    import setuptools_scm
-    version = setuptools_scm.get_version()
+    import gitprops
+    release = str(gitprops.get_last_release())
+    version = str(gitprops.get_version())
 except (ImportError, LookupError):
     try:
-        import _meta
-        version = _meta.version
+        from _meta import release, version
     except ImportError:
         log.warn("warning: cannot determine version number")
-        version = "UNKNOWN"
+        release = version = "UNKNOWN"
 
 
 if sys.version_info < (3, 4):
@@ -61,6 +61,7 @@ class meta(setuptools.Command):
 from icat.exception import *
 '''
     meta_template = '''
+release = "%(release)s"
 version = "%(version)s"
 '''
 
@@ -77,6 +78,7 @@ def run(self):
         version = self.distribution.get_version()
         log.info("version: %s", version)
         values = {
+            'release': release,
             'version': version,
             'doc': docstring,
         }
@@ -171,7 +173,7 @@ def run(self):
 # one particular suds clone.  Therefore, we first try if (any clone
 # of) suds is already installed and only add suds to install_requires
 # if not.
-requires = ["lxml", "packaging"]
+requires = ["setuptools", "lxml", "packaging"]
 try:
     import suds
 except ImportError:
@@ -210,8 +212,10 @@ def run(self):
     project_urls = dict(
         Documentation="https://python-icat.readthedocs.io/",
         Source="https://github.com/icatproject/python-icat/",
-        Download="https://github.com/icatproject/python-icat/releases/latest",
-        Changes="https://python-icat.readthedocs.io/en/stable/changelog.html",
+        Download=("https://github.com/icatproject/python-icat/releases/%s/"
+                  % release),
+        Changes=("https://python-icat.readthedocs.io/en/%s/changelog.html"
+                 % release),
     ),
     packages = ["icat"],
     python_requires = ">=3.4",

From 6da785d40adb9a59ecb82301a52ff82e8342463e Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 12:38:06 +0100
Subject: [PATCH 024/102] Use relative imports between modules in the icat
 package

---
 icat/client.py        | 16 ++++++++--------
 icat/config.py        |  7 ++++---
 icat/dump_queries.py  |  3 +--
 icat/dumpfile.py      |  7 ++++---
 icat/dumpfile_xml.py  | 19 ++++++++++---------
 icat/dumpfile_yaml.py | 17 +++++++++--------
 icat/entities.py      |  6 +++---
 icat/entity.py        |  7 ++++---
 icat/eval.py          |  5 ++---
 icat/ids.py           |  8 ++++----
 icat/ingest.py        |  7 ++++---
 icat/query.py         |  7 ++++---
 12 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/icat/client.py b/icat/client.py
index 144b24a2..2a0d8674 100644
--- a/icat/client.py
+++ b/icat/client.py
@@ -17,14 +17,14 @@
 import suds.client
 import suds.sudsobject
 
-from icat.entities import getTypeMap
-from icat.entity import Entity
-from icat.exception import *
-from icat.helper import (Version, simpleqp_unquote, parse_attr_val,
-                         ms_timestamp, disable_logger)
-from icat.ids import *
-from icat.query import Query
-from icat.sslcontext import create_ssl_context, HTTPSTransport
+from .entities import getTypeMap
+from .entity import Entity
+from .exception import *
+from .helper import (Version, simpleqp_unquote, parse_attr_val,
+                     ms_timestamp, disable_logger)
+from .ids import *
+from .query import Query
+from .sslcontext import create_ssl_context, HTTPSTransport
 
 __all__ = ['Client']
 
diff --git a/icat/config.py b/icat/config.py
index 9554c615..f3187df5 100644
--- a/icat/config.py
+++ b/icat/config.py
@@ -8,9 +8,10 @@
 from pathlib import Path
 import sys
 import warnings
-from icat.client import Client
-from icat.authinfo import AuthenticatorInfo, LegacyAuthenticatorInfo
-from icat.exception import ConfigError, VersionMethodError
+
+from .client import Client
+from .authinfo import AuthenticatorInfo, LegacyAuthenticatorInfo
+from .exception import ConfigError, VersionMethodError
 
 __all__ = ['boolean', 'flag', 'Configuration', 'Config']
 
diff --git a/icat/dump_queries.py b/icat/dump_queries.py
index 6bbf7ce2..7c8aa53b 100644
--- a/icat/dump_queries.py
+++ b/icat/dump_queries.py
@@ -32,8 +32,7 @@
     into account and include the new entity types.
 """
 
-import icat
-from icat.query import Query
+from .query import Query
 
 __all__ = [ 'getAuthQueries', 'getStaticQueries', 'getFundingQueries',
             'getInvestigationQueries', 'getDataCollectionQueries',
diff --git a/icat/dumpfile.py b/icat/dumpfile.py
index ca77de96..099f4364 100644
--- a/icat/dumpfile.py
+++ b/icat/dumpfile.py
@@ -50,8 +50,9 @@
 from collections import ChainMap
 import os
 import sys
-import icat
-from icat.query import Query
+
+from .entity import Entity
+from .query import Query
 
 
 def _get_retain_entities(client):
@@ -293,7 +294,7 @@ def writeobjs(self, objs, keyindex, chunksize=100):
         """
         if isinstance(objs, Query) or isinstance(objs, str):
             objs = self.client.searchChunked(objs, chunksize=chunksize)
-        for obj in sorted(objs, key=icat.entity.Entity.__sortkey__):
+        for obj in sorted(objs, key=Entity.__sortkey__):
             # Entities without a constraint will use their id to form
             # the unique key as a last resort.  But we want the keys
             # not to depend on volatile attributes such as the id.
diff --git a/icat/dumpfile_xml.py b/icat/dumpfile_xml.py
index db676b13..4d34287c 100644
--- a/icat/dumpfile_xml.py
+++ b/icat/dumpfile_xml.py
@@ -5,9 +5,11 @@
 import os
 import sys
 from lxml import etree
-import icat
-import icat.dumpfile
-from icat.query import Query
+
+from . import __version__
+from .dumpfile import DumpFileReader, DumpFileWriter, register_backend
+from .entity import Entity
+from .query import Query
 
 utc = datetime.timezone.utc
 
@@ -16,7 +18,7 @@
 # XMLDumpFileReader
 # ------------------------------------------------------------
 
-class XMLDumpFileReader(icat.dumpfile.DumpFileReader):
+class XMLDumpFileReader(DumpFileReader):
     """Backend for reading ICAT data from a XML file.
 
     :param client: a client object configured to connect to the ICAT
@@ -138,7 +140,7 @@ def getobjs_from_data(self, data, objindex):
 # XMLDumpFileWriter
 # ------------------------------------------------------------
 
-class XMLDumpFileWriter(icat.dumpfile.DumpFileWriter):
+class XMLDumpFileWriter(DumpFileWriter):
     """Backend for writing ICAT data to a XML file.
 
     :param client: a client object configured to connect to the ICAT
@@ -200,8 +202,7 @@ def _entity2elem(self, obj, tag, keyindex):
                 k = o.getUniqueKey(keyindex=keyindex)
                 etree.SubElement(d, attr, ref=k)
         for attr in sorted(obj.InstMRel):
-            for o in sorted(getattr(obj, attr), 
-                            key=icat.entity.Entity.__sortkey__):
+            for o in sorted(getattr(obj, attr), key=Entity.__sortkey__):
                 d.append(self._entity2elem(o, tag=attr, keyindex=keyindex))
         return d
 
@@ -213,7 +214,7 @@ def head(self):
         etree.SubElement(head, "service").text = self.client.url
         etree.SubElement(head, "apiversion").text = str(self.client.apiversion)
         etree.SubElement(head, "generator").text = ("icatdump (python-icat %s)" 
-                                                    % icat.__version__)
+                                                    % __version__)
         self.outfile.write(b"""<?xml version="1.0" encoding="utf-8"?>
 <icatdata>
 """)
@@ -241,4 +242,4 @@ def finalize(self):
         self.outfile.write(b"</icatdata>\n")
 
 
-icat.dumpfile.register_backend("XML", XMLDumpFileReader, XMLDumpFileWriter)
+register_backend("XML", XMLDumpFileReader, XMLDumpFileWriter)
diff --git a/icat/dumpfile_yaml.py b/icat/dumpfile_yaml.py
index 091ae537..1321041f 100644
--- a/icat/dumpfile_yaml.py
+++ b/icat/dumpfile_yaml.py
@@ -3,8 +3,10 @@
 
 import datetime
 import yaml
-import icat
-import icat.dumpfile
+
+from . import __version__
+from .dumpfile import DumpFileReader, DumpFileWriter, register_backend
+from .entity import Entity
 
 utc = datetime.timezone.utc
 
@@ -69,7 +71,7 @@
 # YAMLDumpFileReader
 # ------------------------------------------------------------
 
-class YAMLDumpFileReader(icat.dumpfile.DumpFileReader):
+class YAMLDumpFileReader(DumpFileReader):
     """Backend for reading ICAT data from a YAML file.
 
     :param client: a client object configured to connect to the ICAT
@@ -138,7 +140,7 @@ def getobjs_from_data(self, data, objindex):
 # YAMLDumpFileWriter
 # ------------------------------------------------------------
 
-class YAMLDumpFileWriter(icat.dumpfile.DumpFileWriter):
+class YAMLDumpFileWriter(DumpFileWriter):
     """Backend for writing ICAT data to a YAML file.
 
     :param client: a client object configured to connect to the ICAT
@@ -190,8 +192,7 @@ def _entity2dict(self, obj, keyindex):
         for attr in obj.InstMRel:
             if len(getattr(obj, attr)) > 0:
                 d[attr] = []
-                for o in sorted(getattr(obj, attr), 
-                                key=icat.entity.Entity.__sortkey__):
+                for o in sorted(getattr(obj, attr), key=Entity.__sortkey__):
                     d[attr].append(self._entity2dict(o, keyindex=keyindex))
         return d
 
@@ -204,7 +205,7 @@ def head(self):
 # Service: %s
 # ICAT-API: %s
 # Generator: icatdump (python-icat %s)
-""" % (date, self.client.url, self.client.apiversion, icat.__version__)
+""" % (date, self.client.url, self.client.apiversion, __version__)
         self.outfile.write(head)
 
     def startdata(self):
@@ -232,4 +233,4 @@ def finalize(self):
         self.startdata()
 
 
-icat.dumpfile.register_backend("YAML", YAMLDumpFileReader, YAMLDumpFileWriter)
+register_backend("YAML", YAMLDumpFileReader, YAMLDumpFileWriter)
diff --git a/icat/entities.py b/icat/entities.py
index c6b90753..7761aeef 100644
--- a/icat/entities.py
+++ b/icat/entities.py
@@ -17,8 +17,9 @@
 """
 
 import itertools
-from icat.entity import Entity
-from icat.exception import InternalError
+
+from .entity import Entity
+from .exception import InternalError
 
 
 class GroupingMixin:
@@ -241,7 +242,6 @@ def getTypeMap(client):
         may be used as :attr:`icat.client.Client.typemap` for the
         client object.
     :rtype: :class:`dict`
-
     """
     def addType(typemap, cls):
         instanceName = cls.getInstanceName()
diff --git a/icat/entity.py b/icat/entity.py
index 71dd894d..8a778fe1 100644
--- a/icat/entity.py
+++ b/icat/entity.py
@@ -4,9 +4,10 @@
 import re
 from warnings import warn
 import suds.sudsobject
-from icat.listproxy import ListProxy
-from icat.exception import InternalError, EntityTypeError, DataConsistencyError
-from icat.helper import simpleqp_quote
+
+from .listproxy import ListProxy
+from .exception import InternalError, EntityTypeError, DataConsistencyError
+from .helper import simpleqp_quote
 
 __all__ = ['Entity']
 
diff --git a/icat/eval.py b/icat/eval.py
index d21e9df4..b5acba43 100644
--- a/icat/eval.py
+++ b/icat/eval.py
@@ -12,14 +12,13 @@
 """
 
 import logging
-import icat
-import icat.config
+from .config import Config
 
 if __name__ == "__main__":
 
     logging.basicConfig(level=logging.INFO)
 
-    config = icat.config.Config(ids="optional")
+    config = Config(ids="optional")
     config.add_variable('expression', ("-e", "--eval"), 
                         dict(help="Python expression to evaluate"))
     client, conf = config.getconfig()
diff --git a/icat/ids.py b/icat/ids.py
index 8df49d4a..42ee136f 100644
--- a/icat/ids.py
+++ b/icat/ids.py
@@ -20,16 +20,16 @@
 from urllib.request import build_opener
 import zlib
 
-from icat.entity import Entity
-from icat.exception import *
-from icat.helper import Version
+from .entity import Entity
+from .exception import *
+from .helper import Version
 
 # For Python versions older then 3.6.0b1, the standard library does
 # not support sending the body using chunked transfer encoding.  Need
 # to replace the HTTPHandler with our modified versions from
 # icat.chunkedhttp in this case.
 if sys.version_info < (3, 6, 0, 'beta'):
-    from icat.chunkedhttp import HTTPHandler, HTTPSHandler
+    from .chunkedhttp import HTTPHandler, HTTPSHandler
 else:
     from urllib.request import HTTPHandler, HTTPSHandler
 
diff --git a/icat/ingest.py b/icat/ingest.py
index 9c7ddbf1..57f15648 100644
--- a/icat/ingest.py
+++ b/icat/ingest.py
@@ -10,11 +10,12 @@
 
 from pathlib import Path
 from lxml import etree
-import icat.dumpfile_xml
-from icat.exception import InvalidIngestFileError
 
+from .dumpfile_xml import XMLDumpFileReader
+from .exception import InvalidIngestFileError
 
-class IngestReader(icat.dumpfile_xml.XMLDumpFileReader):
+
+class IngestReader(XMLDumpFileReader):
     """Read metadata from XML ingest files into ICAT.
 
     The input file may contain one or more datasets and related
diff --git a/icat/query.py b/icat/query.py
index ccbe7c1e..975cc2e8 100644
--- a/icat/query.py
+++ b/icat/query.py
@@ -5,8 +5,9 @@
 import re
 from warnings import warn
 from collections.abc import Mapping
-import icat.entity
-from icat.exception import *
+
+from .entity import Entity
+from .exception import *
 
 __all__ = ['Query']
 
@@ -119,7 +120,7 @@ def __init__(self, client, entity,
 
         if isinstance(entity, str):
             self.entity = self.client.getEntityClass(entity)
-        elif issubclass(entity, icat.entity.Entity):
+        elif issubclass(entity, Entity):
             if (entity in self.client.typemap.values() and
                 entity.BeanName is not None):
                 self.entity = entity

From 9a289c2d9f1beca19813991beb9f4351bde7d05b Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 13:11:39 +0100
Subject: [PATCH 025/102] - Move source files into src directory - Do not auto
 generate __init__.py - Add a static __init__.py that imports version from
 _meta

---
 .gitignore                                 |  1 -
 Makefile                                   | 11 +++---
 doc/src/conf.py                            |  7 ++--
 setup.py                                   | 39 ++++++----------------
 src/icat/__init__.py                       | 13 ++++++++
 {icat => src/icat}/authinfo.py             |  0
 {icat => src/icat}/chunkedhttp.py          |  0
 {icat => src/icat}/client.py               |  0
 {icat => src/icat}/config.py               |  0
 {icat => src/icat}/dump_queries.py         |  0
 {icat => src/icat}/dumpfile.py             |  0
 {icat => src/icat}/dumpfile_xml.py         |  0
 {icat => src/icat}/dumpfile_yaml.py        |  0
 {icat => src/icat}/entities.py             |  0
 {icat => src/icat}/entity.py               |  0
 {icat => src/icat}/eval.py                 |  0
 {icat => src/icat}/exception.py            |  0
 {icat => src/icat}/helper.py               |  0
 {icat => src/icat}/ids.py                  |  0
 {icat => src/icat}/ingest.py               |  0
 {icat => src/icat}/listproxy.py            |  0
 {icat => src/icat}/query.py                |  0
 {icat => src/icat}/sslcontext.py           |  0
 icatdump.py => src/scripts/icatdump.py     |  0
 icatingest.py => src/scripts/icatingest.py |  0
 wipeicat.py => src/scripts/wipeicat.py     |  0
 26 files changed, 33 insertions(+), 38 deletions(-)
 create mode 100644 src/icat/__init__.py
 rename {icat => src/icat}/authinfo.py (100%)
 rename {icat => src/icat}/chunkedhttp.py (100%)
 rename {icat => src/icat}/client.py (100%)
 rename {icat => src/icat}/config.py (100%)
 rename {icat => src/icat}/dump_queries.py (100%)
 rename {icat => src/icat}/dumpfile.py (100%)
 rename {icat => src/icat}/dumpfile_xml.py (100%)
 rename {icat => src/icat}/dumpfile_yaml.py (100%)
 rename {icat => src/icat}/entities.py (100%)
 rename {icat => src/icat}/entity.py (100%)
 rename {icat => src/icat}/eval.py (100%)
 rename {icat => src/icat}/exception.py (100%)
 rename {icat => src/icat}/helper.py (100%)
 rename {icat => src/icat}/ids.py (100%)
 rename {icat => src/icat}/ingest.py (100%)
 rename {icat => src/icat}/listproxy.py (100%)
 rename {icat => src/icat}/query.py (100%)
 rename {icat => src/icat}/sslcontext.py (100%)
 rename icatdump.py => src/scripts/icatdump.py (100%)
 mode change 100755 => 100644
 rename icatingest.py => src/scripts/icatingest.py (100%)
 mode change 100755 => 100644
 rename wipeicat.py => src/scripts/wipeicat.py (100%)
 mode change 100755 => 100644

diff --git a/.gitignore b/.gitignore
index d9103d85..ecd6841e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,6 @@ __pycache__/
 /_meta.py
 /build/
 /dist/
-/icat/__init__.py
 /tests/data/example_data.yaml
 /tests/data/icat.cfg
 /tests/data/icatdata-*.xsd
diff --git a/Makefile b/Makefile
index c087a875..bdce49b9 100644
--- a/Makefile
+++ b/Makefile
@@ -10,15 +10,15 @@ test:
 sdist: doc-man
 	$(PYTHON) setup.py sdist
 
-doc-html: meta
-	$(MAKE) -C doc html PYTHONPATH=$(CURDIR)
+doc-html: build
+	$(MAKE) -C doc html
 
-doc-man: meta
-	$(MAKE) -C doc man PYTHONPATH=$(CURDIR)
+doc-man: build
+	$(MAKE) -C doc man
 
 clean:
 	rm -rf build
-	rm -rf __pycache__ icat/__pycache__
+	rm -rf __pycache__
 	rm -rf tests/data/example_data.yaml
 	rm -rf tests/data/icatdata-*.xsd
 	rm -rf tests/data/icatdump-* tests/data/ingest-*.xml
@@ -29,7 +29,6 @@ clean:
 
 distclean: clean
 	rm -f MANIFEST _meta.py
-	rm -f icat/__init__.py
 	rm -rf dist
 	rm -rf tests/.pytest_cache
 	$(MAKE) -C doc distclean
diff --git a/doc/src/conf.py b/doc/src/conf.py
index a7603db4..902c1fe2 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -10,9 +10,10 @@
 import sys
 
 maindir = Path(__file__).resolve().parent.parent.parent
-sys.path[0] = str(maindir)
+buildlib = maindir / "build" / "lib"
+sys.path[0] = str(buildlib)
 
-import _meta
+import icat._meta
 
 
 # -- Project information -----------------------------------------------------
@@ -23,7 +24,7 @@
 author = 'Rolf Krahl'
 
 # The full version, including alpha/beta/rc tags
-release = _meta.version
+release = icat._meta.version
 # The short X.Y version
 version = ".".join(release.split(".")[0:2])
 
diff --git a/setup.py b/setup.py
index 3158cab7..a2007af4 100755
--- a/setup.py
+++ b/setup.py
@@ -49,30 +49,16 @@ class meta(setuptools.Command):
 
     description = "generate meta files"
     user_options = []
-    init_template = '''"""%(doc)s"""
-
-__version__ = "%(version)s"
-
-#
-# Default import
-#
-
-from icat.client import *
-from icat.exception import *
-'''
     meta_template = '''
 release = "%(release)s"
 version = "%(version)s"
 '''
 
     def initialize_options(self):
-        self.package_dir = None
+        pass
 
     def finalize_options(self):
-        self.package_dir = {}
-        if self.distribution.package_dir:
-            for name, path in self.distribution.package_dir.items():
-                self.package_dir[name] = convert_path(path)
+        pass
 
     def run(self):
         version = self.distribution.get_version()
@@ -80,18 +66,7 @@ def run(self):
         values = {
             'release': release,
             'version': version,
-            'doc': docstring,
         }
-        try:
-            pkgname = self.distribution.packages[0]
-        except IndexError:
-            log.warn("warning: no package defined")
-        else:
-            pkgdir = Path(self.package_dir.get(pkgname, pkgname))
-            if not pkgdir.is_dir():
-                pkgdir.mkdir()
-            with (pkgdir / "__init__.py").open("wt") as f:
-                print(self.init_template % values, file=f)
         with Path("_meta.py").open("wt") as f:
             print(self.meta_template % values, file=f)
 
@@ -164,6 +139,9 @@ class build_py(setuptools.command.build_py.build_py):
     def run(self):
         self.run_command('meta')
         super().run()
+        package = self.distribution.packages[0].split('.')
+        outfile = self.get_module_outfile(self.build_lib, package, "_meta")
+        self.copy_file("_meta.py", outfile, preserve_mode=0)
 
 
 # There are several forks of the original suds package around, most of
@@ -218,9 +196,14 @@ def run(self):
                  % release),
     ),
     packages = ["icat"],
+    package_dir = {"": "src"},
     python_requires = ">=3.4",
     install_requires = requires,
-    scripts = ["icatdump.py", "icatingest.py", "wipeicat.py"],
+    scripts = [
+        "src/scripts/icatdump.py",
+        "src/scripts/icatingest.py",
+        "src/scripts/wipeicat.py"
+    ],
     cmdclass = dict(cmdclass,
                     meta=meta,
                     build_py=build_py,
diff --git a/src/icat/__init__.py b/src/icat/__init__.py
new file mode 100644
index 00000000..30475152
--- /dev/null
+++ b/src/icat/__init__.py
@@ -0,0 +1,13 @@
+"""Python interface to ICAT and IDS
+
+This package provides a collection of modules for writing Python
+programs that access an `ICAT`_ service using the SOAP interface.  It
+is based on Suds and extends it with ICAT specific features.
+
+.. _ICAT: https://icatproject.org/
+"""
+
+from ._meta import version as __version__
+from .client import *
+from .exception import *
+
diff --git a/icat/authinfo.py b/src/icat/authinfo.py
similarity index 100%
rename from icat/authinfo.py
rename to src/icat/authinfo.py
diff --git a/icat/chunkedhttp.py b/src/icat/chunkedhttp.py
similarity index 100%
rename from icat/chunkedhttp.py
rename to src/icat/chunkedhttp.py
diff --git a/icat/client.py b/src/icat/client.py
similarity index 100%
rename from icat/client.py
rename to src/icat/client.py
diff --git a/icat/config.py b/src/icat/config.py
similarity index 100%
rename from icat/config.py
rename to src/icat/config.py
diff --git a/icat/dump_queries.py b/src/icat/dump_queries.py
similarity index 100%
rename from icat/dump_queries.py
rename to src/icat/dump_queries.py
diff --git a/icat/dumpfile.py b/src/icat/dumpfile.py
similarity index 100%
rename from icat/dumpfile.py
rename to src/icat/dumpfile.py
diff --git a/icat/dumpfile_xml.py b/src/icat/dumpfile_xml.py
similarity index 100%
rename from icat/dumpfile_xml.py
rename to src/icat/dumpfile_xml.py
diff --git a/icat/dumpfile_yaml.py b/src/icat/dumpfile_yaml.py
similarity index 100%
rename from icat/dumpfile_yaml.py
rename to src/icat/dumpfile_yaml.py
diff --git a/icat/entities.py b/src/icat/entities.py
similarity index 100%
rename from icat/entities.py
rename to src/icat/entities.py
diff --git a/icat/entity.py b/src/icat/entity.py
similarity index 100%
rename from icat/entity.py
rename to src/icat/entity.py
diff --git a/icat/eval.py b/src/icat/eval.py
similarity index 100%
rename from icat/eval.py
rename to src/icat/eval.py
diff --git a/icat/exception.py b/src/icat/exception.py
similarity index 100%
rename from icat/exception.py
rename to src/icat/exception.py
diff --git a/icat/helper.py b/src/icat/helper.py
similarity index 100%
rename from icat/helper.py
rename to src/icat/helper.py
diff --git a/icat/ids.py b/src/icat/ids.py
similarity index 100%
rename from icat/ids.py
rename to src/icat/ids.py
diff --git a/icat/ingest.py b/src/icat/ingest.py
similarity index 100%
rename from icat/ingest.py
rename to src/icat/ingest.py
diff --git a/icat/listproxy.py b/src/icat/listproxy.py
similarity index 100%
rename from icat/listproxy.py
rename to src/icat/listproxy.py
diff --git a/icat/query.py b/src/icat/query.py
similarity index 100%
rename from icat/query.py
rename to src/icat/query.py
diff --git a/icat/sslcontext.py b/src/icat/sslcontext.py
similarity index 100%
rename from icat/sslcontext.py
rename to src/icat/sslcontext.py
diff --git a/icatdump.py b/src/scripts/icatdump.py
old mode 100755
new mode 100644
similarity index 100%
rename from icatdump.py
rename to src/scripts/icatdump.py
diff --git a/icatingest.py b/src/scripts/icatingest.py
old mode 100755
new mode 100644
similarity index 100%
rename from icatingest.py
rename to src/scripts/icatingest.py
diff --git a/wipeicat.py b/src/scripts/wipeicat.py
old mode 100755
new mode 100644
similarity index 100%
rename from wipeicat.py
rename to src/scripts/wipeicat.py

From 079e9b983dc85c2107ddcb6bd4d65ad8361f830d Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 13:18:07 +0100
Subject: [PATCH 026/102] Remove exec permission bit from example scripts

---
 doc/examples/add-investigation-data.py | 0
 doc/examples/add-job.py                | 0
 doc/examples/addfile.py                | 0
 doc/examples/create-datafile.py        | 0
 doc/examples/create-investigation.py   | 0
 doc/examples/create-parametertypes.py  | 0
 doc/examples/create-sampletype.py      | 0
 doc/examples/downloaddata.py           | 0
 doc/examples/dumpinvestigation.py      | 0
 doc/examples/dumprules.py              | 0
 doc/examples/getversion.py             | 0
 doc/examples/icatexport.py             | 0
 doc/examples/icatimport.py             | 0
 doc/examples/icatsummary.py            | 0
 doc/examples/ingest.py                 | 0
 doc/examples/init-icat.py              | 0
 doc/examples/login.py                  | 0
 17 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 doc/examples/add-investigation-data.py
 mode change 100755 => 100644 doc/examples/add-job.py
 mode change 100755 => 100644 doc/examples/addfile.py
 mode change 100755 => 100644 doc/examples/create-datafile.py
 mode change 100755 => 100644 doc/examples/create-investigation.py
 mode change 100755 => 100644 doc/examples/create-parametertypes.py
 mode change 100755 => 100644 doc/examples/create-sampletype.py
 mode change 100755 => 100644 doc/examples/downloaddata.py
 mode change 100755 => 100644 doc/examples/dumpinvestigation.py
 mode change 100755 => 100644 doc/examples/dumprules.py
 mode change 100755 => 100644 doc/examples/getversion.py
 mode change 100755 => 100644 doc/examples/icatexport.py
 mode change 100755 => 100644 doc/examples/icatimport.py
 mode change 100755 => 100644 doc/examples/icatsummary.py
 mode change 100755 => 100644 doc/examples/ingest.py
 mode change 100755 => 100644 doc/examples/init-icat.py
 mode change 100755 => 100644 doc/examples/login.py

diff --git a/doc/examples/add-investigation-data.py b/doc/examples/add-investigation-data.py
old mode 100755
new mode 100644
diff --git a/doc/examples/add-job.py b/doc/examples/add-job.py
old mode 100755
new mode 100644
diff --git a/doc/examples/addfile.py b/doc/examples/addfile.py
old mode 100755
new mode 100644
diff --git a/doc/examples/create-datafile.py b/doc/examples/create-datafile.py
old mode 100755
new mode 100644
diff --git a/doc/examples/create-investigation.py b/doc/examples/create-investigation.py
old mode 100755
new mode 100644
diff --git a/doc/examples/create-parametertypes.py b/doc/examples/create-parametertypes.py
old mode 100755
new mode 100644
diff --git a/doc/examples/create-sampletype.py b/doc/examples/create-sampletype.py
old mode 100755
new mode 100644
diff --git a/doc/examples/downloaddata.py b/doc/examples/downloaddata.py
old mode 100755
new mode 100644
diff --git a/doc/examples/dumpinvestigation.py b/doc/examples/dumpinvestigation.py
old mode 100755
new mode 100644
diff --git a/doc/examples/dumprules.py b/doc/examples/dumprules.py
old mode 100755
new mode 100644
diff --git a/doc/examples/getversion.py b/doc/examples/getversion.py
old mode 100755
new mode 100644
diff --git a/doc/examples/icatexport.py b/doc/examples/icatexport.py
old mode 100755
new mode 100644
diff --git a/doc/examples/icatimport.py b/doc/examples/icatimport.py
old mode 100755
new mode 100644
diff --git a/doc/examples/icatsummary.py b/doc/examples/icatsummary.py
old mode 100755
new mode 100644
diff --git a/doc/examples/ingest.py b/doc/examples/ingest.py
old mode 100755
new mode 100644
diff --git a/doc/examples/init-icat.py b/doc/examples/init-icat.py
old mode 100755
new mode 100644
diff --git a/doc/examples/login.py b/doc/examples/login.py
old mode 100755
new mode 100644

From 1eda1b5fbbc21b112c0abaaa07a5d90235d84ed9 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 13:54:25 +0100
Subject: [PATCH 027/102] Review setup script

---
 setup.py | 58 +++++++++++++++++++++++---------------------------------
 1 file changed, 24 insertions(+), 34 deletions(-)

diff --git a/setup.py b/setup.py
index a2007af4..9c3b1e17 100755
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,3 @@
-#! /usr/bin/python
 """Python interface to ICAT and IDS
 
 This package provides a collection of modules for writing Python
@@ -13,12 +12,8 @@
 import setuptools.command.build_py
 import distutils.command.sdist
 from distutils import log
-from glob import glob
-import os
-import os.path
 from pathlib import Path
 import string
-import sys
 try:
     import distutils_pytest
     cmdclass = distutils_pytest.cmdclass
@@ -35,13 +30,6 @@
         log.warn("warning: cannot determine version number")
         release = version = "UNKNOWN"
 
-
-if sys.version_info < (3, 4):
-    log.warn("warning: Python %d.%d is not supported! "
-             "This package requires Python 3.4 or newer."
-             % sys.version_info[:2])
-
-
 docstring = __doc__
 
 
@@ -86,34 +74,36 @@ def run(self):
         self.copy_test_data()
 
     def copy_test_scripts(self):
-        destdir = os.path.join("tests", "scripts")
-        self.mkpath(destdir)
+        destdir = Path("tests", "scripts")
+        self.mkpath(str(destdir))
         scripts = []
-        scripts += glob(os.path.join("doc", "examples", "*.py"))
-        scripts += self.distribution.scripts
+        scripts += Path("doc", "examples").glob("*.py")
+        scripts += (Path(s) for s in self.distribution.scripts)
         for script in scripts:
-            dest = os.path.join(destdir, os.path.basename(script))
-            self.copy_file(script, dest, preserve_mode=False)
+            dest = destdir / script.name
+            self.copy_file(str(script), str(dest), preserve_mode=False)
 
     def copy_test_data(self):
-        destdir = os.path.join("tests", "data")
-        self.mkpath(destdir)
+        destdir = Path("tests", "data")
+        self.mkpath(str(destdir))
+        etc = Path("etc")
+        doc = Path("doc")
+        examples = doc / "examples"
         files = []
-        files += [ os.path.join("doc", "examples", f)
-                   for f in ["example_data.yaml",
+        files += ( examples / f
+                   for f in ("example_data.yaml",
                              "ingest-datafiles.xml", "ingest-ds-params.xml",
-                             "ingest-sample-ds.xml"] ]
-        files += [ os.path.join("doc", "examples",
-                                "icatdump-%s.%s" % (ver, ext))
+                             "ingest-sample-ds.xml") )
+        files += ( examples / ("icatdump-%s.%s" % (ver, ext))
                    for ver in ("4.4", "4.7", "4.10", "5.0")
-                   for ext in ("xml", "yaml") ]
-        files += glob(os.path.join("doc", "icatdata-*.xsd"))
-        files += glob(os.path.join("doc", "examples", "metadata-*.xml"))
-        files += [ os.path.join("etc", f)
-                   for f in ["ingest-10.xsd", "ingest-11.xsd", "ingest.xslt"] ]
+                   for ext in ("xml", "yaml") )
+        files += doc.glob("icatdata-*.xsd")
+        files += examples.glob("metadata-*.xml")
+        files += ( etc / f
+                   for f in ("ingest-10.xsd", "ingest-11.xsd", "ingest.xslt") )
         for f in files:
-            dest = os.path.join(destdir, os.path.basename(f))
-            self.copy_file(f, dest, preserve_mode=False)
+            dest = destdir / f.name
+            self.copy_file(str(f), str(dest), preserve_mode=False)
 
 
 # Note: Do not use setuptools for making the source distribution,
@@ -129,8 +119,8 @@ def run(self):
             "description": docstring.split("\n")[0],
             "long_description": docstring.split("\n", maxsplit=2)[2].strip(),
         }
-        for spec in glob("*.spec"):
-            with Path(spec).open('rt') as inf:
+        for spec in Path().glob("*.spec"):
+            with spec.open('rt') as inf:
                 with Path(self.dist_dir, spec).open('wt') as outf:
                     outf.write(string.Template(inf.read()).substitute(subst))
 

From 7f6227e6f7b15a016c9e99e7242f11f5b678590b Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 14:07:44 +0100
Subject: [PATCH 028/102] Add GitHub action to publish releases to PyPI

---
 .github/workflows/publish-to-pypi.yaml | 29 ++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 .github/workflows/publish-to-pypi.yaml

diff --git a/.github/workflows/publish-to-pypi.yaml b/.github/workflows/publish-to-pypi.yaml
new file mode 100644
index 00000000..6c579ad0
--- /dev/null
+++ b/.github/workflows/publish-to-pypi.yaml
@@ -0,0 +1,29 @@
+name: Publish
+on:
+  release:
+    types:
+       - published
+jobs:
+  PyPI:
+    name: publish release to PyPI
+    runs-on: ubuntu-latest
+    environment: release
+    permissions:
+      id-token: write
+    env:
+      SDIST: python-icat-${{ github.event.release.tag_name }}.tar.gz
+    steps:
+      - name: Fetch assets
+        uses: cb80/dlassets@latest
+        with:
+          tag: ${{ github.event.release.tag_name }}
+          to: assets
+      - name: Check assets
+        run: |
+          ls -la assets
+      - name: Copy distfile to dist directory
+        run: |
+          mkdir -p dist
+          cp -p assets/$SDIST dist
+      - name: Upload distfile to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

From 53d96644f7fa9f0b3a3ce91c7184db1c5fd7c10c Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 15:29:32 +0100
Subject: [PATCH 029/102] Update changelog

---
 CHANGES.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index c735619f..3e58fe74 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -21,8 +21,14 @@ Incompatible changes
 
   Note that :mod:`icat.ingest` has been declared experimental for now.
 
+Bug fixes and minor changes
+---------------------------
+
++ `#145`_: Review build tool chain
+
 .. _#143: https://github.com/icatproject/python-icat/issues/143
 .. _#144: https://github.com/icatproject/python-icat/pull/144
+.. _#145: https://github.com/icatproject/python-icat/pull/145
 
 
 1.2.0 (2023-10-31)

From c941a9d89ede2f115af91fd99c646e4ac508d066 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 15:38:10 +0100
Subject: [PATCH 030/102] Restrict running ReST lint on push to branches
 develop and master

---
 .github/workflows/rst-lint.yaml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/rst-lint.yaml b/.github/workflows/rst-lint.yaml
index b5e7c2fe..b9b239f7 100644
--- a/.github/workflows/rst-lint.yaml
+++ b/.github/workflows/rst-lint.yaml
@@ -1,5 +1,10 @@
 name: Check ReST input files
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - develop
+      - master
+  pull_request:
 jobs:
   doc8:
     runs-on: ubuntu-latest

From fe03983313307e1d89c3cfdc2f1763b5b996b7db Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 15:45:33 +0100
Subject: [PATCH 031/102] Fixup cf53813: forgot to update .rtd-require

---
 .rtd-require | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.rtd-require b/.rtd-require
index 2de815cd..f86f61b9 100644
--- a/.rtd-require
+++ b/.rtd-require
@@ -1,8 +1,8 @@
 PyYAML
+git-props
 lxml
 packaging
 setuptools
-setuptools_scm
 suds
 jinja2<3.1
 sphinx>=2,<3

From f12a18362969f32b715ccde9652eaf8de945c96f Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 15:49:35 +0100
Subject: [PATCH 032/102] Fix Read the Docs build

---
 .readthedocs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 8a892916..409b807b 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -11,7 +11,7 @@ build:
     post_checkout:
       - git fetch --unshallow
     post_install:
-      - python setup.py meta
+      - python setup.py build
 
 sphinx:
   configuration: doc/src/conf.py

From 16443e88aa6c3e9b4319c5f67243ad31d42577eb Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 15:59:14 +0100
Subject: [PATCH 033/102] Documentation fix: move a version changed note from
 module icat.ingest to class icat.ingest.IngestReader

---
 doc/src/ingest.rst | 4 ----
 src/icat/ingest.py | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst
index e9abda8e..72eeb07a 100644
--- a/doc/src/ingest.rst
+++ b/doc/src/ingest.rst
@@ -55,10 +55,6 @@ the ``Dataset``.
 .. versionchanged:: 1.2.0
    add version 1.1 of the ingest file format, including references to samples
 
-.. versionchanged:: 1.3.0
-   drop class attribute :attr:`~icat.ingest.IngestReader.XSLT_name` in
-   favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`.
-
 .. autoclass:: icat.ingest.IngestReader
     :members:
     :show-inheritance:
diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 57f15648..6c725a0f 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -37,6 +37,10 @@ class IngestReader(XMLDumpFileReader):
     :type investigation: :class:`icat.entity.Entity`
     :raise icat.exception.InvalidIngestFileError: if the input in
         metadata is not valid.
+
+    .. versionchanged:: 1.3.0
+       drop class attribute :attr:`~icat.ingest.IngestReader.XSLT_name`
+       in favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`.
     """
 
     SchemaDir = Path("/usr/share/icat")

From 8446200535f437327cfea057c01e7cc1d302c326 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 2 Jan 2024 16:14:12 +0100
Subject: [PATCH 034/102] Minor doc config fixes

---
 doc/src/conf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/src/conf.py b/doc/src/conf.py
index 38c5c319..2f880389 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -12,10 +12,10 @@
 maindir = Path(__file__).resolve().parent.parent.parent
 buildlib = maindir / "build" / "lib"
 sys.path[0] = str(buildlib)
+sys.dont_write_bytecode = True
 
 import icat._meta
 
-
 # -- Project information -----------------------------------------------------
 
 project = 'python-icat'
@@ -58,7 +58,7 @@
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+language = 'en'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.

From af28f5de6ade53d8252c1c9021f4ded4ef56ee6f Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 3 Jan 2024 11:28:37 +0100
Subject: [PATCH 035/102] - Add a new section on file formats to the
 documentation - Move the subsection on ICAT data files from the icat.dumpfile
 module   reference into the new file formats section - Add a subsection on
 Metadata ingest files (only a section heading by   now)

---
 doc/src/dumpfile.rst        | 64 ++-----------------------------------
 doc/src/file-icatdata.rst   | 58 +++++++++++++++++++++++++++++++++
 doc/src/file-icatingest.rst |  6 ++++
 doc/src/fileformats.rst     | 11 +++++++
 doc/src/index.rst           |  1 +
 5 files changed, 78 insertions(+), 62 deletions(-)
 create mode 100644 doc/src/file-icatdata.rst
 create mode 100644 doc/src/file-icatingest.rst
 create mode 100644 doc/src/fileformats.rst

diff --git a/doc/src/dumpfile.rst b/doc/src/dumpfile.rst
index 1fc44d6e..d87e8c9f 100644
--- a/doc/src/dumpfile.rst
+++ b/doc/src/dumpfile.rst
@@ -6,8 +6,8 @@
 This module provides the base classes
 :class:`icat.dumpfile.DumpFileReader` and
 :class:`icat.dumpfile.DumpFileWriter` that define the API and the
-logic for reading and writing ICAT data files.  The actual work is
-done in file format specific backend modules that should provide
+logic for reading and writing :ref:`ICAT-data-files`.  The actual work
+is done in file format specific backend modules that should provide
 subclasses that must implement the abstract methods.
 
 .. autoclass:: icat.dumpfile.DumpFileReader
@@ -23,63 +23,3 @@ subclasses that must implement the abstract methods.
 .. autofunction:: icat.dumpfile.register_backend
 
 .. autofunction:: icat.dumpfile.open_dumpfile
-
-
-.. _ICAT-data-files:
-
-ICAT data files
----------------
-
-ICAT data files provide a way to serialize ICAT content to a flat
-file.  This section describes the logical structure of ICAT data
-files.  The actual file format depends on the backend, python-icat
-provides backends using XML and YAML.
-
-There is a one-to-one correspondence of the objects in the data
-file and the corresponding object in ICAT according to the ICAT
-schema, including all attributes and relations to other objects.
-Special unique keys are used to encode the relations.
-:meth:`icat.entity.Entity.getUniqueKey` may be used to get such a
-unique key for an entity object and
-:meth:`icat.client.Client.searchUniqueKey` may be used to search an
-object by its key.  Otherwise these keys should be considered as
-opaque ids.
-
-Data files are partitioned in chunks.  This is done to avoid having
-the whole file, e.g. the complete inventory of the ICAT, at once in
-memory.  The problem is that objects contain references to other
-objects (e.g. Datafiles refer to Datasets, the latter refer to
-Investigations, and so forth).  We keep an index of the objects in
-order to resolve these references.  But there is a memory versus time
-tradeoff: we cannot keep all the objects in the index, that would
-again mean the complete inventory of the ICAT.  And we can't know
-beforehand which object is going to be referenced later on, so we
-don't know which one to keep and which one to discard from the index.
-Fortunately we can query objects we discarded once back from the ICAT
-server.  But this is expensive.  So the strategy is as follows: keep
-all objects from the current chunk in the index and discard the
-complete index each time a chunk has been processed.  This will work
-fine if objects are mostly referencing other objects from the same
-chunk and only a few references go across chunk boundaries.
-
-Therefore, we want these chunks to be small enough to fit into memory,
-but at the same time large enough to keep as many relations between
-objects as possible local in a chunk.  It is in the responsibility of
-the writer of the data file to create the chunks in this manner.
-
-The objects that get written to the data file and how this file is
-organized is controlled by lists of ICAT search expressions, see
-:meth:`icat.dumpfile.DumpFileWriter.writeobjs`.  There is some degree
-of flexibility: an object may include related objects in an
-one-to-many relation, just by including them in the search expression.
-In this case, these related objects should not have a search
-expression on their own again.  For instance, the search expression
-for Grouping may include UserGroup.  The UserGroups will then be
-embedded in their respective grouping in the data file.  There should
-not be a search expression for UserGroup then.
-
-Objects related in a many-to-one relation must always be included in
-the search expression.  This is also true if the object is
-indirectly related to one of the included objects.  In this case,
-only a reference to the related object will be included in the data
-file.  The related object must have its own list entry.
diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
new file mode 100644
index 00000000..b8d93ed1
--- /dev/null
+++ b/doc/src/file-icatdata.rst
@@ -0,0 +1,58 @@
+.. _ICAT-data-files:
+
+ICAT data files
+===============
+
+ICAT data files provide a way to serialize ICAT content to a flat
+file.  This section describes the logical structure of ICAT data
+files.  The actual file format depends on the backend, python-icat
+provides backends using XML and YAML.
+
+There is a one-to-one correspondence of the objects in the data
+file and the corresponding object in ICAT according to the ICAT
+schema, including all attributes and relations to other objects.
+Special unique keys are used to encode the relations.
+:meth:`icat.entity.Entity.getUniqueKey` may be used to get such a
+unique key for an entity object and
+:meth:`icat.client.Client.searchUniqueKey` may be used to search an
+object by its key.  Otherwise these keys should be considered as
+opaque ids.
+
+Data files are partitioned in chunks.  This is done to avoid having
+the whole file, e.g. the complete inventory of the ICAT, at once in
+memory.  The problem is that objects contain references to other
+objects (e.g. Datafiles refer to Datasets, the latter refer to
+Investigations, and so forth).  We keep an index of the objects in
+order to resolve these references.  But there is a memory versus time
+tradeoff: we cannot keep all the objects in the index, that would
+again mean the complete inventory of the ICAT.  And we can't know
+beforehand which object is going to be referenced later on, so we
+don't know which one to keep and which one to discard from the index.
+Fortunately we can query objects we discarded once back from the ICAT
+server.  But this is expensive.  So the strategy is as follows: keep
+all objects from the current chunk in the index and discard the
+complete index each time a chunk has been processed.  This will work
+fine if objects are mostly referencing other objects from the same
+chunk and only a few references go across chunk boundaries.
+
+Therefore, we want these chunks to be small enough to fit into memory,
+but at the same time large enough to keep as many relations between
+objects as possible local in a chunk.  It is the responsibility of
+the writer of the data file to create the chunks in this manner.
+
+The objects that get written to the data file and how this file is
+organized are controlled by lists of ICAT search expressions, see
+:meth:`icat.dumpfile.DumpFileWriter.writeobjs`.  There is some degree
+of flexibility: an object may include related objects in a
+one-to-many relation, just by including them in the search expression.
+In this case, these related objects should not have a search
+expression on their own again.  For instance, the search expression
+for Grouping may include UserGroup.  The UserGroups will then be
+embedded in their respective grouping in the data file.  There should
+not be a search expression for UserGroup then.
+
+Objects related in a many-to-one relation must always be included in
+the search expression.  This is also true if the object is
+indirectly related to one of the included objects.  In this case,
+only a reference to the related object will be included in the data
+file.  The related object must have its own list entry.
diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
new file mode 100644
index 00000000..04954679
--- /dev/null
+++ b/doc/src/file-icatingest.rst
@@ -0,0 +1,6 @@
+.. _ICAT-ingest-files:
+
+Metadata ingest files
+=====================
+
+
diff --git a/doc/src/fileformats.rst b/doc/src/fileformats.rst
new file mode 100644
index 00000000..c90eaec1
--- /dev/null
+++ b/doc/src/fileformats.rst
@@ -0,0 +1,11 @@
+File formats
+============
+
+Some components of python-icat read input files or write output files.
+This section describes the file formats being used.
+
+.. toctree::
+   :maxdepth: 1
+
+   file-icatdata
+   file-icatingest
diff --git a/doc/src/index.rst b/doc/src/index.rst
index 1fdc3c09..a3d947c0 100644
--- a/doc/src/index.rst
+++ b/doc/src/index.rst
@@ -38,6 +38,7 @@ Parts of the documentation
    tutorial
    moduleref
    scripts
+   fileformats
    known-issues
    changelog
 

From 3b9367ece06dbe4c540f7d6ba5e6abe5b07966ed Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 3 Jan 2024 12:24:06 +0100
Subject: [PATCH 036/102] Review introduction of ICAT data files section

---
 doc/src/file-icatdata.rst | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index b8d93ed1..2dbb00c1 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -4,9 +4,16 @@ ICAT data files
 ===============
 
 ICAT data files provide a way to serialize ICAT content to a flat
-file.  This section describes the logical structure of ICAT data
-files.  The actual file format depends on the backend, python-icat
-provides backends using XML and YAML.
+file.  These files are read by the :ref:`icatingest` and written by
+the :ref:`icatdump` command line scripts respectively.  The program
+logic for reading and writing the files is provided by the
+:mod:`icat.dumpfile` module.
+
+The actual file format depends on the version of the ICAT schema and
+on the backend: python-icat provides backends using XML and YAML.
+
+Logical structure of ICAT data files
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 There is a one-to-one correspondence of the objects in the data
 file and the corresponding object in ICAT according to the ICAT

From 33f8650e84c06ee8bab79a7e80317d7584b5d8e8 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 3 Jan 2024 13:50:40 +0100
Subject: [PATCH 037/102] Some formulation review to the subsection on the
 structure of ICAT data files

---
 doc/src/file-icatdata.rst | 51 ++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index 2dbb00c1..06d8f70c 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -29,18 +29,19 @@ Data files are partitioned in chunks.  This is done to avoid having
 the whole file, e.g. the complete inventory of the ICAT, at once in
 memory.  The problem is that objects contain references to other
 objects (e.g. Datafiles refer to Datasets, the latter refer to
-Investigations, and so forth).  We keep an index of the objects in
-order to resolve these references.  But there is a memory versus time
-tradeoff: we cannot keep all the objects in the index, that would
-again mean the complete inventory of the ICAT.  And we can't know
-beforehand which object is going to be referenced later on, so we
-don't know which one to keep and which one to discard from the index.
-Fortunately we can query objects we discarded once back from the ICAT
-server.  But this is expensive.  So the strategy is as follows: keep
-all objects from the current chunk in the index and discard the
-complete index each time a chunk has been processed.  This will work
-fine if objects are mostly referencing other objects from the same
-chunk and only a few references go across chunk boundaries.
+Investigations, and so forth).  We keep an index of the objects as
+cache in order to resolve these references.  But there is a memory
+versus time tradeoff: we cannot keep all the objects in the index,
+that would again mean the complete inventory of the ICAT.  And we
+can't know beforehand which object is going to be referenced later on,
+so we don't know which one to keep and which one to discard from the
+index.  Fortunately we can query objects that we discarded once back
+from the ICAT server.  But this is expensive.  So the strategy is as
+follows: keep all objects from the current chunk in the index and
+discard the complete index each time a chunk has been
+processed. [#dc]_ This will work fine if objects are mostly
+referencing other objects from the same chunk and only a few
+references go across chunk boundaries.
 
 Therefore, we want these chunks to be small enough to fit into memory,
 but at the same time large enough to keep as many relations between
@@ -48,18 +49,24 @@ objects as possible local in a chunk.  It is in the responsibility of
 the writer of the data file to create the chunks in this manner.
 
 The objects that get written to the data file and how this file is
-organized is controlled by lists of ICAT search expressions, see
-:meth:`icat.dumpfile.DumpFileWriter.writeobjs`.  There is some degree
-of flexibility: an object may include related objects in an
-one-to-many relation, just by including them in the search expression.
-In this case, these related objects should not have a search
-expression on their own again.  For instance, the search expression
-for Grouping may include UserGroup.  The UserGroups will then be
-embedded in their respective grouping in the data file.  There should
-not be a search expression for UserGroup then.
+organized is controlled by lists of ICAT search expressions or entity
+objects, see :meth:`icat.dumpfile.DumpFileWriter.writeobjs`.  There is
+some degree of flexibility: an object may include related objects in
+a one-to-many relation.  In this case, these related objects should
+not be added on their own again.  For instance, you may write User,
+Grouping, and UserGroup as separate objects into the file.  In this
+case, the UserGroup entries must properly reference related User and
+Grouping.  Alternatively you may include the UserGroups in the
+corresponding Grouping objects.  In this case, you must not add the
+UserGroups again on their own.
 
 Objects related in a many-to-one relation must always be included in
 the search expression.  This is also true if the object is
 indirectly related to one of the included objects.  In this case,
 only a reference to the related object will be included in the data
-file.  The related object must have its own list entry.
+file.  The related object must have its own entry.
+
+
+.. [#dc] There is one exception: DataCollections don't have a
+         uniqueness constraint and can't reliably be searched by
+         attributes.  They are always kept in the index.

From da2415f3097965660069cf55b3558750b595c515 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 3 Jan 2024 15:10:23 +0100
Subject: [PATCH 038/102] Add simple ICAT data file examples

---
 doc/examples/icatdump-simple-1.xml  | 103 ++++++++++++++++++++++++++
 doc/examples/icatdump-simple-1.yaml |  71 ++++++++++++++++++
 doc/examples/icatdump-simple-2.xml  | 108 ++++++++++++++++++++++++++++
 doc/examples/icatdump-simple-2.yaml |  79 ++++++++++++++++++++
 4 files changed, 361 insertions(+)
 create mode 100644 doc/examples/icatdump-simple-1.xml
 create mode 100644 doc/examples/icatdump-simple-1.yaml
 create mode 100644 doc/examples/icatdump-simple-2.xml
 create mode 100644 doc/examples/icatdump-simple-2.yaml

diff --git a/doc/examples/icatdump-simple-1.xml b/doc/examples/icatdump-simple-1.xml
new file mode 100644
index 00000000..b2c23038
--- /dev/null
+++ b/doc/examples/icatdump-simple-1.xml
@@ -0,0 +1,103 @@
+<?xml version="1.0" encoding="utf-8"?>
+<icatdata>
+<head>
+  <date>2024-01-03T13:21:15+00:00</date>
+  <service>https://icat.example.com:8181/ICATService/ICAT?wsdl</service>
+  <apiversion>6.0.0</apiversion>
+  <generator>icatdump (python-icat 1.2.0)</generator>
+</head>
+<data>
+  <user id="User_name-db=2Fahau">
+    <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
+    <email>ahau@example.org</email>
+    <familyName>Hau</familyName>
+    <fullName>Arnold Hau</fullName>
+    <givenName>Arnold</givenName>
+    <name>db/ahau</name>
+    <orcidId>0000-0002-3263</orcidId>
+  </user>
+  <user id="User_name-db=2Fahau">
+    <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
+    <email>ahau@example.org</email>
+    <familyName>Hau</familyName>
+    <fullName>Arnold Hau</fullName>
+    <givenName>Arnold</givenName>
+    <name>db/ahau</name>
+    <orcidId>0000-0002-3263</orcidId>
+  </user>
+  <user id="User_name-db=2Fjbotu">
+    <affiliation>Universit&#233; Paul-Val&#233;ry Montpellier 3</affiliation>
+    <email>jbotu@example.org</email>
+    <familyName>Botul</familyName>
+    <fullName>Jean-Baptiste Botul</fullName>
+    <givenName>Jean-Baptiste</givenName>
+    <name>db/jbotu</name>
+    <orcidId>0000-0002-3264</orcidId>
+  </user>
+  <user id="User_name-db=2Fjdoe">
+    <email>jdoe@example.org</email>
+    <familyName>Doe</familyName>
+    <fullName>John Doe</fullName>
+    <givenName>John</givenName>
+    <name>db/jdoe</name>
+  </user>
+  <user id="User_name-db=2Fnbour">
+    <affiliation>University of Nancago</affiliation>
+    <email>nbour@example.org</email>
+    <familyName>Bourbaki</familyName>
+    <fullName>Nicolas Bourbaki</fullName>
+    <givenName>Nicolas</givenName>
+    <name>db/nbour</name>
+    <orcidId>0000-0002-3266</orcidId>
+  </user>
+  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
+    <name>investigation_10100601-ST_owner</name>
+    <userGroups>
+      <user ref="User_name-db=2Fahau"/>
+    </userGroups>
+  </grouping>
+  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Freader">
+    <name>investigation_10100601-ST_reader</name>
+    <userGroups>
+      <user ref="User_name-db=2Fjbotu"/>
+    </userGroups>
+    <userGroups>
+      <user ref="User_name-db=2Fjdoe"/>
+    </userGroups>
+    <userGroups>
+      <user ref="User_name-db=2Fnbour"/>
+    </userGroups>
+  </grouping>
+  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fwriter">
+    <name>investigation_10100601-ST_writer</name>
+    <userGroups>
+      <user ref="User_name-db=2Fahau"/>
+    </userGroups>
+  </grouping>
+</data>
+<data>
+  <investigation id="Investigation_facility-(name-ESNF)_name-10100601=2DST_visitId-1=2E1=2DN">
+    <doi>DOI:00.0815/inv-00601</doi>
+    <endDate>2010-10-12T15:00:00+00:00</endDate>
+    <fileCount>4</fileCount>
+    <fileSize>127125</fileSize>
+    <name>10100601-ST</name>
+    <startDate>2010-09-30T10:27:24+00:00</startDate>
+    <title>Ni-Mn-Ga flat cone</title>
+    <visitId>1.1-N</visitId>
+    <facility ref="Facility_name-ESNF"/>
+    <investigationGroups>
+      <role>owner</role>
+      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fowner"/>
+    </investigationGroups>
+    <investigationGroups>
+      <role>reader</role>
+      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Freader"/>
+    </investigationGroups>
+    <investigationGroups>
+      <role>writer</role>
+      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fwriter"/>
+    </investigationGroups>
+  </investigation>
+</data>
+</icatdata>
diff --git a/doc/examples/icatdump-simple-1.yaml b/doc/examples/icatdump-simple-1.yaml
new file mode 100644
index 00000000..26648f3b
--- /dev/null
+++ b/doc/examples/icatdump-simple-1.yaml
@@ -0,0 +1,71 @@
+%YAML 1.1
+# Date: Wed, 03 Jan 2024 13:24:51 +0000
+# Service: https://icat.example.com:8181/ICATService/ICAT?wsdl
+# ICAT-API: 6.0.0
+# Generator: icatdump (python-icat 1.2.0)
+---
+grouping:
+  Grouping_name-investigation=5F10100601=2DST=5Fowner:
+    name: investigation_10100601-ST_owner
+    userGroups:
+    - user: User_name-db=2Fahau
+  Grouping_name-investigation=5F10100601=2DST=5Freader:
+    name: investigation_10100601-ST_reader
+    userGroups:
+    - user: User_name-db=2Fjbotu
+    - user: User_name-db=2Fjdoe
+    - user: User_name-db=2Fnbour
+  Grouping_name-investigation=5F10100601=2DST=5Fwriter:
+    name: investigation_10100601-ST_writer
+    userGroups:
+    - user: User_name-db=2Fahau
+user:
+  User_name-db=2Fahau:
+    affiliation: Goethe University Frankfurt, Faculty of Philosophy and History
+    email: ahau@example.org
+    familyName: Hau
+    fullName: Arnold Hau
+    givenName: Arnold
+    name: db/ahau
+    orcidId: 0000-0002-3263
+  User_name-db=2Fjbotu:
+    affiliation: "Universit\xE9 Paul-Val\xE9ry Montpellier 3"
+    email: jbotu@example.org
+    familyName: Botul
+    fullName: Jean-Baptiste Botul
+    givenName: Jean-Baptiste
+    name: db/jbotu
+    orcidId: 0000-0002-3264
+  User_name-db=2Fjdoe:
+    email: jdoe@example.org
+    familyName: Doe
+    fullName: John Doe
+    givenName: John
+    name: db/jdoe
+  User_name-db=2Fnbour:
+    affiliation: University of Nancago
+    email: nbour@example.org
+    familyName: Bourbaki
+    fullName: Nicolas Bourbaki
+    givenName: Nicolas
+    name: db/nbour
+    orcidId: 0000-0002-3266
+---
+investigation:
+  Investigation_facility-(name-ESNF)_name-10100601=2DST_visitId-1=2E1=2DN:
+    doi: DOI:00.0815/inv-00601
+    endDate: '2010-10-12T15:00:00+00:00'
+    facility: Facility_name-ESNF
+    fileCount: 4
+    fileSize: 127125
+    investigationGroups:
+    - grouping: Grouping_name-investigation=5F10100601=2DST=5Fowner
+      role: owner
+    - grouping: Grouping_name-investigation=5F10100601=2DST=5Freader
+      role: reader
+    - grouping: Grouping_name-investigation=5F10100601=2DST=5Fwriter
+      role: writer
+    name: 10100601-ST
+    startDate: '2010-09-30T10:27:24+00:00'
+    title: Ni-Mn-Ga flat cone
+    visitId: 1.1-N
diff --git a/doc/examples/icatdump-simple-2.xml b/doc/examples/icatdump-simple-2.xml
new file mode 100644
index 00000000..1c309602
--- /dev/null
+++ b/doc/examples/icatdump-simple-2.xml
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="utf-8"?>
+<icatdata>
+<head>
+  <date>2024-01-03T13:27:37+00:00</date>
+  <service>https://icat.example.com:8181/ICATService/ICAT?wsdl</service>
+  <apiversion>6.0.0</apiversion>
+  <generator>icatdump (python-icat 1.2.0)</generator>
+</head>
+<data>
+  <user id="User_name-db=2Fahau">
+    <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
+    <email>ahau@example.org</email>
+    <familyName>Hau</familyName>
+    <fullName>Arnold Hau</fullName>
+    <givenName>Arnold</givenName>
+    <name>db/ahau</name>
+    <orcidId>0000-0002-3263</orcidId>
+  </user>
+  <user id="User_name-db=2Fahau">
+    <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
+    <email>ahau@example.org</email>
+    <familyName>Hau</familyName>
+    <fullName>Arnold Hau</fullName>
+    <givenName>Arnold</givenName>
+    <name>db/ahau</name>
+    <orcidId>0000-0002-3263</orcidId>
+  </user>
+  <user id="User_name-db=2Fjbotu">
+    <affiliation>Universit&#233; Paul-Val&#233;ry Montpellier 3</affiliation>
+    <email>jbotu@example.org</email>
+    <familyName>Botul</familyName>
+    <fullName>Jean-Baptiste Botul</fullName>
+    <givenName>Jean-Baptiste</givenName>
+    <name>db/jbotu</name>
+    <orcidId>0000-0002-3264</orcidId>
+  </user>
+  <user id="User_name-db=2Fjdoe">
+    <email>jdoe@example.org</email>
+    <familyName>Doe</familyName>
+    <fullName>John Doe</fullName>
+    <givenName>John</givenName>
+    <name>db/jdoe</name>
+  </user>
+  <user id="User_name-db=2Fnbour">
+    <affiliation>University of Nancago</affiliation>
+    <email>nbour@example.org</email>
+    <familyName>Bourbaki</familyName>
+    <fullName>Nicolas Bourbaki</fullName>
+    <givenName>Nicolas</givenName>
+    <name>db/nbour</name>
+    <orcidId>0000-0002-3266</orcidId>
+  </user>
+  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
+    <name>investigation_10100601-ST_owner</name>
+  </grouping>
+  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Freader">
+    <name>investigation_10100601-ST_reader</name>
+  </grouping>
+  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fwriter">
+    <name>investigation_10100601-ST_writer</name>
+  </grouping>
+  <userGroup id="UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner)">
+    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fowner"/>
+    <user ref="User_name-db=2Fahau"/>
+  </userGroup>
+  <userGroup id="UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fwriter)">
+    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fwriter"/>
+    <user ref="User_name-db=2Fahau"/>
+  </userGroup>
+  <userGroup id="UserGroup_user-(name-db=2Fjbotu)_grouping-(name-investigation=5F10100601=2DST=5Freader)">
+    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Freader"/>
+    <user ref="User_name-db=2Fjbotu"/>
+  </userGroup>
+  <userGroup id="UserGroup_user-(name-db=2Fjdoe)_grouping-(name-investigation=5F10100601=2DST=5Freader)">
+    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Freader"/>
+    <user ref="User_name-db=2Fjdoe"/>
+  </userGroup>
+  <userGroup id="UserGroup_user-(name-db=2Fnbour)_grouping-(name-investigation=5F10100601=2DST=5Freader)">
+    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Freader"/>
+    <user ref="User_name-db=2Fnbour"/>
+  </userGroup>
+</data>
+<data>
+  <investigation id="Investigation_facility-(name-ESNF)_name-10100601=2DST_visitId-1=2E1=2DN">
+    <doi>DOI:00.0815/inv-00601</doi>
+    <endDate>2010-10-12T15:00:00+00:00</endDate>
+    <fileCount>4</fileCount>
+    <fileSize>127125</fileSize>
+    <name>10100601-ST</name>
+    <startDate>2010-09-30T10:27:24+00:00</startDate>
+    <title>Ni-Mn-Ga flat cone</title>
+    <visitId>1.1-N</visitId>
+    <facility ref="Facility_name-ESNF"/>
+    <investigationGroups>
+      <role>owner</role>
+      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fowner"/>
+    </investigationGroups>
+    <investigationGroups>
+      <role>reader</role>
+      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Freader"/>
+    </investigationGroups>
+    <investigationGroups>
+      <role>writer</role>
+      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fwriter"/>
+    </investigationGroups>
+  </investigation>
+</data>
+</icatdata>
diff --git a/doc/examples/icatdump-simple-2.yaml b/doc/examples/icatdump-simple-2.yaml
new file mode 100644
index 00000000..79e4a296
--- /dev/null
+++ b/doc/examples/icatdump-simple-2.yaml
@@ -0,0 +1,79 @@
+%YAML 1.1
+# Date: Wed, 03 Jan 2024 13:27:52 +0000
+# Service: https://icat.example.com:8181/ICATService/ICAT?wsdl
+# ICAT-API: 6.0.0
+# Generator: icatdump (python-icat 1.2.0)
+---
+grouping:
+  Grouping_name-investigation=5F10100601=2DST=5Fowner:
+    name: investigation_10100601-ST_owner
+  Grouping_name-investigation=5F10100601=2DST=5Freader:
+    name: investigation_10100601-ST_reader
+  Grouping_name-investigation=5F10100601=2DST=5Fwriter:
+    name: investigation_10100601-ST_writer
+user:
+  User_name-db=2Fahau:
+    affiliation: Goethe University Frankfurt, Faculty of Philosophy and History
+    email: ahau@example.org
+    familyName: Hau
+    fullName: Arnold Hau
+    givenName: Arnold
+    name: db/ahau
+    orcidId: 0000-0002-3263
+  User_name-db=2Fjbotu:
+    affiliation: "Universit\xE9 Paul-Val\xE9ry Montpellier 3"
+    email: jbotu@example.org
+    familyName: Botul
+    fullName: Jean-Baptiste Botul
+    givenName: Jean-Baptiste
+    name: db/jbotu
+    orcidId: 0000-0002-3264
+  User_name-db=2Fjdoe:
+    email: jdoe@example.org
+    familyName: Doe
+    fullName: John Doe
+    givenName: John
+    name: db/jdoe
+  User_name-db=2Fnbour:
+    affiliation: University of Nancago
+    email: nbour@example.org
+    familyName: Bourbaki
+    fullName: Nicolas Bourbaki
+    givenName: Nicolas
+    name: db/nbour
+    orcidId: 0000-0002-3266
+userGroup:
+  UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner):
+    grouping: Grouping_name-investigation=5F10100601=2DST=5Fowner
+    user: User_name-db=2Fahau
+  UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fwriter):
+    grouping: Grouping_name-investigation=5F10100601=2DST=5Fwriter
+    user: User_name-db=2Fahau
+  UserGroup_user-(name-db=2Fjbotu)_grouping-(name-investigation=5F10100601=2DST=5Freader):
+    grouping: Grouping_name-investigation=5F10100601=2DST=5Freader
+    user: User_name-db=2Fjbotu
+  UserGroup_user-(name-db=2Fjdoe)_grouping-(name-investigation=5F10100601=2DST=5Freader):
+    grouping: Grouping_name-investigation=5F10100601=2DST=5Freader
+    user: User_name-db=2Fjdoe
+  UserGroup_user-(name-db=2Fnbour)_grouping-(name-investigation=5F10100601=2DST=5Freader):
+    grouping: Grouping_name-investigation=5F10100601=2DST=5Freader
+    user: User_name-db=2Fnbour
+---
+investigation:
+  Investigation_facility-(name-ESNF)_name-10100601=2DST_visitId-1=2E1=2DN:
+    doi: DOI:00.0815/inv-00601
+    endDate: '2010-10-12T15:00:00+00:00'
+    facility: Facility_name-ESNF
+    fileCount: 4
+    fileSize: 127125
+    investigationGroups:
+    - grouping: Grouping_name-investigation=5F10100601=2DST=5Fowner
+      role: owner
+    - grouping: Grouping_name-investigation=5F10100601=2DST=5Freader
+      role: reader
+    - grouping: Grouping_name-investigation=5F10100601=2DST=5Fwriter
+      role: writer
+    name: 10100601-ST
+    startDate: '2010-09-30T10:27:24+00:00'
+    title: Ni-Mn-Ga flat cone
+    visitId: 1.1-N

From af0ee8dc1fd9b401b73ca6644863da8c1fb2315d Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 3 Jan 2024 17:15:03 +0100
Subject: [PATCH 039/102] Add subsections on ICAT data XML files and on ICAT
 data YAML files including the example data files, but no other content yet

---
 doc/src/file-icatdata.rst | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index 06d8f70c..a0126383 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -66,6 +66,30 @@ indirectly related to one of the included objects.  In this case,
 only a reference to the related object will be included in the data
 file.  The related object must have its own entry.
 
+ICAT data XML files
+~~~~~~~~~~~~~~~~~~~
+
+In this section we describe the ICAT data file format using the XML
+backend.
+
+.. literalinclude:: ../examples/icatdump-simple-1.xml
+   :language: xml
+
+.. literalinclude:: ../examples/icatdump-simple-2.xml
+   :language: xml
+
+ICAT data YAML files
+~~~~~~~~~~~~~~~~~~~~
+
+In this section we describe the ICAT data file format using the YAML
+backend.
+
+.. literalinclude:: ../examples/icatdump-simple-1.yaml
+   :language: yaml
+
+.. literalinclude:: ../examples/icatdump-simple-2.yaml
+   :language: yaml
+
 
 .. [#dc] There is one exception: DataCollections don't have a
          uniqueness constraint and can't reliably be searched by

From 4ad8e9e9dd3b48907dab282c46ecf037f55cb2d6 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 3 Jan 2024 19:09:50 +0100
Subject: [PATCH 040/102] Add the text content for the subsection on ICAT data
 XML files

---
 doc/src/file-icatdata.rst | 73 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index a0126383..b856e625 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -70,13 +70,83 @@ ICAT data XML files
 ~~~~~~~~~~~~~~~~~~~
 
 In this section we describe the ICAT data file format using the XML
-backend.
+backend.  Consider the following example:
 
 .. literalinclude:: ../examples/icatdump-simple-1.xml
    :language: xml
 
+The root element of ICAT data XML files is ``icatdata``.  It may
+optionally have one ``head`` subelement and one or more ``data``
+subelements.
+
+The ``head`` element will be ignored by :ref:`icatingest`.  It serves
+to provide some information on the context of the creation of the data
+file, which may be useful for debugging in case of issues.
+
+The content of each ``data`` element is one chunk according to the
+logical structure explained above.  The present example contains two
+chunks.  Each element within the ``data`` element corresponds to an
+ICAT object according to the ICAT schema.  In the present example, the
+first chunk contains five User objects and three Grouping objects.
+The second chunk only contains one Investigation.
+
+These object elements should have an ``id`` attribute that may be used
+to reference the object in relations later on.  The ``id`` value has
+no meaning other than this file-internal referencing between objects.
+The subelements of the object elements correspond to the object's
+attributes and relations in the ICAT schema.  All many-to-one
+relations must be provided and reference already existing objects,
+i.e. they must either already have existed before starting the
+ingestion or appear earlier in the ICAT data file than the referencing
+object, so that they will be created earlier.  The related object may
+either be referenced by id using the special attribute ``ref`` or by
+the related object's attribute values, using XML attributes of the
+same name.  In the latter case, the attribute values must uniquely
+define the related object.
+
+The object elements may include one-to-many relations.  In this case,
+the related objects will be created along with the parent in one
+single cascading call.  Alternatively, these related objects may be
+added separately as subelements of the ``data`` element later in the
+file.  In the present example, the Grouping objects include their
+related UserGroup objects.  Note that these UserGroups include their
+relation to the User.  The User object is referenced by their
+respective id in the ``ref`` attribute.  But the UserGroups do not
+include their relation with Grouping.  That relationship is implied by
+the parent relation of the object in the file.
+
+In a similar way, the Investigation in the second chunk includes
+related InvestigationGroups that will be created along with the
+Investigation.  The InvestigationGroup objects include a reference to
+the corresponding Grouping.  Note that these references go across
+chunk boundaries.  The index that caches the object ids to resolve
+object relations from the first chunk that did contain the ids of the
+Groupings will already have been discarded from memory when the
+second chunk is read.  But the references use the key that can be
+passed to :meth:`icat.client.Client.searchUniqueKey` to search these
+Groupings from ICAT.
+
+Finally note that the file format also depends on the ICAT schema
+version: the present example can only be ingested into ICAT server 5.0
+or newer, because the attributes fileCount and fileSize have been
+added to Investigation in this version.  With older ICAT versions, it
+will fail because the attributes are not defined.
+
+Consider a second example.  It defines a subset of the same content
+as the previous example:
+
 .. literalinclude:: ../examples/icatdump-simple-2.xml
    :language: xml
+   :lines: 1-9,28-52,56-58,70-82,108
+
+The difference is that we now add the UserGroup objects separately in
+direct subelements of ``data`` instead of including them in the
+related Grouping objects.
+
+You will find more extensive examples in the source distribution of
+python-icat.  The distribution also provides XML Schema Definition
+files for the ICAT data XML file format corresponding to various ICAT
+schema versions.
 
 ICAT data YAML files
 ~~~~~~~~~~~~~~~~~~~~
@@ -89,6 +159,7 @@ backend.
 
 .. literalinclude:: ../examples/icatdump-simple-2.yaml
    :language: yaml
+   :lines: 1-7,10-11,14,23-45,52-60
 
 
 .. [#dc] There is one exception: DataCollections don't have a

From 85db750d944dea33d7f9ef6e00ff503ff28ad224 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 10:27:41 +0100
Subject: [PATCH 041/102] Add tests on error handling in
 Client.searchMatching(), in particular, test the condition already fixed in
 023f4c0

---
 tests/test_06_client.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tests/test_06_client.py b/tests/test_06_client.py
index b36cefb1..d7e79f05 100644
--- a/tests/test_06_client.py
+++ b/tests/test_06_client.py
@@ -498,3 +498,38 @@ def test_searchMatching_include(client):
     assert obj.name == "e208945"
     assert len(obj.datafiles) > 0
 
+def test_searchMatching_error_attribute_missing(client):
+    """Test error handling with searchMatching():
+    leaving out a required attribute
+    """
+    facility = client.assertedSearch("Facility")[0]
+    # Neglect to set visitId
+    investigation = client.new("Investigation",
+                               name="12100409-ST",
+                               facility=facility)
+    with pytest.raises(ValueError):
+        obj = client.searchMatching(investigation)
+
+def test_searchMatching_error_relation_missing(client):
+    """Test error handling with searchMatching():
+    leaving out a required many-to-one relation
+    """
+    facility = client.assertedSearch("Facility")[0]
+    # Neglect to set facility
+    investigation = client.new("Investigation",
+                               name="12100409-ST", visitId="1.1-P")
+    with pytest.raises(ValueError):
+        obj = client.searchMatching(investigation)
+
+@pytest.mark.xfail(raises=TypeError)
+def test_searchMatching_error_relation_id_missing(client):
+    """Test error handling with searchMatching():
+    a required many-to-one relation has no id
+    """
+    facility = client.assertedSearch("Facility")[0]
+    fac = client.new("Facility", name=str(facility.name))
+    investigation = client.new("Investigation",
+                               name="12100409-ST", visitId="1.1-P",
+                               facility=fac)
+    with pytest.raises(ValueError):
+        obj = client.searchMatching(investigation)

From c5584f8077989cc6c81d90cd0c81976bc1d145c9 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 11:20:12 +0100
Subject: [PATCH 042/102] Add a IngestReader test reading from a file object

---
 tests/test_06_ingest.py | 68 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index bdd08f1e..d8ece993 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -3,6 +3,7 @@
 
 from collections import namedtuple
 import datetime
+import io
 import pytest
 pytest.importorskip("lxml")
 from lxml import etree
@@ -19,6 +20,11 @@ def get_test_investigation(client):
     })
     return client.assertedSearch(query)[0]
 
+class NamedBytesIO(io.BytesIO):
+    def __init__(self, initial_bytes, name):
+        super().__init__(initial_bytes)
+        self.name = name
+
 @pytest.fixture(scope="module")
 def client(setupicat):
     client, conf = getConfig(confSection="ingest", ids=False)
@@ -376,6 +382,68 @@ def test_ingest(client, investigation, samples, schemadir, case):
         for query, res in case.checks[name]:
             assert client.assertedSearch(query % ds.id)[0] == res
 
+io_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
+<icatingest version="1.1">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.27a</generator>
+  </head>
+  <data>
+    <dataset id="Dataset_1">
+      <name>testingest_io_1</name>
+      <description>Dy01Cp02 at 10.2 K</description>
+      <startDate>2022-02-03T15:40:12+01:00</startDate>
+      <endDate>2022-02-03T17:04:22+01:00</endDate>
+      <parameters>
+        <stringValue>neutron</stringValue>
+        <type name="Probe"/>
+      </parameters>
+    </dataset>
+  </data>
+</icatingest>
+""".encode("utf8"), "io_metadata")
+io_cases = [
+    Case(
+        data = ["testingest_io_1"],
+        metadata = io_metadata,
+        schema = gettestdata("icatdata-4.4.xsd"),
+        checks = {
+            "testingest_io_1": [
+                ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
+                 "Dy01Cp02 at 10.2 K"),
+                (("SELECT p.stringValue FROM DatasetParameter p "
+                  "JOIN p.dataset AS ds JOIN p.type AS t "
+                  "WHERE ds.id = %d AND t.name = 'Probe'"),
+                 "neutron"),
+            ],
+        },
+        marks = (),
+    ),
+]
+
+@pytest.mark.parametrize("case", [
+    pytest.param(c, id=c.metadata.name, marks=c.marks) for c in io_cases
+])
+def test_ingest_fileobj(client, investigation, samples, schemadir, case):
+    """Test ingest reading from a file object rather than a Path
+    """
+    datasets = []
+    for name in case.data:
+        datasets.append(client.new("Dataset", name=name))
+    reader = IngestReader(client, case.metadata, investigation)
+    reader.ingest(datasets, dry_run=True, update_ds=True)
+    for ds in datasets:
+        ds.create()
+    reader.ingest(datasets)
+    for name in case.checks.keys():
+        query = Query(client, "Dataset", conditions={
+            "name": "= '%s'" % name,
+            "investigation.id": "= %d" % investigation.id,
+        })
+        ds = client.assertedSearch(query)[0]
+        for query, res in case.checks[name]:
+            assert client.assertedSearch(query % ds.id)[0] == res
+
 
 badcases = [
     Case(

From 6f592c38b18606efa5b0aceff745955192a111b0 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 11:36:09 +0100
Subject: [PATCH 043/102] - Inline metadata-5.0-badref.xml into
 test_06_ingest.py - Rename test_badref_ingest to test_ingest_error_invalid

---
 tests/data/metadata-5.0-badref.xml | 29 ------------------------
 tests/test_06_ingest.py            | 36 +++++++++++++++++++++++++-----
 2 files changed, 31 insertions(+), 34 deletions(-)
 delete mode 100644 tests/data/metadata-5.0-badref.xml

diff --git a/tests/data/metadata-5.0-badref.xml b/tests/data/metadata-5.0-badref.xml
deleted file mode 100644
index 252a69cb..00000000
--- a/tests/data/metadata-5.0-badref.xml
+++ /dev/null
@@ -1,29 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<icatingest version="1.0">
-  <!-- NOTE:
-       this is an example for an *invalid* icatingest file,
-       using bad dataset references.
-    -->
-  <head>
-    <date>2023-06-16T11:01:15+02:00</date>
-    <generator>metadata-writer 0.27a</generator>
-  </head>
-  <data>
-    <dataset id="Dataset_1">
-      <name>e208339</name>
-    </dataset>
-    <datasetInstrument>
-      <dataset ref="Dataset_investigation-(name-10100601=2DST)_name-e208339"/>
-      <instrument pid="DOI:00.0815/inst-00048"/>
-    </datasetInstrument>
-    <datasetTechnique>
-      <dataset ref="Dataset_investigation-(name-10100601=2DST)_name-e208339"/>
-      <technique pid="PaNET:PaNET01089"/>
-    </datasetTechnique>
-    <datasetParameter>
-      <stringValue>very evil</stringValue>
-      <dataset ref="Dataset_investigation-(name-10100601=2DST)_name-e208339"/>
-      <type name="Probe"/>
-    </datasetParameter>
-  </data>
-</icatingest>
diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index d8ece993..f2f2784c 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -445,10 +445,36 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
             assert client.assertedSearch(query % ds.id)[0] == res
 
 
-badcases = [
+invalid_ref_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
+<icatingest version="1.0">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.27a</generator>
+  </head>
+  <data>
+    <dataset id="Dataset_1">
+      <name>testingest_err_invalid_ref</name>
+    </dataset>
+    <datasetInstrument>
+      <dataset ref="Dataset_investigation-(name-12100409=2DST)_name-testingest=5Ferr=5Finvalid=5Fref"/>
+      <instrument pid="DOI:00.0815/inst-00048"/>
+    </datasetInstrument>
+    <datasetTechnique>
+      <dataset ref="Dataset_investigation-(name-12100409=2DST)_name-testingest=5Ferr=5Finvalid=5Fref"/>
+      <technique pid="PaNET:PaNET01089"/>
+    </datasetTechnique>
+    <datasetParameter>
+      <stringValue>very evil</stringValue>
+      <dataset ref="Dataset_investigation-(name-12100409=2DST)_name-testingest=5Ferr=5Finvalid=5Fref"/>
+      <type name="Probe"/>
+    </datasetParameter>
+  </data>
+</icatingest>
+""".encode("utf8"), "invalid_ref")
+invalid_cases = [
     Case(
-        data = ["e208339"],
-        metadata = gettestdata("metadata-5.0-badref.xml"),
+        data = ["testingest_err_invalid_ref"],
+        metadata = invalid_ref_metadata,
         schema = gettestdata("icatdata-5.0.xsd"),
         checks = {},
         marks = (
@@ -458,9 +484,9 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
     ),
 ]
 @pytest.mark.parametrize("case", [
-    pytest.param(c, id=c.metadata.name, marks=c.marks) for c in badcases
+    pytest.param(c, id=c.metadata.name, marks=c.marks) for c in invalid_cases
 ])
-def test_badref_ingest(client, investigation, schemadir, case):
+def test_ingest_error_invalid(client, investigation, schemadir, case):
     datasets = []
     for name in case.data:
         datasets.append(client.new("Dataset", name=name))

From 44ac19b0b7f16a7b1c104c0b8bb1ecc8f438c689 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 12:33:53 +0100
Subject: [PATCH 044/102] - Simplify the existing test case for invalid
 metadata - Add another test case for invalid metadata (xfail #146) - Add a
 test for search errors during ingest (one xfail #146)

---
 tests/test_06_ingest.py | 106 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 96 insertions(+), 10 deletions(-)

diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index f2f2784c..28268237 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -455,14 +455,6 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
     <dataset id="Dataset_1">
       <name>testingest_err_invalid_ref</name>
     </dataset>
-    <datasetInstrument>
-      <dataset ref="Dataset_investigation-(name-12100409=2DST)_name-testingest=5Ferr=5Finvalid=5Fref"/>
-      <instrument pid="DOI:00.0815/inst-00048"/>
-    </datasetInstrument>
-    <datasetTechnique>
-      <dataset ref="Dataset_investigation-(name-12100409=2DST)_name-testingest=5Ferr=5Finvalid=5Fref"/>
-      <technique pid="PaNET:PaNET01089"/>
-    </datasetTechnique>
     <datasetParameter>
       <stringValue>very evil</stringValue>
       <dataset ref="Dataset_investigation-(name-12100409=2DST)_name-testingest=5Ferr=5Finvalid=5Fref"/>
@@ -471,15 +463,44 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
   </data>
 </icatingest>
 """.encode("utf8"), "invalid_ref")
+invalid_dup_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
+<icatingest version="1.0">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.27a</generator>
+  </head>
+  <data>
+    <dataset id="Dataset_1">
+      <name>testingest_err_invalid_dup</name>
+    </dataset>
+    <datasetParameter>
+      <numericValue>10.0</numericValue>
+      <dataset ref="Dataset_1"/>
+      <type name="Reactor power" units="MW"/>
+    </datasetParameter>
+    <datasetParameter>
+      <numericValue>17.0</numericValue>
+      <dataset ref="Dataset_1"/>
+      <type name="Reactor power" units="MW"/>
+    </datasetParameter>
+  </data>
+</icatingest>
+""".encode("utf8"), "invalid_dup")
 invalid_cases = [
     Case(
         data = ["testingest_err_invalid_ref"],
         metadata = invalid_ref_metadata,
         schema = gettestdata("icatdata-5.0.xsd"),
         checks = {},
+        marks = (),
+    ),
+    Case(
+        data = ["testingest_err_invalid_dup"],
+        metadata = invalid_dup_metadata,
+        schema = gettestdata("icatdata-4.4.xsd"),
+        checks = {},
         marks = (
-            pytest.mark.skipif(icat_version < "5.0",
-                               reason="Need ICAT schema 5.0 or newer"),
+            pytest.mark.xfail(reason="Issue #146"),
         ),
     ),
 ]
@@ -494,6 +515,71 @@ def test_ingest_error_invalid(client, investigation, schemadir, case):
         reader = IngestReader(client, case.metadata, investigation)
         reader.ingest(datasets, dry_run=True, update_ds=True)
 
+searcherr_attr_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
+<icatingest version="1.0">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.27a</generator>
+  </head>
+  <data>
+    <dataset id="Dataset_1">
+      <name>testingest_err_search_attr</name>
+    </dataset>
+    <datasetParameter>
+      <numericValue>10.0</numericValue>
+      <dataset ref="Dataset_1"/>
+      <type name="not found"/>
+    </datasetParameter>
+  </data>
+</icatingest>
+""".encode("utf8"), "search_attr")
+searcherr_ref_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
+<icatingest version="1.0">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.27a</generator>
+  </head>
+  <data>
+    <dataset id="Dataset_1">
+      <name>testingest_err_search_ref</name>
+    </dataset>
+    <datasetParameter>
+      <numericValue>10.0</numericValue>
+      <dataset ref="Dataset_notfound"/>
+      <type name="Reactor power" units="MW"/>
+    </datasetParameter>
+  </data>
+</icatingest>
+""".encode("utf8"), "search_ref")
+searcherr_cases = [
+    Case(
+        data = ["testingest_err_search_attr"],
+        metadata = searcherr_attr_metadata,
+        schema = gettestdata("icatdata-5.0.xsd"),
+        checks = {},
+        marks = (),
+    ),
+    Case(
+        data = ["testingest_err_search_ref"],
+        metadata = searcherr_ref_metadata,
+        schema = gettestdata("icatdata-4.4.xsd"),
+        checks = {},
+        marks = (
+            pytest.mark.xfail(raises=ValueError, reason="Issue #146"),
+        ),
+    ),
+]
+@pytest.mark.parametrize("case", [
+    pytest.param(c, id=c.metadata.name, marks=c.marks) for c in searcherr_cases
+])
+def test_ingest_error_searcherr(client, investigation, schemadir, case):
+    datasets = []
+    for name in case.data:
+        datasets.append(client.new("Dataset", name=name))
+    with pytest.raises(icat.SearchResultError):
+        reader = IngestReader(client, case.metadata, investigation)
+        reader.ingest(datasets, dry_run=True, update_ds=True)
+
 
 customcases = [
     Case(

From aba1cf2aff4684b77b65a08e1a71ebf28f0f7d79 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 13:21:49 +0100
Subject: [PATCH 045/102] Minor: (the transformations of) these data actually
 conform to icatdata-4.4.xsd

---
 tests/test_06_ingest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index 03c29792..9349a0f1 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -490,7 +490,7 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
     Case(
         data = ["testingest_err_invalid_ref"],
         metadata = invalid_ref_metadata,
-        schema = gettestdata("icatdata-5.0.xsd"),
+        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
         marks = (),
     ),
@@ -555,7 +555,7 @@ def test_ingest_error_invalid(client, investigation, schemadir, case):
     Case(
         data = ["testingest_err_search_attr"],
         metadata = searcherr_attr_metadata,
-        schema = gettestdata("icatdata-5.0.xsd"),
+        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
         marks = (),
     ),

From 22af27d5cb889f17c076fd767977042b64de3eba Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 14:21:07 +0100
Subject: [PATCH 046/102] Add yet another test case for invalid metadata (xfail
 #146)

---
 tests/test_06_ingest.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index 9349a0f1..0c5642c3 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -486,6 +486,27 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
   </data>
 </icatingest>
 """.encode("utf8"), "invalid_dup")
+invalid_dup_id_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
+<icatingest version="1.0">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.27a</generator>
+  </head>
+  <data>
+    <dataset id="Dataset_1">
+      <name>testingest_err_invalid_dup_id_1</name>
+    </dataset>
+    <dataset id="Dataset_1">
+      <name>testingest_err_invalid_dup_id_2</name>
+    </dataset>
+    <datasetParameter>
+      <numericValue>10.0</numericValue>
+      <dataset ref="Dataset_1"/>
+      <type name="Reactor power" units="MW"/>
+    </datasetParameter>
+  </data>
+</icatingest>
+""".encode("utf8"), "invalid_dup_id")
 invalid_cases = [
     Case(
         data = ["testingest_err_invalid_ref"],
@@ -503,6 +524,16 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
             pytest.mark.xfail(reason="Issue #146"),
         ),
     ),
+    Case(
+        data = ["testingest_err_invalid_dup_id_1",
+                "testingest_err_invalid_dup_id_2"],
+        metadata = invalid_dup_id_metadata,
+        schema = gettestdata("icatdata-4.4.xsd"),
+        checks = {},
+        marks = (
+            pytest.mark.xfail(reason="Issue #146"),
+        ),
+    ),
 ]
 @pytest.mark.parametrize("case", [
     pytest.param(c, id=c.metadata.name, marks=c.marks) for c in invalid_cases

From 769e3c01b616bba7d5fd190af9aaccfb2e3b67ec Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 14:43:01 +0100
Subject: [PATCH 047/102] Check for duplicate ids in IngestReader

---
 src/icat/ingest.py      | 6 ++++++
 tests/test_06_ingest.py | 4 +---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 57f15648..8f83bf1f 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -144,6 +144,12 @@ def get_xslt(self, ingest_data):
             raise InvalidIngestFileError("unknown format")
         return self.SchemaDir / xslt
 
+    def getobjs_from_data(self, data, objindex):
+        for key, obj in super().getobjs_from_data(data, objindex):
+            if key in objindex:
+                raise InvalidIngestFileError("Duplicate id %s" % key)
+            yield key, obj
+
     def getobjs(self):
         """Iterate over the objects in the ingest file.
         """
diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index 0c5642c3..ba842124 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -530,9 +530,7 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
         metadata = invalid_dup_id_metadata,
         schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
-        marks = (
-            pytest.mark.xfail(reason="Issue #146"),
-        ),
+        marks = (),
     ),
 ]
 @pytest.mark.parametrize("case", [

From 8f2b172a6f8d85d361759e9f3343ba6dde52cba9 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 16:07:16 +0100
Subject: [PATCH 048/102] Check for duplicate objects in IngestReader

---
 src/icat/ingest.py      | 37 +++++++++++++++++++++++++++++++++++++
 tests/test_06_ingest.py |  4 +---
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 8f83bf1f..126580e0 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -8,6 +8,7 @@
 .. versionadded:: 1.1.0
 """
 
+from collections import namedtuple
 from pathlib import Path
 from lxml import etree
 
@@ -15,6 +16,37 @@
 from .exception import InvalidIngestFileError
 
 
+_ObjIdTuple = namedtuple('_ObjIdTuple', ['t', 'dsname', 'relid'])
+class _ObjId(_ObjIdTuple):
+    _MsgTemplate = {
+        'Dataset':
+            "Dataset, name:%(dsname)s",
+        'DatasetInstrument':
+            "DatasetInstrument, Dataset:%(dsname)s, Instrument:%(relid)d",
+        'DatasetTechnique':
+            "DatasetTechnique, Dataset:%(dsname)s, Technique:%(relid)d",
+        'DatasetParameter':
+            "DatasetParameter, Dataset:%(dsname)s, ParameterType:%(relid)d",
+    }
+    def __new__(cls, obj):
+        kwargs = dict(t=obj.BeanName, relid=None)
+        if obj.BeanName == "Dataset":
+            kwargs['dsname'] = obj.name
+        else:
+            kwargs['dsname'] = obj.dataset.name
+            if obj.BeanName == "DatasetInstrument":
+                kwargs['relid'] = obj.instrument.id
+            elif obj.BeanName == "DatasetTechnique":
+                kwargs['relid'] = obj.technique.id
+            elif obj.BeanName == "DatasetParameter":
+                kwargs['relid'] = obj.type.id
+            else:
+                raise InvalidIngestFileError("Invalid %s object"
+                                             % (obj.BeanName))
+        return super().__new__(cls, **kwargs)
+    def __str__(self):
+        return self._MsgTemplate[self.t] % self._asdict()
+
 class IngestReader(XMLDumpFileReader):
     """Read metadata from XML ingest files into ICAT.
 
@@ -145,9 +177,14 @@ def get_xslt(self, ingest_data):
         return self.SchemaDir / xslt
 
     def getobjs_from_data(self, data, objindex):
+        typed_objindex = set()
         for key, obj in super().getobjs_from_data(data, objindex):
             if key in objindex:
                 raise InvalidIngestFileError("Duplicate id %s" % key)
+            objid = _ObjId(obj)
+            if objid in typed_objindex:
+                raise InvalidIngestFileError("Duplicate %s" % str(objid))
+            typed_objindex.add(objid)
             yield key, obj
 
     def getobjs(self):
diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index ba842124..a68f20f9 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -520,9 +520,7 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
         metadata = invalid_dup_metadata,
         schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
-        marks = (
-            pytest.mark.xfail(reason="Issue #146"),
-        ),
+        marks = (),
     ),
     Case(
         data = ["testingest_err_invalid_dup_id_1",

From f50b236000313c2279482e492f660fcff076d1e6 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 16:31:07 +0100
Subject: [PATCH 049/102] Update documentation for IngestReader

---
 src/icat/ingest.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 126580e0..5d69b963 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -220,8 +220,11 @@ def ingest(self, datasets, dry_run=False, update_ds=False):
         :param update_ds: flag whether to update the `datasets` in the
             argument.
         :type update_ds: :class:`bool`
-        :raise icat.exception.InvalidIngestFileError: if any unallowed
-            object is read from the input.
+        :raise icat.exception.InvalidIngestFileError: if the input is
+            not valid, for instance if there is any unallowed object
+            or duplicate objects.
+        :raise icat.exception.SearchResultError: if any object
+            references in the input could not be resolved.
         """
         dataset_map = { ds.name: ds for ds in datasets }
         allowed_ds_related = {

From 16388f298e297859bc9bed87f40ee8374d6a6e63 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 16:39:20 +0100
Subject: [PATCH 050/102] Add more test cases

---
 tests/test_06_ingest.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index a68f20f9..b9c560f3 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -445,6 +445,24 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
             assert client.assertedSearch(query % ds.id)[0] == res
 
 
+invalid_root_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
+<icatinvalid version="1.0">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.27a</generator>
+  </head>
+  <data/>
+</icatinvalid>
+""".encode("utf8"), "invalid_root")
+invalid_ver_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
+<icatingest version="0.7">
+  <head>
+    <date>2023-06-16T11:01:15+02:00</date>
+    <generator>metadata-writer 0.27a</generator>
+  </head>
+  <data/>
+</icatingest>
+""".encode("utf8"), "invalid_version")
 invalid_ref_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
 <icatingest version="1.0">
   <head>
@@ -508,6 +526,20 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
 </icatingest>
 """.encode("utf8"), "invalid_dup_id")
 invalid_cases = [
+    Case(
+        data = [],
+        metadata = invalid_root_metadata,
+        schema = None,
+        checks = {},
+        marks = (),
+    ),
+    Case(
+        data = [],
+        metadata = invalid_ver_metadata,
+        schema = None,
+        checks = {},
+        marks = (),
+    ),
     Case(
         data = ["testingest_err_invalid_ref"],
         metadata = invalid_ref_metadata,

From 8b8c1f46a1bf91898e1753cecffca5fd938b839a Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 5 Jan 2024 16:56:48 +0100
Subject: [PATCH 051/102] Update changelog

---
 CHANGES.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index 3e58fe74..1744b6ef 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -12,6 +12,9 @@ New features
   processing the input in custom versions of
   :class:`icat.ingest.IngestReader`.
 
++ `#146`_, `#147`_: Better error handling in
+  :class:`icat.ingest.IngestReader`.
+
 Incompatible changes
 --------------------
 
@@ -29,6 +32,8 @@ Bug fixes and minor changes
 .. _#143: https://github.com/icatproject/python-icat/issues/143
 .. _#144: https://github.com/icatproject/python-icat/pull/144
 .. _#145: https://github.com/icatproject/python-icat/pull/145
+.. _#146: https://github.com/icatproject/python-icat/issues/146
+.. _#147: https://github.com/icatproject/python-icat/pull/147
 
 
 1.2.0 (2023-10-31)

From ea4b9d60f6035f4a9604dc2579de23232a9b2e73 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Sat, 6 Jan 2024 18:59:53 +0100
Subject: [PATCH 052/102] Typo

---
 doc/src/scripts.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/src/scripts.rst b/doc/src/scripts.rst
index 82f57d75..f944efde 100644
--- a/doc/src/scripts.rst
+++ b/doc/src/scripts.rst
@@ -2,7 +2,7 @@ Command line scripts
 ====================
 
 This section provides a reference for the command line scripts that
-are alongside with python-icat.
+are installed alongside python-icat.
 
 .. toctree::
    :maxdepth: 1

From e62dc5ae52a8f4a835e96fad9a4b1976181b19e1 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 16 Jan 2024 12:36:27 +0100
Subject: [PATCH 053/102] - Review documentation Section "ICAT data XML files",
 adding more   inline examples - Drop icatdump-simple-2.xml example, rename
 icatdump-simple-1.xml to   icatdump-simple.xml

---
 doc/examples/icatdump-simple-2.xml            | 108 ------------------
 ...tdump-simple-1.xml => icatdump-simple.xml} |   0
 doc/src/file-icatdata.rst                     | 108 +++++++++++++-----
 3 files changed, 80 insertions(+), 136 deletions(-)
 delete mode 100644 doc/examples/icatdump-simple-2.xml
 rename doc/examples/{icatdump-simple-1.xml => icatdump-simple.xml} (100%)

diff --git a/doc/examples/icatdump-simple-2.xml b/doc/examples/icatdump-simple-2.xml
deleted file mode 100644
index 1c309602..00000000
--- a/doc/examples/icatdump-simple-2.xml
+++ /dev/null
@@ -1,108 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<icatdata>
-<head>
-  <date>2024-01-03T13:27:37+00:00</date>
-  <service>https://icat.example.com:8181/ICATService/ICAT?wsdl</service>
-  <apiversion>6.0.0</apiversion>
-  <generator>icatdump (python-icat 1.2.0)</generator>
-</head>
-<data>
-  <user id="User_name-db=2Fahau">
-    <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
-    <email>ahau@example.org</email>
-    <familyName>Hau</familyName>
-    <fullName>Arnold Hau</fullName>
-    <givenName>Arnold</givenName>
-    <name>db/ahau</name>
-    <orcidId>0000-0002-3263</orcidId>
-  </user>
-  <user id="User_name-db=2Fahau">
-    <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
-    <email>ahau@example.org</email>
-    <familyName>Hau</familyName>
-    <fullName>Arnold Hau</fullName>
-    <givenName>Arnold</givenName>
-    <name>db/ahau</name>
-    <orcidId>0000-0002-3263</orcidId>
-  </user>
-  <user id="User_name-db=2Fjbotu">
-    <affiliation>Universit&#233; Paul-Val&#233;ry Montpellier 3</affiliation>
-    <email>jbotu@example.org</email>
-    <familyName>Botul</familyName>
-    <fullName>Jean-Baptiste Botul</fullName>
-    <givenName>Jean-Baptiste</givenName>
-    <name>db/jbotu</name>
-    <orcidId>0000-0002-3264</orcidId>
-  </user>
-  <user id="User_name-db=2Fjdoe">
-    <email>jdoe@example.org</email>
-    <familyName>Doe</familyName>
-    <fullName>John Doe</fullName>
-    <givenName>John</givenName>
-    <name>db/jdoe</name>
-  </user>
-  <user id="User_name-db=2Fnbour">
-    <affiliation>University of Nancago</affiliation>
-    <email>nbour@example.org</email>
-    <familyName>Bourbaki</familyName>
-    <fullName>Nicolas Bourbaki</fullName>
-    <givenName>Nicolas</givenName>
-    <name>db/nbour</name>
-    <orcidId>0000-0002-3266</orcidId>
-  </user>
-  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
-    <name>investigation_10100601-ST_owner</name>
-  </grouping>
-  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Freader">
-    <name>investigation_10100601-ST_reader</name>
-  </grouping>
-  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fwriter">
-    <name>investigation_10100601-ST_writer</name>
-  </grouping>
-  <userGroup id="UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner)">
-    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fowner"/>
-    <user ref="User_name-db=2Fahau"/>
-  </userGroup>
-  <userGroup id="UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fwriter)">
-    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fwriter"/>
-    <user ref="User_name-db=2Fahau"/>
-  </userGroup>
-  <userGroup id="UserGroup_user-(name-db=2Fjbotu)_grouping-(name-investigation=5F10100601=2DST=5Freader)">
-    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Freader"/>
-    <user ref="User_name-db=2Fjbotu"/>
-  </userGroup>
-  <userGroup id="UserGroup_user-(name-db=2Fjdoe)_grouping-(name-investigation=5F10100601=2DST=5Freader)">
-    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Freader"/>
-    <user ref="User_name-db=2Fjdoe"/>
-  </userGroup>
-  <userGroup id="UserGroup_user-(name-db=2Fnbour)_grouping-(name-investigation=5F10100601=2DST=5Freader)">
-    <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Freader"/>
-    <user ref="User_name-db=2Fnbour"/>
-  </userGroup>
-</data>
-<data>
-  <investigation id="Investigation_facility-(name-ESNF)_name-10100601=2DST_visitId-1=2E1=2DN">
-    <doi>DOI:00.0815/inv-00601</doi>
-    <endDate>2010-10-12T15:00:00+00:00</endDate>
-    <fileCount>4</fileCount>
-    <fileSize>127125</fileSize>
-    <name>10100601-ST</name>
-    <startDate>2010-09-30T10:27:24+00:00</startDate>
-    <title>Ni-Mn-Ga flat cone</title>
-    <visitId>1.1-N</visitId>
-    <facility ref="Facility_name-ESNF"/>
-    <investigationGroups>
-      <role>owner</role>
-      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fowner"/>
-    </investigationGroups>
-    <investigationGroups>
-      <role>reader</role>
-      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Freader"/>
-    </investigationGroups>
-    <investigationGroups>
-      <role>writer</role>
-      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fwriter"/>
-    </investigationGroups>
-  </investigation>
-</data>
-</icatdata>
diff --git a/doc/examples/icatdump-simple-1.xml b/doc/examples/icatdump-simple.xml
similarity index 100%
rename from doc/examples/icatdump-simple-1.xml
rename to doc/examples/icatdump-simple.xml
diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index b856e625..fa82f96d 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -72,7 +72,7 @@ ICAT data XML files
 In this section we describe the ICAT data file format using the XML
 backend.  Consider the following example:
 
-.. literalinclude:: ../examples/icatdump-simple-1.xml
+.. literalinclude:: ../examples/icatdump-simple.xml
    :language: xml
 
 The root element of ICAT data XML files is ``icatdata``.  It may
@@ -88,7 +88,8 @@ logical structure explained above.  The present example contains two
 chunks.  Each element within the ``data`` element corresponds to an
 ICAT object according to the ICAT schema.  In the present example, the
 first chunk contains five User objects and three Grouping objects.
-The second chunk only contains one Investigation.
+The Groupings include related UserGroups.  The second chunk only
+contains one Investigation, including related investigationGroups.
 
 These object elements should have an ``id`` attribute that may be used
 to reference the object in relations later on.  The ``id`` value has
@@ -104,27 +105,87 @@ the related object's attribute values, using XML attributes of the
 same name.  In the latter case, the attribute values must uniquely
 define the related object.
 
+In the present example, consider the first grouping:
+
+.. code-block:: XML
+
+  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
+    <name>investigation_10100601-ST_owner</name>
+    <userGroups>
+      <user ref="User_name-db=2Fahau"/>
+    </userGroups>
+  </grouping>
+
+It includes a related userGroup object that in turn references a
+related User.  This User is referenced in the ``ref`` attribute using
+a key defined in the User's ``id`` attribute earlier in the file.
+Another example is how the Investigation references its Facility:
+
+.. code-block:: XML
+
+  <investigation>
+    <!--  ... -->
+    <facility ref="Facility_name-ESNF"/>
+    <!--  ... -->
+  </investigation>
+
+The Facility is not defined in the data file.  It is assumed to exist
+in ICAT before ingesting the file.  In this case, it must be
+referenced by the unique key that could have been obtained by calling
+``facility.getUniqueKey()``.  Alternatively, the Facility could have
+been referenced by attribute as in:
+
+.. code-block:: XML
+
+  <investigation>
+    <!--  ... -->
+    <facility name="ESNF"/>
+    <!--  ... -->
+  </investigation>
+
+
 The object elements may include one-to-many relations.  In this case,
 the related objects will be created along with the parent in one
-single cascading call.  Alternatively, these related objects may be
-added separately as subelements of the ``data`` element later in the
-file.  In the present example, the Grouping object include their
-related UserGroup objects.  Note that these UserGroups include their
-relation to the User.  The User object is referenced by their
-respective id in the ``ref`` attribute.  But the UserGroups do not
-include their relation with Grouping.  That relationship is implied by
-the parent relation of the object in the file.
-
-In a similar way, the Investigation in the second chunk includes
+single cascading call.  In the present example, the Grouping objects
+include their related UserGroup objects.  Note that these UserGroups
+include their relation to the User, but not their relation with
+Grouping.  The latter relationship is implied by the parent relation
+of the object in the file.
+
+As an alternative, the UserGroups could have been added to the file as
+separate objects as direct subelements of ``data`` as in:
+
+.. code-block:: XML
+
+  <data>
+    <user id="User_name-db=2Fahau">
+      <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
+      <email>ahau@example.org</email>
+      <familyName>Hau</familyName>
+      <fullName>Arnold Hau</fullName>
+      <givenName>Arnold</givenName>
+      <name>db/ahau</name>
+      <orcidId>0000-0002-3263</orcidId>
+    </user>
+    <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
+      <name>investigation_10100601-ST_owner</name>
+    </grouping>
+    <userGroup id="UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner)">
+      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fowner"/>
+      <user ref="User_name-db=2Fahau"/>
+    </userGroup>
+  </data>
+
+The Investigation in the second chunk in the present example includes
 related InvestigationGroups that will be created along with the
 Investigation.  The InvestigationGroup objects include a reference to
 the corresponding Grouping.  Note that these references go across
 chunk boundaries.  The index that caches the object ids to resolve
 object relations from the first chunk that did contain the ids of the
-Groupings will already have been discarded from memeory when the
-second chunk is read.  But the references use the key that can be
-passed to :meth:`icat.client.Client.searchUniqueKey` to search these
-Groupings from ICAT.
+Groupings will already have been discarded from memory when the second
+chunk is read.  But the references use the key that can be passed to
+:meth:`icat.client.Client.searchUniqueKey` to search these Groupings
+from ICAT.
 
 Finally note the the file format also depends on the ICAT schema
 version: the present example can only be ingested into ICAT server 5.0
@@ -132,21 +193,12 @@ or newer, because the attributes fileCount and fileSize have been
 added to Investigation in this version.  With older ICAT versions, it
 will fail because the attributes are not defined.
 
-Consider a second example, it defines a subset of the same content
-as the previous example:
-
-.. literalinclude:: ../examples/icatdump-simple-2.xml
-   :language: xml
-   :lines: 1-9,28-52,56-58,70-82,108
-
-The difference is that we now add the Usergroup objects separately in
-direct subelements of ``data`` instead of including them in the
-related Grouping objects.
-
 You will find more extensive examples in the source distribution of
 python-icat.  The distribution also provides XML Schema Definition
 files for the ICAT data XML file format corresponding to various ICAT
-schema versions.
+schema versions.  Note that these XML Schema Definition
+files are provided for reference only.  The :ref:`icatingest` script
+does not validate its input.
 
 ICAT data YAML files
 ~~~~~~~~~~~~~~~~~~~~

From a472fc795e82bc9901d89f869529b063b494242b Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 16 Jan 2024 14:24:44 +0100
Subject: [PATCH 054/102] Fix duplicate user entry in example ICAT data file

---
 doc/examples/icatdump-simple.xml | 9 ---------
 doc/src/file-icatdata.rst        | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/doc/examples/icatdump-simple.xml b/doc/examples/icatdump-simple.xml
index b2c23038..63dc689d 100644
--- a/doc/examples/icatdump-simple.xml
+++ b/doc/examples/icatdump-simple.xml
@@ -7,15 +7,6 @@
   <generator>icatdump (python-icat 1.2.0)</generator>
 </head>
 <data>
-  <user id="User_name-db=2Fahau">
-    <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
-    <email>ahau@example.org</email>
-    <familyName>Hau</familyName>
-    <fullName>Arnold Hau</fullName>
-    <givenName>Arnold</givenName>
-    <name>db/ahau</name>
-    <orcidId>0000-0002-3263</orcidId>
-  </user>
   <user id="User_name-db=2Fahau">
     <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
     <email>ahau@example.org</email>
diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index fa82f96d..84d587b2 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -87,7 +87,7 @@ The content of each ``data`` element is one chunk according to the
 logical structure explained above.  The present example contains two
 chunks.  Each element within the ``data`` element corresponds to an
 ICAT object according to the ICAT schema.  In the present example, the
-first chunk contains five User objects and three Grouping objects.
+first chunk contains four User objects and three Grouping objects.
 The Groupings include related UserGroups.  The second chunk only
 contains one Investigation, including related investigationGroups.
 

From b3e30520d4dfce0bc92d028cd4f2afea31e5217e Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 16 Jan 2024 14:55:33 +0100
Subject: [PATCH 055/102] - Review documentation Section "ICAT data YAML files"
 - Drop icatdump-simple-2.yaml example, rename icatdump-simple-1.yaml   to
 icatdump-simple.yaml

---
 doc/examples/icatdump-simple-2.yaml           | 79 -------------------
 ...ump-simple-1.yaml => icatdump-simple.yaml} |  0
 doc/src/file-icatdata.rst                     | 73 +++++++++++++++--
 3 files changed, 67 insertions(+), 85 deletions(-)
 delete mode 100644 doc/examples/icatdump-simple-2.yaml
 rename doc/examples/{icatdump-simple-1.yaml => icatdump-simple.yaml} (100%)

diff --git a/doc/examples/icatdump-simple-2.yaml b/doc/examples/icatdump-simple-2.yaml
deleted file mode 100644
index 79e4a296..00000000
--- a/doc/examples/icatdump-simple-2.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-%YAML 1.1
-# Date: Wed, 03 Jan 2024 13:27:52 +0000
-# Service: https://icat.example.com:8181/ICATService/ICAT?wsdl
-# ICAT-API: 6.0.0
-# Generator: icatdump (python-icat 1.2.0)
----
-grouping:
-  Grouping_name-investigation=5F10100601=2DST=5Fowner:
-    name: investigation_10100601-ST_owner
-  Grouping_name-investigation=5F10100601=2DST=5Freader:
-    name: investigation_10100601-ST_reader
-  Grouping_name-investigation=5F10100601=2DST=5Fwriter:
-    name: investigation_10100601-ST_writer
-user:
-  User_name-db=2Fahau:
-    affiliation: Goethe University Frankfurt, Faculty of Philosophy and History
-    email: ahau@example.org
-    familyName: Hau
-    fullName: Arnold Hau
-    givenName: Arnold
-    name: db/ahau
-    orcidId: 0000-0002-3263
-  User_name-db=2Fjbotu:
-    affiliation: "Universit\xE9 Paul-Val\xE9ry Montpellier 3"
-    email: jbotu@example.org
-    familyName: Botul
-    fullName: Jean-Baptiste Botul
-    givenName: Jean-Baptiste
-    name: db/jbotu
-    orcidId: 0000-0002-3264
-  User_name-db=2Fjdoe:
-    email: jdoe@example.org
-    familyName: Doe
-    fullName: John Doe
-    givenName: John
-    name: db/jdoe
-  User_name-db=2Fnbour:
-    affiliation: University of Nancago
-    email: nbour@example.org
-    familyName: Bourbaki
-    fullName: Nicolas Bourbaki
-    givenName: Nicolas
-    name: db/nbour
-    orcidId: 0000-0002-3266
-userGroup:
-  UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner):
-    grouping: Grouping_name-investigation=5F10100601=2DST=5Fowner
-    user: User_name-db=2Fahau
-  UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fwriter):
-    grouping: Grouping_name-investigation=5F10100601=2DST=5Fwriter
-    user: User_name-db=2Fahau
-  UserGroup_user-(name-db=2Fjbotu)_grouping-(name-investigation=5F10100601=2DST=5Freader):
-    grouping: Grouping_name-investigation=5F10100601=2DST=5Freader
-    user: User_name-db=2Fjbotu
-  UserGroup_user-(name-db=2Fjdoe)_grouping-(name-investigation=5F10100601=2DST=5Freader):
-    grouping: Grouping_name-investigation=5F10100601=2DST=5Freader
-    user: User_name-db=2Fjdoe
-  UserGroup_user-(name-db=2Fnbour)_grouping-(name-investigation=5F10100601=2DST=5Freader):
-    grouping: Grouping_name-investigation=5F10100601=2DST=5Freader
-    user: User_name-db=2Fnbour
----
-investigation:
-  Investigation_facility-(name-ESNF)_name-10100601=2DST_visitId-1=2E1=2DN:
-    doi: DOI:00.0815/inv-00601
-    endDate: '2010-10-12T15:00:00+00:00'
-    facility: Facility_name-ESNF
-    fileCount: 4
-    fileSize: 127125
-    investigationGroups:
-    - grouping: Grouping_name-investigation=5F10100601=2DST=5Fowner
-      role: owner
-    - grouping: Grouping_name-investigation=5F10100601=2DST=5Freader
-      role: reader
-    - grouping: Grouping_name-investigation=5F10100601=2DST=5Fwriter
-      role: writer
-    name: 10100601-ST
-    startDate: '2010-09-30T10:27:24+00:00'
-    title: Ni-Mn-Ga flat cone
-    visitId: 1.1-N
diff --git a/doc/examples/icatdump-simple-1.yaml b/doc/examples/icatdump-simple.yaml
similarity index 100%
rename from doc/examples/icatdump-simple-1.yaml
rename to doc/examples/icatdump-simple.yaml
diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index 84d587b2..a568969e 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -143,7 +143,6 @@ been referenced by attribute as in:
     <!--  ... -->
   </investigation>
 
-
 The object elements may include one-to-many relations.  In this case,
 the related objects will be created along with the parent in one
 single cascading call.  In the present example, the Grouping objects
@@ -204,14 +203,76 @@ ICAT data YAML files
 ~~~~~~~~~~~~~~~~~~~~
 
 In this section we describe the ICAT data file format using the YAML
-backend.
+backend.  Consider the following example, it corresponds to the same
+ICAT content as the XML example above:
 
-.. literalinclude:: ../examples/icatdump-simple-1.yaml
+.. literalinclude:: ../examples/icatdump-simple.yaml
    :language: yaml
 
-.. literalinclude:: ../examples/icatdump-simple-2.yaml
-   :language: yaml
-   :lines: 1-7,10-11,14,23-45,52-60
+ICAT data YAML files start with a head consisting of a few comment
+lines, followed by one or more YAML documents.  YAML documents are
+separated by a line containing only ``---``.  The comments in the head
+provide some information on the context of the creation of the data
+file, which may be useful for debugging in case of issues.
+
+Each YAML document defines one chunk of data according to the logical
+structure explained above.  It consists of a mapping having the name
+of entity types in the ICAT schema as keys.  The values are in turn
+mappings that map object ids as key to ICAT object definitions as
+value.  The object id may be used to reference that object in
+relations later on.  It has no meaning other than this file internal
+referencing between objects.  In the present example, the first chunk
+contains four User objects and three Grouping objects.  The Groupings
+include related UserGroups.  The second chunk only contains one
+Investigation, including related investigationGroups.
+
+Each of the ICAT object definitions corresponds to an object in the
+ICAT schema.  It is again a mapping with the object's attribute and
+relation names as keys and corresponding values.  All many-to-one
+relations must be provided and reference existing objects, e.g. they
+must either already have existed before starting the ingestion or
+appear in the same or an earlier YAML document in the ICAT data file.
+The values of many-to-one relations are the related object's id,
+either as defined in the same YAML document or the unique key as
+returned by :meth:`icat.entity.Entity.getUniqueKey`.
+
+The object definitions may include one-to-many relations.  In this
+case, the value for the relation name is a list of object definitions
+for the related objects.  These related objects will be created along
+with the parent in one single cascading call.  In the present example,
+the Grouping objects include their related UserGroup objects.  Note
+that these UserGroups include their relation to the User, but not
+their relation with Grouping.  The latter relationship is implied by
+the parent relation of the object in the file.
+
+As an alternative, in the present example, the UserGroups could have
+been added to the file as separate objects as in:
+
+.. code-block:: YAML
+
+  ---
+  grouping:
+    Grouping_name-investigation=5F10100601=2DST=5Fowner:
+      name: investigation_10100601-ST_owner
+  user:
+    User_name-db=2Fahau:
+      affiliation: Goethe University Frankfurt, Faculty of Philosophy and History
+      email: ahau@example.org
+      familyName: Hau
+      fullName: Arnold Hau
+      givenName: Arnold
+      name: db/ahau
+      orcidId: 0000-0002-3263
+  userGroup:
+    UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner):
+      grouping: Grouping_name-investigation=5F10100601=2DST=5Fowner
+      user: User_name-db=2Fahau
+  ---
+
+Note that the entries in the mappings have no inherent order.  The
+:ref:`icatingest` script uses a predefined order to read the ICAT
+entity types in order to make sure that referenced objects are created
+before any object that may reference them.
 
 
 .. [#dc] There is one exception: DataCollections don't have a

From acaff9d4ffe9987bc0324f2e4e01459dc5bd547c Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 16 Jan 2024 21:21:21 +0100
Subject: [PATCH 056/102] Review Section ICAT data files with respect to object
 references, add a Subsection References to ICAT objects and unique keys

---
 doc/src/file-icatdata.rst | 108 ++++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 51 deletions(-)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index a568969e..6b8730cc 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -18,27 +18,19 @@ Logical structure of ICAT data files
 There is a one-to-one correspondence of the objects in the data
 file and the corresponding object in ICAT according to the ICAT
 schema, including all attributes and relations to other objects.
-Special unique keys are used to encode the relations.
-:meth:`icat.entity.Entity.getUniqueKey` may be used to get such a
-unique key for an entity object and
-:meth:`icat.client.Client.searchUniqueKey` may be used to search an
-object by its key.  Otherwise these keys should be considered as
-opaque ids.
 
 Data files are partitioned in chunks.  This is done to avoid having
 the whole file, e.g. the complete inventory of the ICAT, at once in
 memory.  The problem is that objects contain references to other
-objects (e.g. Datafiles refer to Datasets, the latter refer to
-Investigations, and so forth).  We keep an index of the objects as
+objects, e.g. Datafiles refer to Datasets, the latter refer to
+Investigations, and so forth.  We keep an index of the objects as
 cache in order to resolve these references.  But there is a memory
-versus time tradeoff: we cannot keep all the objects in the index,
-that would again mean the complete inventory of the ICAT.  And we
-can't know beforehand which object is going to be referenced later on,
-so we don't know which one to keep and which one to discard from the
-index.  Fortunately we can query objects that we discarded once back
-from the ICAT server.  But this is expensive.  So the strategy is as
-follows: keep all objects from the current chunk in the index and
-discard the complete index each time a chunk has been
+versus time tradeoff: in order to avoid the index to grow beyond
+bounds, objects need to be discarded from the index from time to time.
+References to objects that can not be resolved from the index need to
+be searched from the ICAT server, which of course is expensive.  So
+the strategy is as follows: keep all objects from the current chunk in
+the index and discard the complete index each time a chunk has been
 processed. [#dc]_ This will work fine if objects are mostly
 referencing other objects from the same chunk and only a few
 references go across chunk boundaries.
@@ -66,6 +58,26 @@ indirectly related to one of the included objects.  In this case,
 only a reference to the related object will be included in the data
 file.  The related object must have its own entry.
 
+References to ICAT objects and unique keys
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+References to related objects are encoded in ICAT data files by
+reference keys.  There are two kinds of those keys: local keys and
+unique keys.
+
+When an ICAT object is defined in the file, it generally defines a
+local key at the same time.  Local keys are stored in the object index
+and may be used to reference this object from other obejcts in the
+same data chunk.  Unique keys can be obtained from an object by
+calling :meth:`icat.entity.Entity.getUniqueKey`.  An object can be
+searched by its unique key from the ICAT server by calling
+:meth:`icat.client.Client.searchUniqueKey`.  As a result, it is
+possible to reference an object by its unique key even if the
+reference is not in the object index.  All references that go across
+chunk boundaries must use unique keys. [#dc]_
+
+Reference keys should be considered as opaque ids.
+
 ICAT data XML files
 ~~~~~~~~~~~~~~~~~~~
 
@@ -91,19 +103,17 @@ first chunk contains four User objects and three Grouping objects.
 The Groupings include related UserGroups.  The second chunk only
 contains one Investigation, including related investigationGroups.
 
-These object elements should have an ``id`` attribute that may be used
-to reference the object in relations later on.  The ``id`` value has
-no meaning other than this file internal referencing between objects.
-The subelements of the object elements correspond to the object's
-attributes and relations in the ICAT schema.  All many-to-one
-relations must be provided and reference already existing objects,
-e.g. they must either already have existed before starting the
-ingestion or appear earlier in the ICAT data file than the referencing
-object, so that they will be created earlier.  The related object may
-either be referenced by id using the special attribute ``ref`` or by
-the related object's attribute values, using XML attributes of the
-same name.  In the latter case, the attribute values must uniquely
-define the related object.
+These object elements may have an ``id`` attribute that defines a
+local key to reference the object later on.  The subelements of the
+object elements correspond to the object's attributes and relations in
+the ICAT schema.  All many-to-one relations must be provided and
+reference already existing objects, e.g. they must either already have
+existed before starting the ingestion or appear earlier in the ICAT
+data file than the referencing object, so that they will be created
+earlier.  The related object may either be referenced by reference key
+using the ``ref`` attribute or by the related object's attribute
+values, using XML attributes of the same name.  In the latter case,
+the attribute values must uniquely define the related object.
 
 In the present example, consider the first grouping:
 
@@ -118,8 +128,9 @@ In the present example, consider the first grouping:
 
 It includes a related userGroup object that in turn references a
 related User.  This User is referenced in the ``ref`` attribute using
-a key defined in the User's ``id`` attribute earlier in the file.
-Another example is how the Investigation references its Facility:
+a local key defined in the User's ``id`` attribute earlier in the
+file.  Another example is how the Investigation references its
+Facility:
 
 .. code-block:: XML
 
@@ -131,8 +142,7 @@ Another example is how the Investigation references its Facility:
 
 The Facility is not defined in the data file.  It is assumed to exist
 in ICAT before ingesting the file.  In this case, it must be
-referenced by the unique key that could have been obtained by calling
-``facility.getUniqueKey()``.  Alternatively, the Facility could have
+referenced by its unique key.  Alternatively, the Facility could have
 been referenced by attribute as in:
 
 .. code-block:: XML
@@ -179,14 +189,10 @@ The Investigation in the second chunk in the present example includes
 related InvestigationGroups that will be created along with the
 Investigation.  The InvestigationGroup objects include a reference to
 the corresponding Grouping.  Note that these references go across
-chunk boundaries.  The index that caches the object ids to resolve
-object relations from the first chunk that did contain the ids of the
-Groupings will already have been discarded from memory when the second
-chunk is read.  But the references use the key that can be passed to
-:meth:`icat.client.Client.searchUniqueKey` to search these Groupings
-from ICAT.
-
-Finally note the the file format also depends on the ICAT schema
+chunk boundaries.  Thus, unique keys for the Groupings need to be used
+here.
+
+Finally note that the file format also depends on the ICAT schema
 version: the present example can only be ingested into ICAT server 5.0
 or newer, because the attributes fileCount and fileSize have been
 added to Investigation in this version.  With older ICAT versions, it
@@ -219,12 +225,11 @@ Each YAML document defines one chunk of data according to the logical
 structure explained above.  It consists of a mapping having the name
 of entity types in the ICAT schema as keys.  The values are in turn
 mappings that map object ids as key to ICAT object definitions as
-value.  The object id may be used to reference that object in
-relations later on.  It has no meaning other than this file internal
-referencing between objects.  In the present example, the first chunk
-contains four User objects and three Grouping objects.  The Groupings
-include related UserGroups.  The second chunk only contains one
-Investigation, including related investigationGroups.
+value.  These object ids define local keys that may be used to
+reference the respective object later on.  In the present example, the
+first chunk contains four User objects and three Grouping objects.
+The Groupings include related UserGroups.  The second chunk only
+contains one Investigation, including related investigationGroups.
 
 Each of the ICAT object definitions corresponds to an object in the
 ICAT schema.  It is again a mapping with the object's attribute and
@@ -232,9 +237,8 @@ relation names as keys and corresponding values.  All many-to-one
 relations must be provided and reference existing objects, e.g. they
 must either already have existed before starting the ingestion or
 appear in the same or an earlier YAML document in the ICAT data file.
-The values of many-to-one relations are the related object's id,
-either as defined in the same YAML document or the unique key as
-returned by :meth:`icat.entity.Entity.getUniqueKey`.
+The values of many-to-one relations are reference keys, either local
+keys defined in the same YAML document or unique keys.
 
 The object definitions may include one-to-many relations.  In this
 case, the value for the relation name is a list of object definitions
@@ -277,4 +281,6 @@ before any object that may reference them.
 
 .. [#dc] There is one exception: DataCollections don't have a
          uniqueness constraint and can't reliably be searched by
-         attributes.  They are always kept in the index.
+         attributes.  Therefore local keys for DataCollections are
+         always kept in the object index and may be used to reference
+         them across chunk boundaries.

From 9c5085c63a4b92493a98cfc05adc0362c2f77968 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Thu, 18 Jan 2024 18:51:14 +0100
Subject: [PATCH 057/102] Rework Section ICAT data files once again

---
 doc/src/file-icatdata.rst | 189 +++++++++++++++++++-------------------
 1 file changed, 97 insertions(+), 92 deletions(-)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index 6b8730cc..57183153 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -15,20 +15,16 @@ on the backend: python-icat provides backends using XML and YAML.
 Logical structure of ICAT data files
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-There is a one-to-one correspondence of the objects in the data
-file and the corresponding object in ICAT according to the ICAT
-schema, including all attributes and relations to other objects.
-
 Data files are partitioned in chunks.  This is done to avoid having
 the whole file, e.g. the complete inventory of the ICAT, at once in
 memory.  The problem is that objects contain references to other
 objects, e.g. Datafiles refer to Datasets, the latter refer to
 Investigations, and so forth.  We keep an index of the objects as
-cache in order to resolve these references.  But there is a memory
+a cache in order to resolve these references.  But there is a memory
 versus time tradeoff: in order to avoid the index to grow beyond
 bounds, objects need to be discarded from the index from time to time.
 References to objects that can not be resolved from the index need to
-be searched from the ICAT server, which of course is expensive.  So
+be searched from the ICAT server, which is of course expensive.  So
 the strategy is as follows: keep all objects from the current chunk in
 the index and discard the complete index each time a chunk has been
 processed. [#dc]_ This will work fine if objects are mostly
@@ -40,37 +36,40 @@ but at the same time large enough to keep as many relations between
 objects as possible local in a chunk.  It is in the responsibility of
 the writer of the data file to create the chunks in this manner.
 
-The objects that get written to the data file and how this file is
-organized is controlled by lists of ICAT search expressions or entity
-objects, see :meth:`icat.dumpfile.DumpFileWriter.writeobjs`.  There is
-some degree of flexibility: an object may include related objects in
-an one-to-many relation.  In this case, these related objects should
-not be added on their own again.  For instance, you may write User,
-Grouping, and UserGroup as separate objects into the file.  In this
-case, the UserGroup entries must properly reference related User and
-Grouping.  Alternatively you may include the UserGroups in the
-corresponding Grouping objects.  In this case, you must not add the
-UserGroups again on their own.
-
-Objects related in a many-to-one relation must always be included in
-the search expression.  This is also true if the object is
-indirectly related to one of the included objects.  In this case,
-only a reference to the related object will be included in the data
-file.  The related object must have its own entry.
+The data chunks contain ICAT object definitions, e.g. serializations
+of individual ICAT objects, including all attribute values and
+many-to-one relations.  The many-to-one relations are provided as
+references to other objects that must exist in the ICAT server at the
+moment that this object definition is read.
+
+There is some degree of flexibility with respect to related objects in
+one-to-many relations: object definitions for these related objects
+may be included in the object definitions of the parent object.  When
+the parent is read, these related objects will be created along with
+the parent in one single cascading call.  Thus, the related objects
+must not be included again as a separate object in the ICAT data file.
+For instance, an ICAT data file may include User, Grouping, and
+UserGroup as separate objects.  In this case, the UserGroup entries
+must properly reference User and Grouping as their related objects.
+Alternatively the file may only contain User and Grouping objects,
+with the UserGroups being included into the object definition of the
+corresponding Grouping objects.
 
 References to ICAT objects and unique keys
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 References to related objects are encoded in ICAT data files by
-reference keys.  There are two kinds of those keys: local keys and
-unique keys.
+reference keys.  There are two kinds of those keys, local keys and
+unique keys:
 
 When an ICAT object is defined in the file, it generally defines a
 local key at the same time.  Local keys are stored in the object index
 and may be used to reference this object from other obejcts in the
-same data chunk.  Unique keys can be obtained from an object by
-calling :meth:`icat.entity.Entity.getUniqueKey`.  An object can be
-searched by its unique key from the ICAT server by calling
+same data chunk.
+
+Unique keys can be obtained from an object by calling
+:meth:`icat.entity.Entity.getUniqueKey`.  An object can be searched by
+its unique key from the ICAT server by calling
 :meth:`icat.client.Client.searchUniqueKey`.  As a result, it is
 possible to reference an object by its unique key even if the
 reference is not in the object index.  All references that go across
@@ -95,42 +94,80 @@ The ``head`` element will be ignored by :ref:`icatingest`.  It serves
 to provide some information on the context of the creation of the data
 file, which may be useful for debugging in case of issues.
 
-The content of each ``data`` element is one chunk according to the
-logical structure explained above.  The present example contains two
-chunks.  Each element within the ``data`` element corresponds to an
-ICAT object according to the ICAT schema.  In the present example, the
-first chunk contains four User objects and three Grouping objects.
-The Groupings include related UserGroups.  The second chunk only
-contains one Investigation, including related investigationGroups.
+The content of each ``data`` element is one chunk, its subelements are
+the ICAT object definitions according to the logical structure
+explained above.  The present example contains two chunks: the first
+chunk contains four User objects and three Grouping objects.  The
+Groupings include related UserGroups.  The second chunk only contains
+one Investigation, including related InvestigationGroups.
+
+The object elements may have an ``id`` attribute that defines a local
+key to reference the object later on.  The subelements of the object
+elements correspond to the object's attributes and relations in the
+ICAT schema.  All many-to-one relations must be provided and reference
+already existing objects, e.g. they must either already have existed
+before starting the ingestion or appear earlier in the ICAT data file
+than the referencing object, so that they will be created earlier.
+The related object may either be referenced by reference key using the
+``ref`` attribute or by the related object's attribute values, using
+XML attributes of the same name.  In the latter case, the attribute
+values must uniquely define the related object.
+
+Consider a simplified version of the first chunk from the present
+example, defining only one User, Grouping and UserGroup respectively:
 
-These object elements may have an ``id`` attribute that defines a
-local key to reference the object later on.  The subelements of the
-object elements correspond to the object's attributes and relations in
-the ICAT schema.  All many-to-one relations must be provided and
-reference already existing objects, e.g. they must either already have
-existed before starting the ingestion or appear earlier in the ICAT
-data file than the referencing object, so that they will be created
-earlier.  The related object may either be referenced by reference key
-using the ``ref`` attribute or by the related object's attribute
-values, using XML attributes of the same name.  In the latter case,
-the attribute values must uniquely define the related object.
+.. code-block:: XML
 
-In the present example, consider the first grouping:
+  <data>
+    <user id="User_name-db=2Fahau">
+      <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
+      <email>ahau@example.org</email>
+      <familyName>Hau</familyName>
+      <fullName>Arnold Hau</fullName>
+      <givenName>Arnold</givenName>
+      <name>db/ahau</name>
+      <orcidId>0000-0002-3263</orcidId>
+    </user>
+    <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
+      <name>investigation_10100601-ST_owner</name>
+      <userGroups>
+        <user ref="User_name-db=2Fahau"/>
+      </userGroups>
+    </grouping>
+  </data>
+
+The Grouping includes the related UserGroup object that in turn
+references the related User.  This User is referenced in the ``ref``
+attribute using a local key defined in the User's ``id`` attribute.
+Note that the UserGroup does not include its relation with Grouping.
+The latter relationship is implied by the parent relation of the
+object in the file.
+
+As an alternative, the UserGroup could have been added to the file as
+a separate object as a direct subelement of ``data``:
 
 .. code-block:: XML
 
-  <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
-    <name>investigation_10100601-ST_owner</name>
-    <userGroups>
+  <data>
+    <user id="User_name-db=2Fahau">
+      <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
+      <email>ahau@example.org</email>
+      <familyName>Hau</familyName>
+      <fullName>Arnold Hau</fullName>
+      <givenName>Arnold</givenName>
+      <name>db/ahau</name>
+      <orcidId>0000-0002-3263</orcidId>
+    </user>
+    <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
+      <name>investigation_10100601-ST_owner</name>
+    </grouping>
+    <userGroup id="UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner)">
+      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fowner"/>
       <user ref="User_name-db=2Fahau"/>
-    </userGroups>
-  </grouping>
+    </userGroup>
+  </data>
 
-It includes a related userGroup object that in turn references a
-related User.  This User is referenced in the ``ref`` attribute using
-a local key defined in the User's ``id`` attribute earlier in the
-file.  Another example is how the Investigation references its
-Facility:
+Another example is how the Investigation references its Facility:
 
 .. code-block:: XML
 
@@ -153,44 +190,12 @@ been referenced by attribute as in:
     <!--  ... -->
   </investigation>
 
-The object elements may include one-to-many relations.  In this case,
-the related objects will be created along with the parent in one
-single cascading call.  In the present example, the Grouping objects
-include their related UserGroup objects.  Note that these UserGroups
-include their relation to the User, but not their relation with
-Grouping.  The latter relationship is implied by the parent relation
-of the object in the file.
-
-As an alternative, the Usergroups could have been added to the file as
-separate objects as direct subelements of ``data`` as in:
-
-.. code-block:: XML
-
-  <data>
-    <user id="User_name-db=2Fahau">
-      <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
-      <email>ahau@example.org</email>
-      <familyName>Hau</familyName>
-      <fullName>Arnold Hau</fullName>
-      <givenName>Arnold</givenName>
-      <name>db/ahau</name>
-      <orcidId>0000-0002-3263</orcidId>
-    </user>
-    <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
-      <name>investigation_10100601-ST_owner</name>
-    </grouping>
-    <userGroup id="UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner)">
-      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fowner"/>
-      <user ref="User_name-db=2Fahau"/>
-    </userGroup>
-  </data>
-
 The Investigation in the second chunk in the present example includes
 related InvestigationGroups that will be created along with the
 Investigation.  The InvestigationGroup objects include a reference to
-the corresponding Grouping.  Note that these references go across
-chunk boundaries.  Thus, unique keys for the Groupings need to be used
-here.
+the corresponding Grouping respectively.  Note that these references
+go across chunk boundaries.  Thus, unique keys for the Groupings need
+to be used here.
 
 Finally note that the file format also depends on the ICAT schema
 version: the present example can only be ingested into ICAT server 5.0

From 0fc8b0030dd2b4e9ccea39cc68c0fdac4fcf1e87 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Thu, 18 Jan 2024 20:13:40 +0100
Subject: [PATCH 058/102] Drop most of the docstring from module icat.dumpfile
 as this is now much better explained in the online documentation

---
 src/icat/dumpfile.py | 40 ----------------------------------------
 1 file changed, 40 deletions(-)

diff --git a/src/icat/dumpfile.py b/src/icat/dumpfile.py
index 099f4364..c5c5a002 100644
--- a/src/icat/dumpfile.py
+++ b/src/icat/dumpfile.py
@@ -5,46 +5,6 @@
 writing ICAT data files.  The actual work is done in file format
 specific modules that should provide subclasses that must implement
 the abstract methods.
-
-Data files are partitioned in chunks.  This is done to avoid having
-the whole file, e.g. the complete inventory of the ICAT, at once in
-memory.  The problem is that objects contain references to other
-objects (e.g. Datafiles refer to Datasets, the latter refer to
-Investigations, and so forth).  We keep an index of the objects in
-order to resolve these references.  But there is a memory versus time
-tradeoff: we cannot keep all the objects in the index, that would
-again mean the complete inventory of the ICAT.  And we can't know
-beforehand which object is going to be referenced later on, so we
-don't know which one to keep and which one to discard from the index.
-Fortunately we can query objects we discarded once back from the ICAT
-server with :meth:`icat.client.Client.searchUniqueKey`.  But this is
-expensive.  So the strategy is as follows: keep all objects from the
-current chunk in the index and discard the complete index each time a
-chunk has been processed.  This will work fine if objects are mostly
-referencing other objects from the same chunk and only a few
-references go across chunk boundaries.
-
-Therefore, we want these chunks to be small enough to fit into memory,
-but at the same time large enough to keep as many relations between
-objects as possible local in a chunk.  It is in the responsibility of
-the writer of the data file to create the chunks in this manner.
-
-The objects that get written to the data file and how this file is
-organized is controlled by lists of ICAT search expressions, see
-:meth:`icat.dumpfile.DumpFileWriter.writeobjs`.  There is some degree
-of flexibility: an object may include related objects in an
-one-to-many relation, just by including them in the search expression.
-In this case, these related objects should not have a search
-expression on their own again.  For instance, the search expression
-for Grouping may include UserGroup.  The UserGroups will then be
-embedded in their respective grouping in the data file.  There should
-not be a search expression for UserGroup then.
-
-Objects related in a many-to-one relation must always be included in
-the search expression.  This is also true if the object is
-indirectly related to one of the included objects.  In this case,
-only a reference to the related object will be included in the data
-file.  The related object must have its own list entry.
 """
 
 from collections import ChainMap

From 504d09179465fb18946bd0a690bff349a22d25b4 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Thu, 18 Jan 2024 20:49:36 +0100
Subject: [PATCH 059/102] Indicate in the documentation of icat.dumpfile which
 methods of class icat.dumpfile.DumpFileReader and class
 icat.dumpfile.DumpFileWriter are abstract and thus need to be implemented in the
 file format specific backend

---
 src/icat/dumpfile.py | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/src/icat/dumpfile.py b/src/icat/dumpfile.py
index c5c5a002..a18832a3 100644
--- a/src/icat/dumpfile.py
+++ b/src/icat/dumpfile.py
@@ -99,6 +99,9 @@ def getdata(self):
         specific to the implementing backend and should be passed as
         the `data` argument to
         :meth:`~icat.dumpfile.DumpFileReader.getobjs_from_data`.
+
+        This abstract method must be implemented in the file format
+        specific backend.
         """
         raise NotImplementedError
 
@@ -107,6 +110,9 @@ def getobjs_from_data(self, data, objindex):
 
         Yield a new entity object in each iteration.  The object is
         initialized from the data, but not yet created at the client.
+
+        This abstract method must be implemented in the file format
+        specific backend.
         """
         raise NotImplementedError
 
@@ -197,7 +203,11 @@ def __exit__(self, type, value, traceback):
             self.outfile.close()
 
     def head(self):
-        """Write a header with some meta information to the data file."""
+        """Write a header with some meta information to the data file.
+
+        This abstract method must be implemented in the file format
+        specific backend.
+        """
         raise NotImplementedError
 
     def startdata(self):
@@ -205,15 +215,26 @@ def startdata(self):
 
         If the current chunk contains any data, write it to the data
         file.
+
+        This abstract method must be implemented in the file format
+        specific backend.
         """
         raise NotImplementedError
 
     def writeobj(self, key, obj, keyindex):
-        """Add an entity object to the current data chunk."""
+        """Add an entity object to the current data chunk.
+
+        This abstract method must be implemented in the file format
+        specific backend.
+        """
         raise NotImplementedError
 
     def finalize(self):
-        """Finalize the data file."""
+        """Finalize the data file.
+
+        This abstract method must be implemented in the file format
+        specific backend.
+        """
         raise NotImplementedError
 
     def writeobjs(self, objs, keyindex, chunksize=100):

From a52714b1c066e55e43bef02cae7a37c767064cd9 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 19 Jan 2024 11:51:54 +0100
Subject: [PATCH 060/102] Minor language fixes

---
 doc/src/file-icatdata.rst | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index 57183153..97efd819 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -103,15 +103,15 @@ one Investigation, including related InvestigationGroups.
 
 The object elements may have an ``id`` attribute that define a local
 key to reference the object later on.  The subelements of the object
-elements correspond to the object's attributes and relations in the
-ICAT schema.  All many-to-one relations must be provided and reference
-already existing objects, e.g. they must either already have existed
-before starting the ingestion or appear earlier in the ICAT data file
-than the referencing object, so that they will be created earlier.
-The related object may either be referenced by reference key using the
-``ref`` attribute or by the related object's attribute values, using
-XML attributes of the same name.  In the latter case, the attribute
-values must uniquely define the related object.
+elements correspond to the object's attributes and relations according
+to the ICAT schema.  All many-to-one relations must be provided and
+reference already existing objects, e.g. they must either already have
+existed before starting the ingestion or appear earlier in the ICAT
+data file than the referencing object, so that they will be created
+earlier.  The related object may either be referenced by reference key
+using the ``ref`` attribute or by the related object's attribute
+values, using XML attributes of the same name.  In the latter case,
+the attribute values must uniquely define the related object.
 
 Consider a simplified version of the first chunk from the present
 example, defining only one User, Grouping and UserGroup respectively:
@@ -201,7 +201,7 @@ Finally note that the file format also depends on the ICAT schema
 version: the present example can only be ingested into ICAT server 5.0
 or newer, because the attributes fileCount and fileSize have been
 added to Investigation in this version.  With older ICAT versions, it
-will fail because the attributes are not defined.
+will fail because these attributes are not defined.
 
 You will find more extensive examples in the source distribution of
 python-icat.  The distribution also provides XML Schema Definition

From f0112b8f3fa88312ded8a6df1e110f1665176a7c Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 19 Jan 2024 14:04:54 +0100
Subject: [PATCH 061/102] Add first input to Section Metadata ingest files

---
 doc/src/file-icatdata.rst   |  8 ++++++
 doc/src/file-icatingest.rst | 51 +++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index 97efd819..73e84f3e 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -12,6 +12,8 @@ logic for reading and writing the files is provided by the
 The actual file format depends on the version of the ICAT schema and
 on the backend: python-icat provides backends using XML and YAML.
 
+.. _ICAT-data-files-structure:
+
 Logical structure of ICAT data files
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -55,6 +57,8 @@ Alternatively the file may only contain User and Grouping objects,
 with the UserGroups being included into the object definition of the
 corresponding Grouping objects.
 
+.. _ICAT-data-files-references:
+
 References to ICAT objects and unique keys
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -77,6 +81,8 @@ chunk boundaries must use unique keys. [#dc]_
 
 Reference keys should be considered as opaque ids.
 
+.. _ICAT-data-xml-files:
+
 ICAT data XML files
 ~~~~~~~~~~~~~~~~~~~
 
@@ -210,6 +216,8 @@ schema versions.  Note the these  XML Schema Definition
 files are provided for reference only.  The :ref:`icatingest` script
 does not validate its input.
 
+.. _ICAT-data-yaml-files:
+
 ICAT data YAML files
 ~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
index 04954679..c7103833 100644
--- a/doc/src/file-icatingest.rst
+++ b/doc/src/file-icatingest.rst
@@ -3,4 +3,55 @@
 Metadata ingest files
 =====================
 
+Metadata ingest files are the input format for class
+:class:`icat.ingest.IngestReader`.  This class is intended to be used
+in scripts that read the metadata created by experiments into ICAT.
+The file format is basically a restricted version of
+:ref:`ICAT-data-xml-files`.
 
+The underlying idea is that ICAT data files are in principle suitable
+to encode the metadata to be ingested from the experiment.  The only
+problem is that this file format is too powerful: it can encode any
+ICAT content.  We want the ingest files from the experiment to create
+new Datasets and DatasetParameters, we certainly don't want these
+files to create new Instruments or Users in ICAT.  And we also want to
+control the Investigation that newly created Datasets will be added
+to.  It would be rather difficult to control the power of the input
+format if we would use plain ICAT data files for this purpose.
+
+Class :class:`icat.ingest.IngestReader` takes an ``investigation``
+argument.  We will refer to the Investigation given in this argument
+as the *prescribed Investigation* in the following.  The metadata
+ingest file format restricts ICAT data XML files in the following
+ways:
+
+* ingest files must contain one and only one  ``data`` element,
+  e.g. chunks according to the :ref:`ICAT-data-files-structure`.
+
+* the allowed object types are restricted to Dataset,
+  DatasetInstrument, DatasetTechnique, and DatasetParameter.
+
+* the attributes in the object definitions for Datasets are restricted
+  to name, description, startDate, and endDate.
+
+* object definitions for Datasets can not include a reference to the
+  related Investigation.  The relation with the prescribed
+  Investigation will be implied.
+
+* object definitions for Datasets can reference a related Sample only
+  by name or by pid.  A relation of the related Sample with the
+  prescribed Investigation will be implied.
+
+* references to the related Dataset in DatasetInstrument,
+  DatasetTechnique, and DatasetParameter definitions are restricted to
+  :ref:`local keys <ICAT-data-files-references>`.  These objects can
+  thus only relate to Datasets defined in the same ingest file.
+
+* other object references are restricted to reference by attributes.
+
+These restrictions are enforced by validating the input against an XML
+Schema Definition (XSD).
+
+Another change with respect to ICAT data XML files is that the name of
+the root element is ``icatingest`` and that it must have a ``version``
+attribute.

From 048a98cb0d151a918262628601716cb7da1092c5 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 19 Jan 2024 14:29:11 +0100
Subject: [PATCH 062/102] Minor language fixes

---
 doc/src/file-icatingest.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
index c7103833..bf2af389 100644
--- a/doc/src/file-icatingest.rst
+++ b/doc/src/file-icatingest.rst
@@ -26,7 +26,7 @@ ingest file format restricts ICAT data XML files in the following
 ways:
 
 * ingest files must contain one and only one  ``data`` element,
-  e.g. chunks according to the :ref:`ICAT-data-files-structure`.
+  e.g. one chunk according to the :ref:`ICAT-data-files-structure`.
 
 * the allowed object types are restricted to Dataset,
   DatasetInstrument, DatasetTechnique, and DatasetParameter.
@@ -44,8 +44,8 @@ ways:
 
 * references to the related Dataset in DatasetInstrument,
   DatasetTechnique, and DatasetParameter definitions are restricted to
-  :ref:`local keys <ICAT-data-files-references>`.  These objects can
-  thus only relate to Datasets defined in the same ingest file.
+  :ref:`local keys <ICAT-data-files-references>`.  As a result, these
+  objects can only relate to Datasets defined in the same ingest file.
 
 * other object references are restricted to reference by attributes.
 

From d0832106b61b277080068850a014ca210d8fb234 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 22 Jan 2024 10:19:52 +0100
Subject: [PATCH 063/102] .gitignore .local

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index ecd6841e..67904d8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 __pycache__/
+/.local/
 /MANIFEST
 /_meta.py
 /build/

From addbdb3547458dd8a34bd67ba2f124582f85d111 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 22 Jan 2024 11:45:14 +0100
Subject: [PATCH 064/102] Inject an environment element into the input in
 IngestReader

---
 etc/ingest.xslt    |  2 ++
 src/icat/ingest.py | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/etc/ingest.xslt b/etc/ingest.xslt
index 6e1e5cee..2e8df084 100644
--- a/etc/ingest.xslt
+++ b/etc/ingest.xslt
@@ -12,6 +12,8 @@
 
     <xsl:template match="/icatingest/head"/>
 
+    <xsl:template match="/icatingest/_environment"/>
+
     <xsl:template match="/icatingest/data">
 	<data>
 	    <xsl:apply-templates/>
diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 5d69b963..47b00a2a 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -106,6 +106,9 @@ def __init__(self, client, metadata, investigation):
             schema = etree.XMLSchema(etree.parse(f))
         if not schema.validate(ingest_data):
             raise InvalidIngestFileError("validation failed")
+        env = self.get_environment(client)
+        env_elem = etree.Element("_environment", **env)
+        ingest_data.getroot().insert(1, env_elem)
         with self.get_xslt(ingest_data).open("rb") as f:
             xslt = etree.XSLT(etree.parse(f))
         super().__init__(client, xslt(ingest_data))
@@ -176,6 +179,17 @@ def get_xslt(self, ingest_data):
             raise InvalidIngestFileError("unknown format")
         return self.SchemaDir / xslt
 
+    def get_environment(self, client):
+        """Get the environment to be injected as an element into the input.
+
+        :param client: the client object being used by this
+            IngestReader.
+        :type client: :class:`icat.client.Client`
+        :return: the environment.
+        :rtype: :class:`dict`
+        """
+        return dict(icat_version=str(client.apiversion))
+
     def getobjs_from_data(self, data, objindex):
         typed_objindex = set()
         for key, obj in super().getobjs_from_data(data, objindex):

From 56e5bb2ffb76ddcb1e468bfb538780acbf782f40 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 22 Jan 2024 14:37:56 +0100
Subject: [PATCH 065/102] Add a test for accessing the environment added in
 IngestReader with XSLT

---
 MANIFEST.in                |  1 +
 tests/data/ingest-env.xslt | 59 ++++++++++++++++++++++++++++++++++++++
 tests/test_06_ingest.py    | 35 ++++++++++++++++++++++
 3 files changed, 95 insertions(+)
 create mode 100644 tests/data/ingest-env.xslt

diff --git a/MANIFEST.in b/MANIFEST.in
index a7c92f8b..1a57e4e0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -16,6 +16,7 @@ include doc/tutorial/*.py
 include etc/ingest-*.xsd
 include etc/ingest.xslt
 include tests/conftest.py
+include tests/data/ingest-env.xslt
 include tests/data/legacy-icatdump-*.xml
 include tests/data/legacy-icatdump-*.yaml
 include tests/data/metadata-*.xml
diff --git a/tests/data/ingest-env.xslt b/tests/data/ingest-env.xslt
new file mode 100644
index 00000000..1c60c4bc
--- /dev/null
+++ b/tests/data/ingest-env.xslt
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet version="1.0"
+		xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+    <xsl:output method="xml"/>
+
+    <xsl:template match="/icatingest">
+	<icatdata>
+	    <xsl:apply-templates/>
+	</icatdata>
+    </xsl:template>
+
+    <xsl:template match="/icatingest/head">
+	<head>
+	    <date>2024-01-22T14:30:51+01:00</date>
+	    <apiversion>
+		<xsl:copy-of select="string(/icatingest/_environment/@icat_version)"/>
+	    </apiversion>
+	    <generator>ingest-env.xslt</generator>
+	</head>
+    </xsl:template>
+
+    <xsl:template match="/icatingest/_environment"/>
+
+    <xsl:template match="/icatingest/data">
+	<data>
+	    <xsl:apply-templates/>
+	</data>
+    </xsl:template>
+
+    <xsl:template match="/icatingest/data/dataset">
+	<dataset>
+	    <xsl:copy-of select="@id"/>
+	    <complete>false</complete>
+	    <xsl:copy-of select="description"/>
+	    <xsl:copy-of select="endDate"/>
+	    <xsl:copy-of select="name"/>
+	    <xsl:copy-of select="startDate"/>
+	    <investigation ref="_Investigation"/>
+	    <xsl:apply-templates select="sample"/>
+	    <type name="raw"/>
+	    <xsl:copy-of select="datasetInstruments"/>
+	    <xsl:copy-of select="datasetTechniques"/>
+	    <xsl:copy-of select="parameters"/>
+	</dataset>
+    </xsl:template>
+
+    <xsl:template match="/icatingest/data/dataset/sample">
+	<xsl:copy>
+	    <xsl:attribute name="investigation.ref">_Investigation</xsl:attribute>
+	    <xsl:copy-of select="@*"/>
+	</xsl:copy>
+    </xsl:template>
+
+    <xsl:template match="*">
+	<xsl:copy-of select="."/>
+    </xsl:template>
+
+</xsl:stylesheet>
diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index b9c560f3..8806d957 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -714,3 +714,38 @@ def test_custom_ingest(client, investigation, samples, schemadir, case):
         ds = client.assertedSearch(query)[0]
         for query, res in case.checks[name]:
             assert client.assertedSearch(query % ds.id)[0] == res
+
+
+env_cases = [
+    Case(
+        data = ["testingest_inl_1", "testingest_inl_2"],
+        metadata = gettestdata("metadata-4.4-inl.xml"),
+        schema = gettestdata("icatdata-4.4.xsd"),
+        checks = {},
+        marks = (),
+    ),
+]
+@pytest.mark.parametrize("case", [
+    pytest.param(c, id=c.metadata.name, marks=c.marks) for c in env_cases
+])
+def test_ingest_env(monkeypatch, client, investigation, schemadir, case):
+    """Test using the _environment element.
+
+    Applying a custom XSLT that extracts an attribute from the
+    _environment element that is injected by IngestReader into the
+    input data and puts that value into the head element of the
+    transformed input.  This is to test that adding the _environment
+    element works and it is in principle possible to make use of the
+    values in the XSLT.
+    """
+    monkeypatch.setattr(IngestReader,
+                        "XSLT_Map", dict(icatingest="ingest-env.xslt"))
+    datasets = []
+    for name in case.data:
+        datasets.append(client.new("Dataset", name=name))
+    reader = IngestReader(client, case.metadata, investigation)
+    with case.schema.open("rb") as f:
+        schema = etree.XMLSchema(etree.parse(f))
+    assert schema.validate(reader.infile)
+    ver = reader.infile.xpath("/icatdata/head/apiversion")[0].text
+    assert ver == str(client.apiversion)

From 0052bc526f30fe52e7960315b85312badef097af Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 22 Jan 2024 15:33:46 +0100
Subject: [PATCH 066/102] Add an assertion that the version element is found
 before accessing its text value

---
 tests/test_06_ingest.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index 8806d957..b27a9afe 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -747,5 +747,6 @@ def test_ingest_env(monkeypatch, client, investigation, schemadir, case):
     with case.schema.open("rb") as f:
         schema = etree.XMLSchema(etree.parse(f))
     assert schema.validate(reader.infile)
-    ver = reader.infile.xpath("/icatdata/head/apiversion")[0].text
-    assert ver == str(client.apiversion)
+    version_elem = reader.infile.xpath("/icatdata/head/apiversion")
+    assert version_elem
+    assert version_elem[0].text == str(client.apiversion)

From c28521b647429a5afc2a0052c0c4f300ec9fc10e Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 22 Jan 2024 15:52:52 +0100
Subject: [PATCH 067/102] Minor: move the additional _environment element to
 index 0

---
 src/icat/ingest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 47b00a2a..6297bab6 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -108,7 +108,7 @@ def __init__(self, client, metadata, investigation):
             raise InvalidIngestFileError("validation failed")
         env = self.get_environment(client)
         env_elem = etree.Element("_environment", **env)
-        ingest_data.getroot().insert(1, env_elem)
+        ingest_data.getroot().insert(0, env_elem)
         with self.get_xslt(ingest_data).open("rb") as f:
             xslt = etree.XSLT(etree.parse(f))
         super().__init__(client, xslt(ingest_data))

From 7894e9e038e7b0197f2d195ab4db5aa155c43139 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 22 Jan 2024 16:38:48 +0100
Subject: [PATCH 068/102] Document the change

---
 doc/src/ingest.rst |  4 ----
 src/icat/ingest.py | 10 ++++++++++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst
index 37785613..a9610f30 100644
--- a/doc/src/ingest.rst
+++ b/doc/src/ingest.rst
@@ -55,10 +55,6 @@ the ``Dataset``.
 .. versionchanged:: 1.2.0
    add version 1.1 of the ingest file format, including references to samples
 
-.. versionchanged:: 1.3.0
-   drop class attribute :attr:`~icat.ingest.IngestReader.XSLT_name` in
-   favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`.
-
 .. autoclass:: icat.ingest.IngestReader
     :members:
     :show-inheritance:
diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 6297bab6..98dff164 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -69,6 +69,14 @@ class IngestReader(XMLDumpFileReader):
     :type investigation: :class:`icat.entity.Entity`
     :raise icat.exception.InvalidIngestFileError: if the input in
         metadata is not valid.
+
+    .. versionchanged:: 1.3.0
+       drop class attribute :attr:`~icat.ingest.IngestReader.XSLT_name`
+       in favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`.
+
+    .. versionchanged:: 1.3.0
+        inject an element `_environment` as first child of the root
+        element into the input data.
     """
 
     SchemaDir = Path("/usr/share/icat")
@@ -187,6 +195,8 @@ def get_environment(self, client):
         :type client: :class:`icat.client.Client`
         :return: the environment.
         :rtype: :class:`dict`
+
+        .. versionadded:: 1.3.0
         """
         return dict(icat_version=str(client.apiversion))
 

From 76701a1912afb0ff71c3ca1a5459014a47830f3f Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 22 Jan 2024 16:17:59 +0100
Subject: [PATCH 069/102] Move the code to inject the environment element into
 the input into a separate method of IngestReader, so it can be overridden in
 subclasses

---
 src/icat/ingest.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 98dff164..0b8f2e8f 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -114,9 +114,7 @@ def __init__(self, client, metadata, investigation):
             schema = etree.XMLSchema(etree.parse(f))
         if not schema.validate(ingest_data):
             raise InvalidIngestFileError("validation failed")
-        env = self.get_environment(client)
-        env_elem = etree.Element("_environment", **env)
-        ingest_data.getroot().insert(0, env_elem)
+        self.add_environment(client, ingest_data)
         with self.get_xslt(ingest_data).open("rb") as f:
             xslt = etree.XSLT(etree.parse(f))
         super().__init__(client, xslt(ingest_data))
@@ -200,6 +198,21 @@ def get_environment(self, client):
         """
         return dict(icat_version=str(client.apiversion))
 
+    def add_environment(self, client, ingest_data):
+        """Inject environment information into input data.
+
+        :param client: the client object being used by this
+            IngestReader.
+        :type client: :class:`icat.client.Client`
+        :param ingest_data: input data
+        :type ingest_data: :class:`lxml.etree._ElementTree`
+
+        .. versionadded:: 1.3.0
+        """
+        env = self.get_environment(client)
+        env_elem = etree.Element("_environment", **env)
+        ingest_data.getroot().insert(0, env_elem)
+
     def getobjs_from_data(self, data, objindex):
         typed_objindex = set()
         for key, obj in super().getobjs_from_data(data, objindex):

From 47ef98facf21ad6b4becb217515411327d66862b Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 22 Jan 2024 17:00:38 +0100
Subject: [PATCH 070/102] - Fix: should filter out environment element in XSLT
 for   test_custom_ingest() - Minor: update the order of the templates in the
 XSLT according to   order of elements in the input

---
 etc/ingest.xslt            | 4 ++--
 tests/data/ingest-env.xslt | 4 ++--
 tests/data/myingest.xslt   | 2 ++
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/etc/ingest.xslt b/etc/ingest.xslt
index 2e8df084..ad14d715 100644
--- a/etc/ingest.xslt
+++ b/etc/ingest.xslt
@@ -10,10 +10,10 @@
 	</icatdata>
     </xsl:template>
 
-    <xsl:template match="/icatingest/head"/>
-
     <xsl:template match="/icatingest/_environment"/>
 
+    <xsl:template match="/icatingest/head"/>
+
     <xsl:template match="/icatingest/data">
 	<data>
 	    <xsl:apply-templates/>
diff --git a/tests/data/ingest-env.xslt b/tests/data/ingest-env.xslt
index 1c60c4bc..8e0eb4e7 100644
--- a/tests/data/ingest-env.xslt
+++ b/tests/data/ingest-env.xslt
@@ -10,6 +10,8 @@
 	</icatdata>
     </xsl:template>
 
+    <xsl:template match="/icatingest/_environment"/>
+
     <xsl:template match="/icatingest/head">
 	<head>
 	    <date>2024-01-22T14:30:51+01:00</date>
@@ -20,8 +22,6 @@
 	</head>
     </xsl:template>
 
-    <xsl:template match="/icatingest/_environment"/>
-
     <xsl:template match="/icatingest/data">
 	<data>
 	    <xsl:apply-templates/>
diff --git a/tests/data/myingest.xslt b/tests/data/myingest.xslt
index 4016a60e..7b7f591b 100644
--- a/tests/data/myingest.xslt
+++ b/tests/data/myingest.xslt
@@ -10,6 +10,8 @@
 	</icatdata>
     </xsl:template>
 
+    <xsl:template match="/myingest/_environment"/>
+
     <xsl:template match="/myingest/head"/>
 
     <xsl:template match="/myingest/data">

From fbe060005434ebe1423678a8fc6ead52ca7b4f54 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Tue, 30 Jan 2024 11:50:44 +0100
Subject: [PATCH 071/102] test_06_ingest.py: don't use an icatdata schema
 adapted to the test case, always validate against the schema according to the
 ICAT server version we are talking to

---
 tests/conftest.py       | 14 ++++++++++++++
 tests/test_06_ingest.py | 25 +++++--------------------
 2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 25c01dcb..104901da 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -159,6 +159,20 @@ def require_dumpfile_backend(backend):
         _skip("need %s backend for icat.dumpfile" % (backend))
 
 
+def get_icatdata_schema():  # select the icatdata XSD test file for icat_version
+    if icat_version < "4.4":  # anything older than 4.4 gets the 4.3 schema
+        fname = "icatdata-4.3.xsd"
+    elif icat_version < "4.7":
+        fname = "icatdata-4.4.xsd"
+    elif icat_version < "4.10":
+        fname = "icatdata-4.7.xsd"
+    elif icat_version < "5.0":
+        fname = "icatdata-4.10.xsd"
+    else:  # 5.0 and newer
+        fname = "icatdata-5.0.xsd"
+    return gettestdata(fname)  # the tests call .open("rb") on the result
+
+
 def get_reference_dumpfile(ext = "yaml"):
     require_icat_version("4.4.0", "oldest available set of test data")
     if icat_version < "4.7":
diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index b27a9afe..e0456d67 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -11,7 +11,8 @@
 import icat.config
 from icat.ingest import IngestReader
 from icat.query import Query
-from conftest import getConfig, gettestdata, icat_version, testdatadir
+from conftest import (getConfig, gettestdata, icat_version,
+                      get_icatdata_schema, testdatadir)
 
 
 def get_test_investigation(client):
@@ -80,14 +81,13 @@ class MyIngestReader(IngestReader):
 cet = datetime.timezone(datetime.timedelta(hours=1))
 cest = datetime.timezone(datetime.timedelta(hours=2))
 
-Case = namedtuple('Case', ['data', 'metadata', 'schema', 'checks', 'marks'])
+Case = namedtuple('Case', ['data', 'metadata', 'checks', 'marks'])
 
 # Try out different variants for the metadata input file
 cases = [
     Case(
         data = ["testingest_inl_1", "testingest_inl_2"],
         metadata = gettestdata("metadata-4.4-inl.xml"),
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {
             "testingest_inl_1": [
                 ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -127,7 +127,6 @@ class MyIngestReader(IngestReader):
     Case(
         data = ["testingest_inl5_1", "testingest_inl5_2"],
         metadata = gettestdata("metadata-5.0-inl.xml"),
-        schema = gettestdata("icatdata-5.0.xsd"),
         checks = {
             "testingest_inl5_1": [
                 ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -186,7 +185,6 @@ class MyIngestReader(IngestReader):
     Case(
         data = ["testingest_sep_1", "testingest_sep_2"],
         metadata = gettestdata("metadata-4.4-sep.xml"),
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {
             "testingest_sep_1": [
                 ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -226,7 +224,6 @@ class MyIngestReader(IngestReader):
     Case(
         data = ["testingest_sep5_1", "testingest_sep5_2"],
         metadata = gettestdata("metadata-5.0-sep.xml"),
-        schema = gettestdata("icatdata-5.0.xsd"),
         checks = {
             "testingest_sep5_1": [
                 ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -286,7 +283,6 @@ class MyIngestReader(IngestReader):
         data = [ "testingest_sample_1", "testingest_sample_2",
                  "testingest_sample_3", "testingest_sample_4" ],
         metadata = gettestdata("metadata-sample.xml"),
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {
             "testingest_sample_1": [
                 ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -357,7 +353,7 @@ def test_ingest_schema(client, investigation, schemadir, case):
     for name in case.data:
         datasets.append(client.new("Dataset", name=name))
     reader = IngestReader(client, case.metadata, investigation)
-    with case.schema.open("rb") as f:
+    with get_icatdata_schema().open("rb") as f:
         schema = etree.XMLSchema(etree.parse(f))
     assert schema.validate(reader.infile)
 
@@ -406,7 +402,6 @@ def test_ingest(client, investigation, samples, schemadir, case):
     Case(
         data = ["testingest_io_1"],
         metadata = io_metadata,
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {
             "testingest_io_1": [
                 ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -529,28 +524,24 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
     Case(
         data = [],
         metadata = invalid_root_metadata,
-        schema = None,
         checks = {},
         marks = (),
     ),
     Case(
         data = [],
         metadata = invalid_ver_metadata,
-        schema = None,
         checks = {},
         marks = (),
     ),
     Case(
         data = ["testingest_err_invalid_ref"],
         metadata = invalid_ref_metadata,
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
         marks = (),
     ),
     Case(
         data = ["testingest_err_invalid_dup"],
         metadata = invalid_dup_metadata,
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
         marks = (),
     ),
@@ -558,7 +549,6 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
         data = ["testingest_err_invalid_dup_id_1",
                 "testingest_err_invalid_dup_id_2"],
         metadata = invalid_dup_id_metadata,
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
         marks = (),
     ),
@@ -614,14 +604,12 @@ def test_ingest_error_invalid(client, investigation, schemadir, case):
     Case(
         data = ["testingest_err_search_attr"],
         metadata = searcherr_attr_metadata,
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
         marks = (),
     ),
     Case(
         data = ["testingest_err_search_ref"],
         metadata = searcherr_ref_metadata,
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
         marks = (),
     ),
@@ -642,7 +630,6 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case):
     Case(
         data = ["testingest_custom_icatingest_1"],
         metadata = gettestdata("metadata-custom-icatingest.xml"),
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {
             "testingest_custom_icatingest_1": [
                 ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -662,7 +649,6 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case):
     Case(
         data = ["testingest_custom_myingest_1"],
         metadata = gettestdata("metadata-custom-myingest.xml"),
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {
             "testingest_custom_myingest_1": [
                 ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -720,7 +706,6 @@ def test_custom_ingest(client, investigation, samples, schemadir, case):
     Case(
         data = ["testingest_inl_1", "testingest_inl_2"],
         metadata = gettestdata("metadata-4.4-inl.xml"),
-        schema = gettestdata("icatdata-4.4.xsd"),
         checks = {},
         marks = (),
     ),
@@ -744,7 +729,7 @@ def test_ingest_env(monkeypatch, client, investigation, schemadir, case):
     for name in case.data:
         datasets.append(client.new("Dataset", name=name))
     reader = IngestReader(client, case.metadata, investigation)
-    with case.schema.open("rb") as f:
+    with get_icatdata_schema().open("rb") as f:
         schema = etree.XMLSchema(etree.parse(f))
     assert schema.validate(reader.infile)
     version_elem = reader.infile.xpath("/icatdata/head/apiversion")

From dd7473f5d07a608fdeaf153008ad61d1a2428565 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Fri, 2 Feb 2024 15:50:11 +0100
Subject: [PATCH 072/102] Add an example to the Metadata ingest files Section
 of the documentation

---
 MANIFEST.in                 |  1 +
 doc/examples/metadata.xml   | 94 +++++++++++++++++++++++++++++++++++++
 doc/src/file-icatingest.rst | 34 ++++++++++++++
 3 files changed, 129 insertions(+)
 create mode 100644 doc/examples/metadata.xml

diff --git a/MANIFEST.in b/MANIFEST.in
index a7c92f8b..655665c1 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -10,6 +10,7 @@ include doc/examples/icatdump-*.xml
 include doc/examples/icatdump-*.yaml
 include doc/examples/ingest-*.xml
 include doc/examples/metadata-*.xml
+include doc/examples/metadata.xml
 include doc/icatdata*.xsd
 include doc/man/*
 include doc/tutorial/*.py
diff --git a/doc/examples/metadata.xml b/doc/examples/metadata.xml
new file mode 100644
index 00000000..121b0432
--- /dev/null
+++ b/doc/examples/metadata.xml
@@ -0,0 +1,94 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<icatingest version="1.1">
+  <head>
+    <date>2024-02-02T12:52:00+01:00</date>
+    <generator>metadata-writer 0.28</generator>
+  </head>
+  <data>
+    <dataset id="Dataset_1">
+      <name>e202553</name>
+      <description>Dy01Cp02 at 2.7 K</description>
+      <startDate>2020-09-30T18:02:17+02:00</startDate>
+      <endDate>2020-09-30T20:18:36+02:00</endDate>
+      <sample name="ab3465"/>
+      <datasetInstruments>
+        <instrument pid="DOI:00.0815/inst-00001"/>
+      </datasetInstruments>
+      <datasetTechniques>
+        <technique pid="PaNET:PaNET01217"/>
+      </datasetTechniques>
+    </dataset>
+    <dataset id="Dataset_2">
+      <name>e202554</name>
+      <description>Dy01Cp02 at 5.1 K</description>
+      <startDate>2020-09-30T20:29:19+02:00</startDate>
+      <endDate>2020-09-30T21:23:49+02:00</endDate>
+      <sample name="ab3465"/>
+      <datasetInstruments>
+        <instrument pid="DOI:00.0815/inst-00001"/>
+      </datasetInstruments>
+      <datasetTechniques>
+        <technique pid="PaNET:PaNET01217"/>
+      </datasetTechniques>
+    </dataset>
+    <dataset id="Dataset_3">
+      <name>e202555</name>
+      <description>Dy01Cp02 at 2.7 K</description>
+      <startDate>2020-09-30T21:35:16+02:00</startDate>
+      <endDate>2020-09-30T23:04:27+02:00</endDate>
+      <sample name="ab3466"/>
+      <datasetInstruments>
+        <instrument pid="DOI:00.0815/inst-00001"/>
+      </datasetInstruments>
+      <datasetTechniques>
+        <technique pid="PaNET:PaNET01217"/>
+      </datasetTechniques>
+    </dataset>
+    <dataset id="Dataset_4">
+      <name>e202556</name>
+      <description>reference</description>
+      <startDate>2020-09-30T23:04:31+02:00</startDate>
+      <endDate>2020-10-01T01:26:07+02:00</endDate>
+      <datasetInstruments>
+        <instrument pid="DOI:00.0815/inst-00001"/>
+      </datasetInstruments>
+      <datasetTechniques>
+        <technique pid="PaNET:PaNET01217"/>
+      </datasetTechniques>
+    </dataset>
+    <datasetParameter>
+      <stringValue>neutron</stringValue>
+      <dataset ref="Dataset_1"/>
+      <type name="Probe"/>
+    </datasetParameter>
+    <datasetParameter>
+      <numericValue>5.3</numericValue>
+      <dataset ref="Dataset_1"/>
+      <type name="Reactor power" units="MW"/>
+    </datasetParameter>
+    <datasetParameter>
+      <numericValue>2.74103</numericValue>
+      <rangeBottom>2.7408</rangeBottom>
+      <rangeTop>2.7414</rangeTop>
+      <dataset ref="Dataset_1"/>
+      <type name="Sample temperature" units="K"/>
+    </datasetParameter>
+    <datasetParameter>
+      <stringValue>neutron</stringValue>
+      <dataset ref="Dataset_2"/>
+      <type name="Probe"/>
+    </datasetParameter>
+    <datasetParameter>
+      <numericValue>5.3</numericValue>
+      <dataset ref="Dataset_2"/>
+      <type name="Reactor power" units="MW"/>
+    </datasetParameter>
+    <datasetParameter>
+      <numericValue>5.1239</numericValue>
+      <rangeBottom>5.1045</rangeBottom>
+      <rangeTop>5.1823</rangeTop>
+      <dataset ref="Dataset_2"/>
+      <type name="Sample temperature" units="K"/>
+    </datasetParameter>
+  </data>
+</icatingest>
diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
index bf2af389..2c650263 100644
--- a/doc/src/file-icatingest.rst
+++ b/doc/src/file-icatingest.rst
@@ -19,6 +19,9 @@ control the Investigation that newly created Datasets will be added
 to.  It would be rather difficult to control the power of the input
 format if we would use plain ICAT data files for this purpose.
 
+Differences compared to ICAT data XML files
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 Class :class:`icat.ingest.IngestReader` takes an ``investigation``
 argument.  We will refer to the Investigation given in this argument
 as the *prescribed Investigation* in the following.  The metadata
@@ -55,3 +58,34 @@ Schema Definition (XSD).
 Another change with respect to ICAT data XML files is that the name of
 the root element is ``icatingest`` and that it must have a ``version``
 attrbute.
+
+Example
+~~~~~~~
+
+Consider the following example:
+
+.. literalinclude:: ../examples/metadata.xml
+   :language: xml
+
+This file defines four Datasets with related objects.  All datasets
+have a ``name``, ``description``, ``startDate``, and ``endDate``
+attribute and include a relation with an Instrument and a Technique,
+respectively.
+
+Note that the Datasets have no ``complete`` attribute and no relation
+with Investigation or DatasetType respectively.  All of these are
+added with prescribed values by class
+:class:`icat.ingest.IngestReader`.
+
+Some Datasets relate to Samples: the first two Datasets relate to the
+same Sample, the third Dataset to another Sample, while the last
+Dataset has no relation with any Sample.  All Samples a referenced by
+their name.  Class :class:`icat.ingest.IngestReader` will add a
+reference to the Investigation to this, so that only Samples that are
+related to prescribed Investigation can actually be referenced.
+
+Some DatasetParameter are added as separate objects in the file.  They
+respectively reference their related Datasets using local keys that
+are defined in the ``id`` attribute of the corresponding Dataset
+earlier in the file.  Alternatively, the DatasetParameter could have
+been included into the respective Datasets.

From 634a4163b344acfd8360a3a274a479167fcf097b Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 12 Feb 2024 12:01:05 +0100
Subject: [PATCH 073/102] Language fixes in the documentation

---
 doc/src/file-icatdata.rst   | 12 ++++++------
 doc/src/file-icatingest.rst | 10 +++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index 73e84f3e..878c87f6 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -6,7 +6,7 @@ ICAT data files
 ICAT data files provide a way to serialize ICAT content to a flat
 file.  These files are read by the :ref:`icatingest` and written by
 the :ref:`icatdump` command line scripts respectively.  The program
-logic for reading and writing the files is provided by the
+logic for reading and writing the files is provided in the
 :mod:`icat.dumpfile` module.
 
 The actual file format depends on the version of the ICAT schema and
@@ -62,13 +62,13 @@ corresponding Grouping objects.
 References to ICAT objects and unique keys
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-References to related objects are encoded in ICAT data files by
+References to related objects are encoded in ICAT data files with
 reference keys.  There are two kinds of those keys, local keys and
 unique keys:
 
 When an ICAT object is defined in the file, it generally defines a
 local key at the same time.  Local keys are stored in the object index
-and may be used to reference this object from other obejcts in the
+and may be used to reference this object from other objects in the
 same data chunk.
 
 Unique keys can be obtained from an object by calling
@@ -149,7 +149,7 @@ Note that the UserGroup does not include its relation with Grouping.
 The latter relationship is implied by the parent relation of the
 object in the file.
 
-As an alternative, the Usergroup could have been added to the file as
+As an alternative, the UserGroup could have been added to the file as
 separate object as direct subelement of ``data``:
 
 .. code-block:: XML
@@ -262,7 +262,7 @@ that these UserGroups include their relation to the User, but not
 their relation with Grouping.  The latter relationship is implied by
 the parent relation of the object in the file.
 
-As an alternative, in the present example, the Usergroups could have
+As an alternative, in the present example, the UserGroups could have
 been added to the file as separate objects as in:
 
 .. code-block:: YAML
@@ -292,7 +292,7 @@ entity types in order to make sure that referenced objects are created
 before any object that may reference them.
 
 
-.. [#dc] There is one exception: DataCollections don't have a
+.. [#dc] There is one exception: DataCollection doesn't have a
          uniqueness constraint and can't reliably be searched by
          attributes.  Therefore local keys for DataCollections are
          always kept in the object index and may be used to reference
diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
index 2c650263..20b853f2 100644
--- a/doc/src/file-icatingest.rst
+++ b/doc/src/file-icatingest.rst
@@ -4,8 +4,8 @@ Metadata ingest files
 =====================
 
 Metadata ingest files are the input format for class
-:class:`icat.ingest.IngestReader`.  This class is intended to be uesd
-in scripts that read the metadata created by experimments into ICAT.
+:class:`icat.ingest.IngestReader`.  This class is intended to be used
+in scripts that read the metadata created by experiments into ICAT.
 The file format is basically a restricted version of
 :ref:`ICAT-data-xml-files`.
 
@@ -57,7 +57,7 @@ Schema Definition (XSD).
 
 Another change with respect to ICAT data XML files is that the name of
 the root element is ``icatingest`` and that it must have a ``version``
-attrbute.
+attribute.
 
 Example
 ~~~~~~~
@@ -79,8 +79,8 @@ added with prescribed values by class
 
 Some Datasets relate to Samples: the first two Datasets relate to the
 same Sample, the third Dataset to another Sample, while the last
-Dataset has no relation with any Sample.  All Samples a referenced by
-their name.  Class :class:`icat.ingest.IngestReader` will add a
+Dataset has no relation with any Sample.  All Samples are referenced
+by their name.  Class :class:`icat.ingest.IngestReader` will add a
 reference to the Investigation to this, so that only Samples that are
 related to prescribed Investigation can actually be referenced.
 

From 987f22ed0c53df433e7406d7d966c5592109ebb0 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 12 Feb 2024 12:19:22 +0100
Subject: [PATCH 074/102] Documentation fix: also the relation to DatasetType
 is added by IngestReader

---
 doc/src/file-icatingest.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
index 20b853f2..4ba46517 100644
--- a/doc/src/file-icatingest.rst
+++ b/doc/src/file-icatingest.rst
@@ -37,9 +37,10 @@ ways:
 * the attributes in the object definitions for Datasets are restricted
   to name, description, startDate, and endDate.
 
-* object definitions for Datasets can not include a reference to the
-  related Investigation.  The relation with the prescribed
-  Investigation will be implied.
+* object definitions for Datasets can not include references to the
+  related Investigation or DatasetType.  These relations will be added
+  by :class:`icat.ingest.IngestReader`.  The relation to the
+  Investigation will be set to the prescribed Investigation.
 
 * object definitions for Datasets can reference a related Sample only
   by name or by pid.  A relation of the related Sample with the

From 21235fadd3be2ae519b358f3d6e4aa923979149f Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 12 Feb 2024 13:45:56 +0100
Subject: [PATCH 075/102] - add a note on the versioning to metadata ingest
 file documentation - move the versionchanged note about adding icatingest 1.1
 from   documentation on module ingest to the metadata ingest file page

---
 doc/src/file-icatingest.rst | 10 ++++++++++
 doc/src/ingest.rst          |  3 ---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
index 4ba46517..22c77814 100644
--- a/doc/src/file-icatingest.rst
+++ b/doc/src/file-icatingest.rst
@@ -19,6 +19,16 @@ control the Investigation that newly created Datasets will be added
 to.  It would be rather difficult to control the power of the input
 format if we would use plain ICAT data files for this purpose.
 
+.. note::
+   The metadata ingest file format is versioned.  This version number
+   is independent from the python-icat version.  It is incremented
+   only when the format changes.  The latest version of the metadata
+   ingest file format is 1.1.
+
+.. versionchanged:: 1.2.0
+   add metadata ingest file format version 1.1: add support for
+   relating Datasets with Samples.
+
 Differences compared to ICAT data XML files
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst
index 72eeb07a..ab6db393 100644
--- a/doc/src/ingest.rst
+++ b/doc/src/ingest.rst
@@ -52,9 +52,6 @@ reference to a ``Sample``.  That ``Sample`` objects needs to exist
 beforehand and needs to be related to the same ``Investigation`` as
 the ``Dataset``.
 
-.. versionchanged:: 1.2.0
-   add version 1.1 of the ingest file format, including references to samples
-
 .. autoclass:: icat.ingest.IngestReader
     :members:
     :show-inheritance:

From c3d360b997e25700228fc773e4d9b0625108208b Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 12 Feb 2024 14:07:53 +0100
Subject: [PATCH 076/102] Update documentation for module icat.ingest taking
 into account the new file format documentation

---
 doc/src/ingest.rst | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst
index ab6db393..9ab94740 100644
--- a/doc/src/ingest.rst
+++ b/doc/src/ingest.rst
@@ -11,7 +11,7 @@
    even in minor releases of python-icat.
 
 This module provides class :class:`icat.ingest.IngestReader` that
-reads metadata from an XML file to add them to ICAT.  It is designed
+reads :ref:`ICAT-ingest-files` to add them to ICAT.  It is designed
 for the use case of ingesting metadata for datasets created during
 experiments.
 
@@ -21,22 +21,14 @@ that base class in restricting the vocabular of the input file: only
 objects that need to be created during ingestion from the experiment
 may appear in the input.  This restriction is enforced by first
 validating the input against an XML Schema Definition (XSD).  In a
-second step, the input is transformed into generic XML :ref:`ICAT data
-file <ICAT-data-files>` format using an XSL Transformation (XSLT) and
-then fed into :class:`~icat.dumpfile_xml.XMLDumpFileReader`.  The
-format of the input files may be customized to some extent by providing
-custom versions of XSD and XSLT files, see :ref:`ingest-customize`
-below.
-
-The input accepted by :class:`~icat.ingest.IngestReader` consists of
-one or more ``Dataset`` objects that all need to relate to the same
-``Investigation`` and any number of related ``DatasetTechnique``,
-``DatasetInstrument``, and ``DatasetParameter`` objects.  The
-``Investigation`` must exist beforehand in ICAT.  The relation from
-the ``Dataset`` objects to the ``Investigation`` will be set by
-:class:`~icat.ingest.IngestReader` accordingly.  (Actually, the XSLT
-will add that attribute to the datasets in the input.)  The
-``Dataset`` objects will not be created by
+second step, the input is transformed into generic :ref:`ICAT data XML
+file format <ICAT-data-xml-files>` using an XSL Transformation (XSLT)
+and then fed into :class:`~icat.dumpfile_xml.XMLDumpFileReader`.  The
+format of the input files may be customized to some extent by
+providing custom versions of XSD and XSLT files, see
+:ref:`ingest-customize` below.
+
+The ``Dataset`` objects in the input will not be created by
 :class:`~icat.ingest.IngestReader`, because it is assumed that a
 separate workflow in the caller will copy the content of datafiles to
 the storage managed by IDS and create the corresponding ``Dataset``
@@ -47,11 +39,6 @@ of the datasets will be read from the input file and set in the
 ``DatasetTechnique``, ``DatasetInstrument`` and ``DatasetParameter``
 objects read from the input file in ICAT.
 
-Using ingest file format 1.1, ``Dataset`` objects may also include a
-reference to a ``Sample``.  That ``Sample`` objects needs to exist
-beforehand and needs to be related to the same ``Investigation`` as
-the ``Dataset``.
-
 .. autoclass:: icat.ingest.IngestReader
     :members:
     :show-inheritance:

From f2b9657153cbb87d66bf7bfaa272f3fd89466d5e Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 12 Feb 2024 14:15:12 +0100
Subject: [PATCH 077/102] Another language fix

---
 doc/src/file-icatingest.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
index 22c77814..9794ba75 100644
--- a/doc/src/file-icatingest.rst
+++ b/doc/src/file-icatingest.rst
@@ -15,8 +15,8 @@ problem is that this file format is too powerful: it can encode any
 ICAT content.  We want the ingest files from the experiment to create
 new Datasets and DatasetParameters, we certainly don't want these
 files to create new Instruments or Users in ICAT.  And we also want to
-control the Investigation that newly created Datasets will be added
-to.  It would be rather difficult to control the power of the input
+control to which Investigation newly created Datasets are going to be
+added.  It would be rather difficult to control the power of the input
 format if we would use plain ICAT data files for this purpose.
 
 .. note::

From 9ad05b5962c3279a4e6be85e56c96337980ab242 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 12 Feb 2024 14:22:56 +0100
Subject: [PATCH 078/102] Yet another language fix

---
 doc/src/file-icatingest.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
index 9794ba75..7348259f 100644
--- a/doc/src/file-icatingest.rst
+++ b/doc/src/file-icatingest.rst
@@ -93,7 +93,7 @@ same Sample, the third Dataset to another Sample, while the last
 Dataset has no relation with any Sample.  All Samples are referenced
 by their name.  Class :class:`icat.ingest.IngestReader` will add a
 reference to the Investigation to this, so that only Samples that are
-related to prescribed Investigation can actually be referenced.
+related to the prescribed Investigation can actually be referenced.
 
 Some DatasetParameter are added as separate objects in the file.  They
 respectively reference their related Datasets using local keys that

From 3eefef5a94405e290705a9f89aa2cb208976d7a5 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 12 Feb 2024 14:41:22 +0100
Subject: [PATCH 079/102] Add link anchors to the entries for each version in
 the changelog in order to provide more stable permalinks

---
 CHANGES.rst | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index 1744b6ef..a152e835 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,6 +2,8 @@ Changelog
 =========
 
 
+.. _changes-1_3_0:
+
 1.3.0 (not yet released)
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -36,6 +38,8 @@ Bug fixes and minor changes
 .. _#147: https://github.com/icatproject/python-icat/pull/147
 
 
+.. _changes-1_2_0:
+
 1.2.0 (2023-10-31)
 ~~~~~~~~~~~~~~~~~~
 
@@ -84,6 +88,8 @@ Bug fixes and minor changes
 .. _#140: https://github.com/icatproject/python-icat/pull/140
 
 
+.. _changes-1_1_0:
+
 1.1.0 (2023-06-30)
 ~~~~~~~~~~~~~~~~~~
 
@@ -139,6 +145,8 @@ Bug fixes and minor changes
 .. _#129: https://github.com/icatproject/python-icat/pull/129
 
 
+.. _changes-1_0_0:
+
 1.0.0 (2022-12-21)
 ~~~~~~~~~~~~~~~~~~
 
@@ -231,6 +239,8 @@ Bug fixes and minor changes
 .. _#106: https://github.com/icatproject/python-icat/pull/106
 
 
+.. _changes-0_21_0:
+
 0.21.0 (2022-01-28)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -249,6 +259,8 @@ New features
 .. _#100: https://github.com/icatproject/python-icat/pull/100
 
 
+.. _changes-0_20_1:
+
 0.20.1 (2021-11-04)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -260,6 +272,8 @@ Bug fixes and minor changes
 .. _#96: https://github.com/icatproject/python-icat/pull/96
 
 
+.. _changes-0_20_0:
+
 0.20.0 (2021-10-29)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -296,6 +310,8 @@ Bug fixes and minor changes
 .. _#95: https://github.com/icatproject/python-icat/pull/95
 
 
+.. _changes-0_19_0:
+
 0.19.0 (2021-07-20)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -324,6 +340,8 @@ Bug fixes and minor changes
 .. _#85: https://github.com/icatproject/python-icat/pull/85
 
 
+.. _changes-0_18_1:
+
 0.18.1 (2021-04-13)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -341,6 +359,8 @@ Bug fixes and minor changes
 .. _#82: https://github.com/icatproject/python-icat/pull/82
 
 
+.. _changes-0_18_0:
+
 0.18.0 (2021-03-29)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -377,6 +397,8 @@ Bug fixes and minor changes
 .. _#80: https://github.com/icatproject/python-icat/pull/80
 
 
+.. _changes-0_17_0:
+
 0.17.0 (2020-04-30)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -468,6 +490,8 @@ Misc
 .. _#72: https://github.com/icatproject/python-icat/issues/72
 
 
+.. _changes-0_16_0:
+
 0.16.0 (2019-09-26)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -492,6 +516,8 @@ Bug fixes and minor changes
 .. _#60: https://github.com/icatproject/python-icat/pull/60
 
 
+.. _changes-0_15_1:
+
 0.15.1 (2019-07-12)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -513,6 +539,8 @@ Bug fixes and minor changes
 .. _#57: https://github.com/icatproject/python-icat/issues/57
 
 
+.. _changes-0_15_0:
+
 0.15.0 (2019-03-27)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -551,6 +579,8 @@ Bug fixes and minor changes
 .. _#54: https://github.com/icatproject/python-icat/issues/54
 
 
+.. _changes-0_14_2:
+
 0.14.2 (2018-10-25)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -563,6 +593,8 @@ Bug fixes and minor changes
   probably not need it.
 
 
+.. _changes-0_14_1:
+
 0.14.1 (2018-06-05)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -573,6 +605,8 @@ Bug fixes and minor changes
   for the Write API call.
 
 
+.. _changes-0_14_0:
+
 0.14.0 (2018-06-01)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -628,6 +662,8 @@ Bug fixes and minor changes
 .. _#48: https://github.com/icatproject/python-icat/issues/48
 
 
+.. _changes-0_13_1:
+
 0.13.1 (2017-07-12)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -640,6 +676,8 @@ Bug fixes and minor changes
 .. _#38: https://github.com/icatproject/python-icat/issues/38
 
 
+.. _changes-0_13_0:
+
 0.13.0 (2017-06-09)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -798,6 +836,8 @@ Bug fixes and minor changes
 .. _pytest-dependency: https://pypi.python.org/pypi/pytest_dependency/
 
 
+.. _changes-0_12_0:
+
 0.12.0 (2016-10-10)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -837,6 +877,8 @@ Bug fixes and minor changes
 .. _#28: https://github.com/icatproject/python-icat/issues/28
 
 
+.. _changes-0_11_0:
+
 0.11.0 (2016-06-01)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -896,6 +938,8 @@ Misc
 .. _distutils_pytest: https://github.com/RKrahl/distutils-pytest
 
 
+.. _changes-0_10_0:
+
 0.10.0 (2015-12-06)
 ~~~~~~~~~~~~~~~~~~~
 
@@ -964,6 +1008,8 @@ Bug fixes and minor changes
 .. _#15: https://github.com/icatproject/python-icat/issues/15
 
 
+.. _changes-0_9_0:
+
 0.9.0 (2015-08-13)
 ~~~~~~~~~~~~~~~~~~
 
@@ -1067,6 +1113,8 @@ Bug fixes and minor changes
 .. _#10: https://github.com/icatproject/python-icat/issues/10
 
 
+.. _changes-0_8_0:
+
 0.8.0 (2015-05-08)
 ~~~~~~~~~~~~~~~~~~
 
@@ -1156,6 +1204,8 @@ Bug fixes and minor changes
   :meth:`icat.query.Query.__repr__`.
 
 
+.. _changes-0_7_0:
+
 0.7.0 (2015-02-11)
 ~~~~~~~~~~~~~~~~~~
 
@@ -1187,6 +1237,8 @@ New features
   :meth:`icat.ids.IDSClient.getLink` method.
 
 
+.. _changes-0_6_0:
+
 0.6.0 (2014-12-15)
 ~~~~~~~~~~~~~~~~~~
 
@@ -1314,6 +1366,8 @@ Minor changes and fixes
 + Add comparison operators to class :class:`icat.listproxy.ListProxy`.
 
 
+.. _changes-0_5_1:
+
 0.5.1 (2014-07-07)
 ~~~~~~~~~~~~~~~~~~
 
@@ -1357,6 +1411,8 @@ Minor changes and fixes
   modifications, such as running 2to3 on them.
 
 
+.. _changes-0_5_0:
+
 0.5.0 (2014-06-24)
 ~~~~~~~~~~~~~~~~~~
 
@@ -1399,6 +1455,8 @@ Minor changes and fixes
 .. __: https://github.com/icatproject/icat.server/issues/112
 
 
+.. _changes-0_4_0:
+
 0.4.0 (2014-02-11)
 ~~~~~~~~~~~~~~~~~~
 
@@ -1446,6 +1504,8 @@ Minor changes and fixes
   :ref:`icatrestore <icatingest>`.
 
 
+.. _changes-0_3_0:
+
 0.3.0 (2014-01-10)
 ~~~~~~~~~~~~~~~~~~
 
@@ -1492,6 +1552,8 @@ Minor changes and fixes
 + Add example scripts :ref:`icatdump` and :ref:`icatrestore <icatingest>`.
 
 
+.. _changes-0_2_0:
+
 0.2.0 (2013-11-18)
 ~~~~~~~~~~~~~~~~~~
 
@@ -1532,6 +1594,8 @@ Minor changes and fixes
   import :mod:`icat` and :mod:`icat.config`.
 
 
+.. _changes-0_1_0:
+
 0.1.0 (2013-11-01)
 ~~~~~~~~~~~~~~~~~~
 

From f1f2b73fe933898a2ed8f6dca5dfa861a9fa2c3d Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 19 Feb 2024 15:51:29 +0100
Subject: [PATCH 080/102] Dynamically create a file _meta.rst in the
 documentation source that defines substitutions and download links for the
 latest source distribution and signature file

---
 doc/.gitignore  |  1 +
 doc/Makefile    |  1 +
 doc/src/conf.py | 29 ++++++++++++++++++++++++++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/doc/.gitignore b/doc/.gitignore
index e938dd2d..b6a292cd 100644
--- a/doc/.gitignore
+++ b/doc/.gitignore
@@ -1,3 +1,4 @@
+/src/_meta.rst
 /devhelp/
 /dirhtml/
 /doctest/
diff --git a/doc/Makefile b/doc/Makefile
index 9cc7cebc..7358c71a 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -20,6 +20,7 @@ $(BUILDERS): $(STATIC_SOURCEDIRS)
 
 distclean:
 	rm -rf doctrees $(BUILDERS)
+	rm -f src/_meta.rst
 
 $(STATIC_SOURCEDIRS):
 	mkdir $@
diff --git a/doc/src/conf.py b/doc/src/conf.py
index 2f880389..1496d62a 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -9,7 +9,8 @@
 from pathlib import Path
 import sys
 
-maindir = Path(__file__).resolve().parent.parent.parent
+docsrcdir = Path(__file__).resolve().parent
+maindir = docsrcdir.parent.parent
 buildlib = maindir / "build" / "lib"
 sys.path[0] = str(buildlib)
 sys.dont_write_bytecode = True
@@ -28,6 +29,32 @@
 # The short X.Y version
 version = ".".join(release.split(".")[0:2])
 
+# Write a _meta.rst that defines some custom substitutions
+def make_meta_rst(last_release):
+    template = """:orphan:
+
+.. |distribution_source| replace:: %(dist_src_name)s
+.. |distribution_signature| replace:: %(dist_sig_name)s
+.. _distribution_source: %(dist_src_url)s
+.. _distribution_signature: %(dist_sig_url)s
+"""
+    github_repo = "https://github.com/icatproject/python-icat"
+    dist_src_name = "python-icat-%s.tar.gz" % last_release
+    dist_src_url = ("%s/releases/download/%s/%s"
+                    % (github_repo, last_release, dist_src_name))
+    dist_sig_name = "python-icat-%s.tar.gz.asc" % last_release
+    dist_sig_url = ("%s/releases/download/%s/%s"
+                    % (github_repo, last_release, dist_sig_name))
+    subst = {
+        'dist_src_name': dist_src_name,
+        'dist_src_url': dist_src_url,
+        'dist_sig_name': dist_sig_name,
+        'dist_sig_url': dist_sig_url,
+    }
+    with (docsrcdir / '_meta.rst').open('wt') as f:
+        print(template % subst, file=f)
+
+make_meta_rst(icat._meta.release)
 
 # -- General configuration ---------------------------------------------------
 

From 8f35940836c76ef500a465ca45b73163194fbe9e Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 19 Feb 2024 17:26:27 +0100
Subject: [PATCH 081/102] Review install instructions, explaining how to verify
 the signature

---
 doc/src/83F336432C7FCC91.pub | 44 ++++++++++++++++++++++++++++++++
 doc/src/install.rst          | 49 ++++++++++++++++++++++++++++--------
 2 files changed, 83 insertions(+), 10 deletions(-)
 create mode 100644 doc/src/83F336432C7FCC91.pub

diff --git a/doc/src/83F336432C7FCC91.pub b/doc/src/83F336432C7FCC91.pub
new file mode 100644
index 00000000..330f2f80
--- /dev/null
+++ b/doc/src/83F336432C7FCC91.pub
@@ -0,0 +1,44 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQENBFE3WkEBCADM4jKAQMsVlnU5NxbJ5JmpqhPRj54eSkDcvIjPcEQLkMmQjCDT
+HHwN5ZjzHNTj7nXkvmjjWMgyzjpNmdUAofsh6MBp1etXNzYNkoEs+urRlw1wuRaU
+NMK4Pf0G35THrQ0nJdmmCGkzxiTgQTitLVA52zZclq3Vqo/ZsO26gkLB2ErhZJZE
+2q+TL6BBr98m+1zXpG5kqF/IE4pF4Yl1Oysp8imAAbodr+6X1DGfOM2h1NwMSbAo
+Uw49hR4PIwxKP5Sluv6GNUVgyPaOrk8LVE4c+H0lswmz6nZOlxhhbtplN0KViqki
+6pqyrOuwv3ZgzUXO4bjEexScyWe2PxKUzjFFABEBAAG0K1JvbGYgS3JhaGwgPHJv
+bGYua3JhaGxAaGVsbWhvbHR6LWJlcmxpbi5kZT6JATkEEwECACMFAlE3WkECGwMH
+CwkIBwMCAQYVCAIJCgsEFgIDAQIeAQIXgAAKCRCD8zZDLH/Mkcj5CAC0x2GU88xD
+eBR1MyGq9nUTDgjO/EkiztDZirBg1FLGwCVtXY3yZc0nSriEj4oF8lNiGU539rU1
+R+z76UCDTlq/xq2/a1BazStkHuv+OuUfoA/Hl5/Tvp+dwk7BXG6dlyr6joT3i9Pz
+RgH/kFe1RAJNnT/oy5LTRsydcWb/mCey/O/ON47zlKzNbbGvL6YPwmsyaO22vUmO
+JsH4JZM36BDu3Wt2LPB+A51ZanzlxkfA3Mcc0cIe9PsSqufvnV/kG4cQxJedgXes
+lVniggXbtsudl8EqmUpq/yS+/X3BLBfidTA2Yicx6udmR5ZFQHoCrOlcTfylW0mz
+x5rhClZPgrgaiEYEExECAAYFAlQVluoACgkQUcvGPyCdlGaaRgCg0s2cWgUXWeb7
+noexGZNxnmQIMrgAoJqBXBVVrWfd7bwdWT1IEnyGMiCeiEYEExECAAYFAlQVlyYA
+CgkQO0qCjX1HQDs8HACfduvRjIu+wmrvyN+ikPXHN6ZJYOAAni4k+F5m7P9RkUK/
+MPW34JrqaIg8iQIcBBMBAgAGBQJYRG+HAAoJEAihJkF1ND5uePgP/3okgaIQOwcy
+7lN2SiP1k/UxjmqynrdrsTWdGRm+wyJ9Er9WlHgMQavaxk2XOpTQ8DcAuczpNyOb
+qaYI6l+xd8mDvdJ7lbYZboiZj62nb/yUwRAyN3TJ7PRjuWXqLZjVnywQzYN66Z2v
+kuxewEqZUeLVlUcg7IEwwCOErAmHFfYmIER7Q0Hyvc8gdkbFzgQ5UNHyLUngMe+6
+VGLlkoyRykF9DDCmqMQO06Ork78gsTVTHr0LEMG3HyKiQ8rLZouSQS9tiw7RVIji
+nbf1EWRvVwgSXPSsx545uVwUOSyXlozK7AzFxjlFJU8G9+h1fXYlkviFPrsU2vwa
+6q8GiVnaLpwa2QC9iznPTzSnUFh9Eqg8aO4DqpH28L+o5PTClmWUGncqigmYGipm
+2s0AKdtRFVXcz7fmH8JKi9u9dBtJPIbdA3Kq/D6+1GkiS5V0aELWI+0424RJ5qlO
+MHukVUxg0QH/MJnzfRT3MAV5gBpJC5KrijwS7FN8m+CQN35+OMoiBbpOKt/+wQgF
+K31D/M55CZoaeVtkcLiTRjUig2Dwr/16IMd5IcpetNoIcUILDENcWh0mYo02kaJt
+nldsZIAi77goxdgKu41AIIhEv0FmlXp6OB/QoEJRiDOVtxSW7bG1F+JbularecE2
+t5PehBq5k35vxo8tteL1xQIP+8nnOtUJuQENBFE3WkEBCADB84pLmmsdFjV5R+0e
+zL2COBZBUxUPSIuKOdEfHkR5M5AxbXdg9GwxDMZE1TLAdX8sn1ymwUlZt6dSUFO0
+hg0LdZAOMvjvFb6dF+RE7gfeOsH0usTN32NUzW0/S1E2V8LRlplGIXtHa9YZArQw
+k97gpFATheh4K/QHvrIyneVam+B+6WH8zJtBfGmWtjfBLwSiWohQPQAvYBW6hi86
++I3z0yCrOhgM/N9uylgWu+BQzoQ8/Jv2g22bzSa1mbCP1OVp587HpJy9WbX/aKH4
+7I/vp0qLysWekbuX5OOjsiItW2Yv7oK/S7OtoagTUqX3KG1KRTJZHTTS03dy3DME
+fqNtABEBAAGJAR8EGAECAAkFAlE3WkECGwwACgkQg/M2Qyx/zJEJcAgAsE8NNJYX
+/3Vdd9WQih4Xg2Pvz66Z9jwTyS9Rb3boB0gtZMgqsHQBdF9iYNVxREpiVDPA0YKR
+x1iTjFblt9Ryq7MZVPhRI1cfDfHKCw6bMz1hZDBRr1BSZVjiru74OCebreeOMhzI
+zmyP7GSi0q5edZO0zpYkOlme3dQBatSkEAnSDOA9ct6EEMG3ZsQda1YXa9BMKj7e
+B+UdFUdGb5SB8buW5RKLMTD485gKpvxWpYptP5DD3r3mThc2m5uWdiAM+jqm9Flc
+NlD0bZ8tdZpbPOgxnbAuy7HEPaS/VnGZHouwZWpb484dynCO7+Oi1f2y2tPx0uXV
+DRFDDLLR3oBEag==
+=+2H3
+-----END PGP PUBLIC KEY BLOCK-----
diff --git a/doc/src/install.rst b/doc/src/install.rst
index 78fd935d..de15d475 100644
--- a/doc/src/install.rst
+++ b/doc/src/install.rst
@@ -1,11 +1,11 @@
+.. include:: _meta.rst
+
 Install instructions
 ====================
 
-Release packages of python-icat are published in the `Python Package
-Index (PyPI)`__.  See :ref:`install-using-pip` for the short version
-of the install instructions.
+See :ref:`install-using-pip` for the short version of the install
+instructions.
 
-.. __: `PyPI site`_
 
 
 System requirements
@@ -114,26 +114,54 @@ Installation
 Installation using pip
 ......................
 
-You can install python-icat from PyPI using pip::
+You can install python-icat from the
+`Python Package Index (PyPI) <PyPI site_>`_ using pip::
 
   $ pip install python-icat
 
+Note that while installing from PyPI is convenient, there is no way to
+verify the integrity of the source distribution, which may be
+considered a security risk.
+
 Installation from the source distribution
 .........................................
 
 Steps to manually build from the source distribution:
 
-1. Download the sources, unpack, and change into the source directory.
+1. Download the sources.
+
+   From the `Release Page <GitHub latest release_>`_ you may download
+   the source distribution file |distribution_source|_ and the
+   detached signature file |distribution_signature|_.
+
+2. Check the signature (optional).
+
+   You may verify the integrity of the source distribution by checking
+   the signature (showing the output for version 1.2.0 as an example)::
+
+     $ gpg --verify python-icat-1.2.0.tar.gz.asc
+     gpg: assuming signed data in 'python-icat-1.2.0.tar.gz'
+     gpg: Signature made Tue Oct 31 07:01:55 2023 CET
+     gpg:                using RSA key 760465DAF652737A61EC0C9D83F336432C7FCC91
+     gpg: Good signature from "Rolf Krahl <rolf.krahl@helmholtz-berlin.de>" [full]
 
-2. Build::
+   The signature should be made by the key
+   :download:`0x760465DAF652737A61EC0C9D83F336432C7FCC91
+   <83F336432C7FCC91.pub>`.  The fingerprint of that key is::
+
+     7604 65DA F652 737A 61EC  0C9D 83F3 3643 2C7F CC91
+
+3. Unpack and change into the source directory.
+
+4. Build (optional)::
 
      $ python setup.py build
 
-3. Test (optional, see below)::
+5. Test (optional, see below)::
 
      $ python setup.py test
 
-4. Install::
+6. Install::
 
      $ python setup.py install
 
@@ -179,7 +207,6 @@ You can safely run the tests without configuring any test server.  You
 will just get many skipped tests then.
 
 
-.. _PyPI site: https://pypi.org/project/python-icat/
 .. _setuptools: https://github.com/pypa/setuptools/
 .. _packaging: https://github.com/pypa/packaging/
 .. _suds-jurko: https://pypi.org/project/suds-jurko/
@@ -191,5 +218,7 @@ will just get many skipped tests then.
 .. _pytest: https://docs.pytest.org/en/latest/
 .. _pytest-dependency: https://pypi.org/project/pytest-dependency/
 .. _distutils-pytest: https://github.com/RKrahl/distutils-pytest/
+.. _PyPI site: https://pypi.org/project/python-icat/
+.. _GitHub latest release: https://github.com/icatproject/python-icat/releases/latest/
 .. _GitHub repository: https://github.com/icatproject/python-icat/
 .. _Issue #72: https://github.com/icatproject/python-icat/issues/72

From 67b947c31be3111b1610d0a2baf627006b58e557 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 19 Feb 2024 17:43:45 +0100
Subject: [PATCH 082/102] Fixup 8f35940: need to run doc/src/conf.py before
 doc8-check now

---
 .github/workflows/rst-lint.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.github/workflows/rst-lint.yaml b/.github/workflows/rst-lint.yaml
index b9b239f7..9205803a 100644
--- a/.github/workflows/rst-lint.yaml
+++ b/.github/workflows/rst-lint.yaml
@@ -11,6 +11,17 @@ jobs:
     steps:
       - name: Check out repository code
         uses: actions/checkout@v4
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.11
+      - name: Install dependencies
+        run: |
+          pip install setuptools packaging git-props suds
+      - name: Run conf.py
+        run: |
+          python setup.py build
+          python doc/src/conf.py
       - name: doc8-check
         uses: deep-entertainment/doc8-action@v4
         with:

From 2a827fdcc2820e7d6f2d76477730d081f96119ef Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 19 Feb 2024 17:50:30 +0100
Subject: [PATCH 083/102] Aesthetic fix for rst-lint action: unshallow the
 checked out repository in order to get the correct version number in the
 diagnostics

---
 .github/workflows/rst-lint.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/rst-lint.yaml b/.github/workflows/rst-lint.yaml
index 9205803a..187ce87c 100644
--- a/.github/workflows/rst-lint.yaml
+++ b/.github/workflows/rst-lint.yaml
@@ -11,6 +11,8 @@ jobs:
     steps:
       - name: Check out repository code
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
       - name: Set up Python 3.11
         uses: actions/setup-python@v4
         with:

From 848a4745abd3aeaab680c2dc25b5f4d47c7fe357 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 19 Feb 2024 18:09:44 +0100
Subject: [PATCH 084/102] Some tweaks in the install instructions: - Point out
 that a manual install does not automatically install   dependencies, -
 Removed yet another reference of PyPI to get release versions from, - Minor
 formulation fix.

---
 doc/src/install.rst | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/doc/src/install.rst b/doc/src/install.rst
index de15d475..12d0bd41 100644
--- a/doc/src/install.rst
+++ b/doc/src/install.rst
@@ -126,7 +126,9 @@ considered a security risk.
 Installation from the source distribution
 .........................................
 
-Steps to manually build from the source distribution:
+Note that the manual build does not automatically check the
+dependencies.  So we assume that you have all the system requirements
+installed.  Steps to manually build from the source distribution:
 
 1. Download the sources.
 
@@ -172,9 +174,9 @@ Building from development sources
 .................................
 
 For production use, it is always recommended to use the latest release
-version from PyPI, see above.  If you need some not yet released
-bleeding edge feature or if you want to participate in the
-development, you may also clone the `source repository from GitHub`__.
+version, see above.  If you need some not yet released bleeding edge
+feature or if you want to participate in the development, you may also
+clone the `source repository from GitHub`__.
 
 Note that some source files are dynamically created and thus missing
 in the development sources.  If you want to build from the development
@@ -203,8 +205,8 @@ authentication plugin must also have these users configured.
 from the test server and replace it with example content.  Do not
 configure the tests to access a production server!
 
-You can safely run the tests without configuring any test server.  You
-will just get many skipped tests then.
+You can safely run the tests without configuring any test server.  But
+most of the tests will be skipped then.
 
 
 .. _setuptools: https://github.com/pypa/setuptools/

From 0b49a2f7ca23c80b53e5ee09077074244648c402 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 19 Feb 2024 18:39:57 +0100
Subject: [PATCH 085/102] Update changelog

---
 CHANGES.rst | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index a152e835..cc10d5fc 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -29,8 +29,12 @@ Incompatible changes
 Bug fixes and minor changes
 ---------------------------
 
-+ `#145`_: Review build tool chain
++ `#141`_, `#142`_: Review documentation.
 
++ `#145`_: Review build tool chain.
+
+.. _#141: https://github.com/icatproject/python-icat/issues/141
+.. _#142: https://github.com/icatproject/python-icat/pull/142
 .. _#143: https://github.com/icatproject/python-icat/issues/143
 .. _#144: https://github.com/icatproject/python-icat/pull/144
 .. _#145: https://github.com/icatproject/python-icat/pull/145

From 340d1ac41d7e52dcb413c0c1271ce720458cb639 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 28 Feb 2024 12:46:40 +0100
Subject: [PATCH 086/102] Document the processing of ingest files in
 IngestReader and the _environment element that is injected into the metadata
 during that processing

---
 doc/src/ingest.rst | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst
index 9ab94740..8245e0fd 100644
--- a/doc/src/ingest.rst
+++ b/doc/src/ingest.rst
@@ -44,6 +44,57 @@ objects read from the input file in ICAT.
     :show-inheritance:
 
 
+.. _ingest-process:
+
+Ingest process
+--------------
+
+The processing of ingest files during the instantiation of an
+:class:`~icat.ingest.IngestReader` object may be summarized with the
+following steps:
+
+1. Read the metadata and parse the :class:`lxml.etree._ElementTree`.
+
+2. Call :meth:`~icat.ingest.IngestReader.get_xsd` to get the
+   appropriate XSD file and validate the metadata against that schema.
+
+3. Inject an ``_environment`` element as first child of the ``data``
+   element, see below.
+
+4. Call :meth:`~icat.ingest.IngestReader.get_xslt` to get the
+   appropriate XSLT file and transform the metadata into generic ICAT
+   data XML file format.
+
+5. Feed the result of the transformation into the parent class
+   :class:`~icat.dumpfile_xml.XMLDumpFileReader`.
+
+Once this initialization is done,
+:meth:`~icat.ingest.IngestReader.ingest` may be called to read the
+individual objects defined in the metadata.
+
+
+.. _ingest-environment:
+
+The environment element
+-----------------------
+
+During the processing of ingest files, an ``_environment`` element
+will be injected as the first child of the ``data`` element.  In the
+current version of python-icat, this ``_environment`` element has the
+following attributes:
+
+  `icat_version`
+    Version of the ICAT server this client connects to, i.e. the
+    :attr:`icat.client.Client.apiversion` attribute of the `client`
+    object being used by this :class:`~icat.ingest.IngestReader`.
+
+More attributes may be added in future versions.  This
+``_environment`` element may be used by the XSLT in order to adapt the
+result of the transformation to the environment, in particular to
+adapt the output to the ICAT schema version it is supposed to conform
+to.
+
+
 .. _ingest-example:
 
 Ingest example

From 77314423049c2e95a25d7e922211024406b0bcc2 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 28 Feb 2024 23:08:56 +0100
Subject: [PATCH 087/102] Update changelog

---
 CHANGES.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index cc10d5fc..5c6cb3ad 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -14,6 +14,9 @@ New features
   processing the input in custom versions of
   :class:`icat.ingest.IngestReader`.
 
++ `#148`_, `#149`_: Inject an additional element with environment
+  information into the input data in :class:`icat.ingest.IngestReader`.
+
 + `#146`_, `#147`_: Better error handling in
   :class:`icat.ingest.IngestReader`.
 
@@ -40,6 +43,8 @@ Bug fixes and minor changes
 .. _#145: https://github.com/icatproject/python-icat/pull/145
 .. _#146: https://github.com/icatproject/python-icat/issues/146
 .. _#147: https://github.com/icatproject/python-icat/pull/147
+.. _#148: https://github.com/icatproject/python-icat/issues/148
+.. _#149: https://github.com/icatproject/python-icat/pull/149
 
 
 .. _changes-1_2_0:

From 0a7dda498fe51d1de94d148faefa0a01b50d1431 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 4 Mar 2024 10:42:02 +0100
Subject: [PATCH 088/102] Make Sphinx extension sphinx_copybutton optional,
 i.e. load that extension only if it is installed

---
 doc/src/conf.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/src/conf.py b/doc/src/conf.py
index 1496d62a..0b0f02e4 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -68,8 +68,12 @@ def make_meta_rst(last_release):
 extensions = [
     'sphinx.ext.autodoc',
     'sphinx.ext.intersphinx',
-    'sphinx_copybutton',
 ]
+try:
+    import sphinx_copybutton
+    extensions.append('sphinx_copybutton')
+except ImportError:
+    pass
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']

From 730f248e7be07c5d4cfdf2f1f5a13eecccc7d08e Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 4 Mar 2024 10:49:03 +0100
Subject: [PATCH 089/102] Add lxml intersphinx mapping

---
 doc/src/conf.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/doc/src/conf.py b/doc/src/conf.py
index 0b0f02e4..ed638dca 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -106,7 +106,10 @@ def make_meta_rst(last_release):
 
 # -- Options for intersphinx extension ---------------------------------------
 
-intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/3', None),
+    'lxml': ('https://lxml.de/apidoc/', None),
+}
 
 # -- Options for HTML output -------------------------------------------------
 

From aa1cb6083702129f045655262d25ce0879d8fb89 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 4 Mar 2024 15:31:05 +0100
Subject: [PATCH 090/102] Add some introductory paragraphs to the File formats
 section

---
 doc/src/fileformats.rst | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/doc/src/fileformats.rst b/doc/src/fileformats.rst
index c90eaec1..c24d37eb 100644
--- a/doc/src/fileformats.rst
+++ b/doc/src/fileformats.rst
@@ -1,8 +1,22 @@
 File formats
 ============
 
-Some components of python-icat read input files or write output files.
-This section describes the file formats being used.
+Some components of python-icat read input files or write output files:
+
+The :ref:`icatdump` command line script fetches content from an ICAT
+server and writes it to a file.  The :ref:`icatingest` command line
+script reads those files and restores the content in an ICAT server.
+The ICAT data file format written and read by these scripts
+respectively corresponds directly to the ICAT schema.  It is rather
+generic and may encode any ICAT content.
+
+The metadata ingest file format is basically a restricted version of
+the ICAT data file format.  It is read by class
+:class:`icat.ingest.IngestReader` for the purpose of ingesting
+metadata created by experiments into ICAT.
+
+See the following sections for a detailed description of these file
+formats:
 
 .. toctree::
    :maxdepth: 1

From 14f7efa6f5c5f908b75791796da81b1451f38bad Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 4 Mar 2024 15:38:45 +0100
Subject: [PATCH 091/102] Minor formulation detail

---
 doc/src/file-icatingest.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/src/file-icatingest.rst b/doc/src/file-icatingest.rst
index 7348259f..e6e1c251 100644
--- a/doc/src/file-icatingest.rst
+++ b/doc/src/file-icatingest.rst
@@ -26,7 +26,7 @@ format if we would use plain ICAT data files for this purpose.
    ingest file format is 1.1.
 
 .. versionchanged:: 1.2.0
-   add metadata ingest file format version 1.1: add support for
+   add metadata ingest file format version 1.1, adding support for
    relating Datasets with Samples.
 
 Differences compared to ICAT data XML files

From 10d3471ecfe2c57c2ca58bd84286902d81847d36 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 4 Mar 2024 16:55:35 +0100
Subject: [PATCH 092/102] Review the icat.ingest module documentation

---
 doc/src/ingest.rst | 10 +++++-----
 src/icat/ingest.py | 17 +++++++++++++++--
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst
index 8245e0fd..e0574221 100644
--- a/doc/src/ingest.rst
+++ b/doc/src/ingest.rst
@@ -49,8 +49,8 @@ objects read from the input file in ICAT.
 Ingest process
 --------------
 
-The processing of ingest files during the instantiation of an
-:class:`~icat.ingest.IngestReader` object may be summarized with the
+The processing of the metadata during the instantiation of an
+:class:`~icat.ingest.IngestReader` object may be summarized by the
 following steps:
 
 1. Read the metadata and parse the :class:`lxml.etree._ElementTree`.
@@ -58,7 +58,7 @@ following steps:
 2. Call :meth:`~icat.ingest.IngestReader.get_xsd` to get the
    appropriate XSD file and validate the metadata against that schema.
 
-3. Inject an ``_environment`` element as first child of the ``data``
+3. Inject an ``_environment`` element as first child of the root
    element, see below.
 
 4. Call :meth:`~icat.ingest.IngestReader.get_xslt` to get the
@@ -78,8 +78,8 @@ individual objects defined in the metadata.
 The environment element
 -----------------------
 
-During the processing of ingest files, an ``_environment`` element
-will be injected as the first child of the ``data`` element.  In the
+During the processing of the metadata, an ``_environment`` element
+will be injected as the first child of the root element.  In the
 current version of python-icat, this ``_environment`` element has the
 following attributes:
 
diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 0b8f2e8f..2540535f 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -75,7 +75,7 @@ class IngestReader(XMLDumpFileReader):
        in favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`.
 
     .. versionchanged:: 1.3.0
-        inject an element `_environment` as first child of the root
+        inject an element ``_environment`` as first child of the root
         element into the input data.
     """
 
@@ -188,6 +188,9 @@ def get_xslt(self, ingest_data):
     def get_environment(self, client):
         """Get the environment to be injected as an element into the input.
 
+        Subclasses may override this method to control the attributes
+        set in the environment.
+
         :param client: the client object being used by this
             IngestReader.
         :type client: :class:`icat.client.Client`
@@ -201,6 +204,11 @@ def get_environment(self, client):
     def add_environment(self, client, ingest_data):
         """Inject environment information into input data.
 
+        The attributes set in the environment are determined by
+        calling :meth:`~icat.ingest.IngestReader.get_environment`.
+        Subclasses may override this method to fully control the
+        process of adding the environment element.
+
         :param client: the client object being used by this
             IngestReader.
         :type client: :class:`icat.client.Client`
@@ -244,11 +252,16 @@ def ingest(self, datasets, dry_run=False, update_ds=False):
         created in ICAT.  In this case, the `datasets` in the argument
         must already have been created in ICAT beforehand (e.g. the
         `id` attribute must be set).  If `dry_run` is :const:`True`,
-        the `datasets` don't need to be created beforehand.
+        the objects in the metadata will be checked for conformance,
+        but nothing will be committed to ICAT.  In this case, the
+        `datasets` don't need to be created beforehand.
 
         if `update_ds` is :const:`True`, the objects in the `datasets`
         argument will be updated: the attributes and the relations to
         other objects will be set to the values read from the input.
+        This is particularly useful in conjunction with `dry_run` in
+        order to update the `datasets` from the metadata prior to
+        creating them in ICAT.
 
         :param datasets: list of allowed datasets in the input.
         :type datasets: iterable of :class:`icat.entity.Entity`

From 17bd431fcdd215ce88bcb5f515307a0648ba1b84 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 18 Mar 2024 11:45:49 +0100
Subject: [PATCH 093/102] Extend the ICAT data XML files section: add an
 example for an object reference element and for using keys to reference
 related objects when referencing an object by attributes

---
 doc/src/file-icatdata.rst | 76 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 71 insertions(+), 5 deletions(-)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index 878c87f6..76df594e 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -101,11 +101,12 @@ to provide some information on the context of the creation of the data
 file, which may be useful for debugging in case of issues.
 
 The content of each ``data`` element is one chunk, its subelements are
-the ICAT object definitions according to the logical structure
-explained above.  The present example contains two chunks: the first
-chunk contains four User objects and three Grouping objects.  The
-Groupings include related UserGroups.  The second chunk only contains
-one Investigation, including related InvestigationGroups.
+either the ICAT object definitions according to the logical structure
+explained above or ICAT object references, see below.  The present
+example contains two chunks: the first chunk contains four User
+objects and three Grouping objects.  The Groupings include related
+UserGroups.  The second chunk only contains one Investigation,
+including related InvestigationGroups.
 
 The object elements may have an ``id`` attribute that define a local
 key to reference the object later on.  The subelements of the object
@@ -209,6 +210,71 @@ or newer, because the attributes fileCount and fileSize have been
 added to Investigation in this version.  With older ICAT versions, it
 will fail because these attributes are not defined.
 
+For some further features of the ICAT data XML file format consider
+the following example:
+
+.. code-block:: XML
+
+  <?xml version="1.0" encoding="utf-8"?>
+  <icatdata>
+    <head>
+      <date>2023-10-17T07:33:36Z</date>
+      <generator>manual edit</generator>
+    </head>
+    <data>
+      <investigationRef id="inv_1" name="10100601-ST" visitId="1.1-N"/>
+      <dataset id="dataset_1">
+        <complete>false</complete>
+        <endDate>2012-07-30T01:10:08+00:00</endDate>
+        <name>e209001</name>
+        <startDate>2012-07-26T15:44:24+00:00</startDate>
+        <investigation ref="inv_1"/>
+        <sample name="ab3465" investigation.ref="inv_1"/>
+        <type name="raw"/>
+      </dataset>
+      <dataset id="dataset_2">
+        <complete>false</complete>
+        <endDate>2012-08-06T01:10:08+00:00</endDate>
+        <name>e209002</name>
+        <startDate>2012-08-02T05:30:00+00:00</startDate>
+        <investigation ref="inv_1"/>
+        <sample name="ab3465" investigation.ref="inv_1"/>
+        <type name="raw"/>
+      </dataset>
+      <dataset id="dataset_3">
+        <complete>false</complete>
+        <endDate>2012-07-16T14:30:17+00:00</endDate>
+        <name>e209003</name>
+        <startDate>2012-07-16T11:42:05+00:00</startDate>
+        <investigation ref="inv_1"/>
+        <sample name="ab3466" investigation.ref="inv_1"/>
+        <type name="raw"/>
+      </dataset>
+      <dataset id="dataset_4">
+        <complete>false</complete>
+        <endDate>2012-07-31T22:52:23+00:00</endDate>
+        <name>e209004</name>
+        <startDate>2012-07-31T20:20:37+00:00</startDate>
+        <investigation ref="inv_1"/>
+        <type name="raw"/>
+      </dataset>
+    </data>
+  </icatdata>
+
+In this case, the first subelement of the ``data`` element is an
+ICAT object reference ``investigationRef``.  It does not define an
+ICAT object to be created when reading the ICAT data file but
+references an already existing Investigation object by attributes.  It
+defines a local key in the ``id`` attribute that can be used in
+subsequent ICAT object definitions to reference this investigation
+more easily.  In this example, the same chunk contains four Dataset
+objects that use that key to set their relation with the
+investigation.  Furthermore some of the Datasets have a relation with a
+Sample that is respectively referenced by its name and relation to the
+same investigation.  Again, the reference from the sample to the
+investigation reuses the same key for the investigation.
+
+
 You will find more extensive examples in the source distribution of
 python-icat.  The distribution also provides XML Schema Definition
 files for the ICAT data XML file format corresponding to various ICAT

From c63d0fc5ba7a5a3012b6a4551498918f12a12945 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Mon, 18 Mar 2024 14:35:00 +0100
Subject: [PATCH 094/102] Enable numfig

---
 doc/src/conf.py    | 11 +++++++++++
 doc/src/config.rst |  6 +++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/doc/src/conf.py b/doc/src/conf.py
index ed638dca..3b1fe261 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -84,6 +84,17 @@ def make_meta_rst(last_release):
 # The master toctree document.
 master_doc = 'index'
 
+# Enable automatic numbering of figures, tables and code-blocks
+numfig = True
+
+# Strings to format figure, table, code-block, and section numbers
+numfig_format = {
+    'figure': "Figure %s",
+    'table': "Table %s",
+    'code-block': "Snippet %s",
+    'section': "Section %s",
+}
+
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #
diff --git a/doc/src/config.rst b/doc/src/config.rst
index af597211..14688e5e 100644
--- a/doc/src/config.rst
+++ b/doc/src/config.rst
@@ -171,9 +171,9 @@ A few derived variables are also set in
     | `promptPass`    | ``-P``, ``--prompt-pass``   |                       | :const:`False` | no        | \(3),(4),(5) |
     +-----------------+-----------------------------+-----------------------+----------------+-----------+--------------+
 
-See the table for an overview of predefined configuration variables.
-Mandatory means that an error will be raised in
-:meth:`icat.config.Config.getconfig` if no value is found for the
+See :numref:`tab-config-vars` for an overview of predefined
+configuration variables.  Mandatory means that an error will be raised
+in :meth:`icat.config.Config.getconfig` if no value is found for the
 configuration variable in question.
 
 Notes:

From a521daee552f2eb910b472a38c691e8e25b3803e Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 20 Mar 2024 07:20:33 +0100
Subject: [PATCH 095/102] Review the ICAT data files Section, essentially
 rewrote the subsection on ICAT data XML files

---
 doc/src/file-icatdata.rst | 383 +++++++++++++++++---------------------
 1 file changed, 171 insertions(+), 212 deletions(-)

diff --git a/doc/src/file-icatdata.rst b/doc/src/file-icatdata.rst
index 76df594e..c4894c03 100644
--- a/doc/src/file-icatdata.rst
+++ b/doc/src/file-icatdata.rst
@@ -62,9 +62,8 @@ corresponding Grouping objects.
 References to ICAT objects and unique keys
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-References to related objects are encoded in ICAT data files with
-reference keys.  There are two kinds of those keys, local keys and
-unique keys:
+References to ICAT objects may be encoded using reference keys.  There
+are two kinds of those keys, local keys and unique keys:
 
 When an ICAT object is defined in the file, it generally defines a
 local key at the same time.  Local keys are stored in the object index
@@ -86,12 +85,6 @@ Reference keys should be considered as opaque ids.
 ICAT data XML files
 ~~~~~~~~~~~~~~~~~~~
 
-In this section we describe the ICAT data file format using the XML
-backend.  Consider the following example:
-
-.. literalinclude:: ../examples/icatdump-simple.xml
-   :language: xml
-
 The root element of ICAT data XML files is ``icatdata``.  It may
 optionally have one ``head`` subelement and one or more ``data``
 subelements.
@@ -100,109 +93,158 @@ The ``head`` element will be ignored by :ref:`icatingest`.  It serves
 to provide some information on the context of the creation of the data
 file, which may be useful for debugging in case of issues.
 
-The content of each ``data`` element is one chunk, its subelements are
-either the ICAT object definitions according to the logical structure
-explained above or ICAT object references, see below.  The present
-example contains two chunks: the first chunk contains four User
-objects and three Grouping objects.  The Groupings include related
-UserGroups.  The second chunk only contains one Investigation,
-including related InvestigationGroups.
-
-The object elements may have an ``id`` attribute that define a local
-key to reference the object later on.  The subelements of the object
-elements correspond to the object's attributes and relations according
-to the ICAT schema.  All many-to-one relations must be provided and
-reference already existing objects, e.g. they must either already have
-existed before starting the ingestion or appear earlier in the ICAT
-data file than the referencing object, so that they will be created
-earlier.  The related object may either be referenced by reference key
-using the ``ref`` attribute or by the related object's attribute
-values, using XML attributes of the same name.  In the latter case,
-the attribute values must uniquely define the related object.
-
-Consider a simplified version of the first chunk from the present
-example, defining only one User, Grouping and UserGroup respectively:
-
-.. code-block:: XML
+The actual payload of an ICAT data XML file is in the ``data``
+elements.  There can be any number of them and each is one chunk
+according to the logical structure explained above.  The subelements
+of ``data`` may either be ICAT object references or ICAT object
+definitions, both explained in detail below.  Either of them may have
+an ``id`` attribute that defines a local key that can be used to
+reference the corresponding object later on.
 
-  <data>
-    <user id="User_name-db=2Fahau">
-      <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
-      <email>ahau@example.org</email>
-      <familyName>Hau</familyName>
-      <fullName>Arnold Hau</fullName>
-      <givenName>Arnold</givenName>
-      <name>db/ahau</name>
-      <orcidId>0000-0002-3263</orcidId>
-    </user>
-    <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
-      <name>investigation_10100601-ST_owner</name>
-      <userGroups>
-        <user ref="User_name-db=2Fahau"/>
-      </userGroups>
-    </grouping>
-  </data>
-
-The Grouping includes the related UserGroup object that in turn
-references the related User.  This User is referenced in the ``ref``
-attribute using a local key defined in the User's ``id`` attribute.
-Note that the UserGroup does not include its relation with Grouping.
-The latter relationship is implied by the parent relation of the
-object in the file.
-
-As an alternative, the UserGroup could have been added to the file as
-separate object as direct subelement of ``data``:
+:numref:`snip-file-icatdata-xml-1` shows a simple example for an ICAT
+data XML file having one single ``data`` element that defines four
+Datasets.
 
 .. code-block:: XML
+    :name: snip-file-icatdata-xml-1
+    :caption: A simple example for an ICAT data XML file
+    :dedent: 2
+
+      <?xml version="1.0" encoding="utf-8"?>
+      <icatdata>
+        <head>
+          <date>2023-10-17T07:33:36Z</date>
+          <generator>manual edit</generator>
+        </head>
+        <data>
+          <investigationRef id="inv_1" name="10100601-ST" visitId="1.1-N"/>
+          <dataset id="dataset_1">
+            <complete>false</complete>
+            <endDate>2012-07-30T01:10:08+00:00</endDate>
+            <name>e209001</name>
+            <startDate>2012-07-26T15:44:24+00:00</startDate>
+            <investigation ref="inv_1"/>
+            <sample name="ab3465" investigation.ref="inv_1"/>
+            <type name="raw"/>
+          </dataset>
+          <dataset id="dataset_2">
+            <complete>false</complete>
+            <endDate>2012-08-06T01:10:08+00:00</endDate>
+            <name>e209002</name>
+            <startDate>2012-08-02T05:30:00+00:00</startDate>
+            <investigation ref="inv_1"/>
+            <sample name="ab3465" investigation.ref="inv_1"/>
+            <type name="raw"/>
+          </dataset>
+          <dataset id="dataset_3">
+            <complete>false</complete>
+            <endDate>2012-07-16T14:30:17+00:00</endDate>
+            <name>e209003</name>
+            <startDate>2012-07-16T11:42:05+00:00</startDate>
+            <investigation ref="inv_1"/>
+            <sample name="ab3466" investigation.ref="inv_1"/>
+            <type name="raw"/>
+          </dataset>
+          <dataset id="dataset_4">
+            <complete>false</complete>
+            <endDate>2012-07-31T22:52:23+00:00</endDate>
+            <name>e209004</name>
+            <startDate>2012-07-31T20:20:37+00:00</startDate>
+            <investigation ref="inv_1"/>
+            <type name="raw"/>
+          </dataset>
+        </data>
+      </icatdata>
+
+ICAT object references
+......................
+
+ICAT object references do not define an ICAT object to be created when
+reading the ICAT data file but reference an already existing one.  It
+is either assumed to exist in ICAT before ingesting the file or it
+must appear earlier in the ICAT data file, so that it will be created
+before the referencing object is read.
+
+ICAT objects may either be referenced by reference key or by
+attributes.  A reference key should be included as a ``ref``
+attribute.
+
+When referencing the object by attributes, these attributes should be
+included using the same name in the reference element.  This may also
+include attributes of related objects using the same dot notation as
+for ICAT JPQL search expressions.  Referencing by attributes may be
+combined with referencing related objects by reference key, using
+``ref`` in place of the related object's attribute names.  In any
+case, referenced objects must be uniquely defined by the attribute
+values.
+
+ICAT object references may be used in two locations in ICAT data XML
+files: as direct subelements of ``data`` or to reference related
+objects in many-to-one relations in ICAT object definitions, see
+below.  In the former case, the name of the object reference element
+is the name of the corresponding ICAT entity type (the first letter in
+lowercase) with a ``Ref`` suffix appended.  In that case, the element
+should have an ``id`` attribute that will define a local key that can
+be used to reference that object in subsequent object references.
+This is convenient to define a shortcut when the same object needs to
+be referenced often, to avoid having to repeat the same set of
+attributes each time.
+
+In any case, object reference elements only have attributes, but no
+content or subelements.
+
+See :numref:`snip-file-icatdata-xml-1` for a few examples: the first
+subelement of the ``data`` element in this case is
+``investigationRef``.  It references a (presumably existing)
+Investigation by its attributes ``name`` and ``visitId``.  It defines
+a local key for that Investigation object in the ``id`` attribute.
+The Dataset object definitions in that example each use that local key
+to set their relation with the Investigation respectively.  The
+Dataset object definitions each also include a relation with their
+``type``, referencing the related DatasetType by the ``name``
+attribute.  Some of the Dataset object definitions also include a
+relation with a Sample.  The respective Sample object is referenced by
+``name`` and the related Investigation.  The latter is referenced by
+the local key defined earlier in the ``investigation.ref`` attribute.
+
+ICAT object definitions
+.......................
+
+ICAT object definitions define objects that will be created in ICAT
+when ingesting the ICAT data file.  As direct subelements of ``data``,
+the name of the element must be the name of the corresponding entity
+type in the ICAT schema (the first letter in lowercase).
+
+The subelements of ICAT object definitions are the attributes and
+object relations as defined in the ICAT schema using the same names.
+Attributes must include the corresponding value as text content of the
+element.  All many-to-one relations must be provided as ICAT object
+references, see above.
+
+The ICAT object definitions may include one-to-many relations as
+subelements.  In this case, these subelements must in turn be ICAT
+object definitions for the related objects.  These related objects
+will be created along with the parent in one single cascading call.
+The object definition for the related object must not include its
+relation with the parent object as this is already implied by the
+parent and child relationship.
+
+When appearing as direct subelements of ``data``, ICAT object
+definitions may have an ``id`` attribute that will define a local key
+that can be used to reference the defined object later on.
 
-  <data>
-    <user id="User_name-db=2Fahau">
-      <affiliation>Goethe University Frankfurt, Faculty of Philosophy and History</affiliation>
-      <email>ahau@example.org</email>
-      <familyName>Hau</familyName>
-      <fullName>Arnold Hau</fullName>
-      <givenName>Arnold</givenName>
-      <name>db/ahau</name>
-      <orcidId>0000-0002-3263</orcidId>
-    </user>
-    <grouping id="Grouping_name-investigation=5F10100601=2DST=5Fowner">
-      <name>investigation_10100601-ST_owner</name>
-    </grouping>
-    <userGroup id="UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner)">
-      <grouping ref="Grouping_name-investigation=5F10100601=2DST=5Fowner"/>
-      <user ref="User_name-db=2Fahau"/>
-    </userGroup>
-  </data>
-
-Another example is how the Investigation references its Facility:
-
-.. code-block:: XML
-
-  <investigation>
-    <!--  ... -->
-    <facility ref="Facility_name-ESNF"/>
-    <!--  ... -->
-  </investigation>
-
-The Facility is not defined in the data file.  It is assumed to exist
-in ICAT before ingesting the file.  In this case, it must be
-referenced by its unique key.  Alternatively, the Facility could have
-been referenced by attribute as in:
-
-.. code-block:: XML
-
-  <investigation>
-    <!--  ... -->
-    <facility name="ESNF"/>
-    <!--  ... -->
-  </investigation>
+.. literalinclude:: ../examples/icatdump-simple.xml
+   :language: xml
+   :name: snip-file-icatdata-xml-2
+   :caption: An example for an ICAT data XML file
 
-The Investigation in the second chunk in the present example includes
-related InvestigationGroups that will be created along with the
-Investigation.  The InvestigationGroup objects include a reference to
-the corresponding Grouping respectively.  Note that these references
-go across chunk boundaries.  Thus, unique keys for the Groupings need
-to be used here.
+Consider the example in :numref:`snip-file-icatdata-xml-2`.  It
+contains two chunks: the first chunk contains four User objects and
+three Grouping objects.  The Groupings include related UserGroups.
+Note that these UserGroups include their relation to the User, but not
+their relation with Grouping.  The latter is implied by the parent
+relation of the object in the file.  The second chunk only contains
+one Investigation, including related InvestigationGroups.
 
 Finally note that the file format also depends on the ICAT schema
 version: the present example can only be ingested into ICAT server 5.0
@@ -210,71 +252,6 @@ or newer, because the attributes fileCount and fileSize have been
 added to Investigation in this version.  With older ICAT versions, it
 will fail because these attributes are not defined.
 
-For some further features of the ICAT data XML file format consider
-the following example:
-
-.. code-block:: XML
-
-  <?xml version="1.0" encoding="utf-8"?>
-  <icatdata>
-    <head>
-      <date>2023-10-17T07:33:36Z</date>
-      <generator>manual edit</generator>
-    </head>
-    <data>
-      <investigationRef id="inv_1" name="10100601-ST" visitId="1.1-N"/>
-      <dataset id="dataset_1">
-        <complete>false</complete>
-        <endDate>2012-07-30T01:10:08+00:00</endDate>
-        <name>e209001</name>
-        <startDate>2012-07-26T15:44:24+00:00</startDate>
-        <investigation ref="inv_1"/>
-        <sample name="ab3465" investigation.ref="inv_1"/>
-        <type name="raw"/>
-      </dataset>
-      <dataset id="dataset_2">
-        <complete>false</complete>
-        <endDate>2012-08-06T01:10:08+00:00</endDate>
-        <name>e209002</name>
-        <startDate>2012-08-02T05:30:00+00:00</startDate>
-        <investigation ref="inv_1"/>
-        <sample name="ab3465" investigation.ref="inv_1"/>
-        <type name="raw"/>
-      </dataset>
-      <dataset id="dataset_3">
-        <complete>false</complete>
-        <endDate>2012-07-16T14:30:17+00:00</endDate>
-        <name>e209003</name>
-        <startDate>2012-07-16T11:42:05+00:00</startDate>
-        <investigation ref="inv_1"/>
-        <sample name="ab3466" investigation.ref="inv_1"/>
-        <type name="raw"/>
-      </dataset>
-      <dataset id="dataset_4">
-        <complete>false</complete>
-        <endDate>2012-07-31T22:52:23+00:00</endDate>
-        <name>e209004</name>
-        <startDate>2012-07-31T20:20:37+00:00</startDate>
-        <investigation ref="inv_1"/>
-        <type name="raw"/>
-      </dataset>
-    </data>
-  </icatdata>
-
-In this case, the first subelelement of the ``data`` element is an
-ICAT object reference ``investigationRef``.  It does not define an
-ICAT object to be created when reading the ICAT data file but
-references an already existing Investigation object by attributes.  It
-defines a local key in the ``id`` attribute that can be used in
-subsequent ICAT object definitions to reference this investigation
-more easily.  In this example, the same chunk contains four Dataset
-objects that use that key to set their relation with the
-investigation.  Furthermore some of the Dataset have a relation with a
-Sample that is respectively referenced by its name and relation to the
-same investigation.  Again, the reference from the sample to the
-investigation reuse the same key for the investigation.
-
-
 You will find more extensive examples in the source distribution of
 python-icat.  The distribution also provides XML Schema Definition
 files for the ICAT data XML file format corresponding to various ICAT
@@ -288,11 +265,14 @@ ICAT data YAML files
 ~~~~~~~~~~~~~~~~~~~~
 
 In this section we describe the ICAT data file format using the YAML
-backend.  Consider the following example, it corresponds to the same
-ICAT content as the XML example above:
+backend.  Consider the example in :numref:`snip-file-icatdata-yaml`,
+which corresponds to the same ICAT content as the XML example in
+:numref:`snip-file-icatdata-xml-2`:
 
 .. literalinclude:: ../examples/icatdump-simple.yaml
    :language: yaml
+   :name: snip-file-icatdata-yaml
+   :caption: An example for an ICAT data YAML file
 
 ICAT data YAML files start with a head consisting of a few comment
 lines, followed by one or more YAML documents.  YAML documents are
@@ -302,13 +282,14 @@ file, which may be useful for debugging in case of issues.
 
 Each YAML document defines one chunk of data according to the logical
 structure explained above.  It consists of a mapping having the name
-of entity types in the ICAT schema as keys.  The values are in turn
-mappings that map object ids as key to ICAT object definitions as
-value.  These object ids define local keys that may be used to
-reference the respective object later on.  In the present example, the
-first chunk contains four User objects and three Grouping objects.
-The Groupings include related UserGroups.  The second chunk only
-contains one Investigation, including related investigationGroups.
+of entity types in the ICAT schema (the first letter in lowercase) as
+keys.  The values are in turn mappings that map object ids as key to
+ICAT object definitions as value.  These object ids define local keys
+that may be used to reference the respective object later on.  In the
+present example, the first chunk contains four User objects and three
+Grouping objects.  The Groupings include related UserGroups.  The
+second chunk only contains one Investigation, including related
+InvestigationGroups.
 
 Each of the ICAT object definitions corresponds to an object in the
 ICAT schema.  It is again a mapping with the object's attribute and
@@ -317,43 +298,21 @@ relations must be provided and reference existing objects, e.g. they
 must either already have existed before starting the ingestion or
 appear in the same or an earlier YAML document in the ICAT data file.
 The values of many-to-one relations are reference keys, either local
-keys defined in the same YAML document or unique keys.
+keys defined in the same YAML document or unique keys.  Unlike the XML
+backend, the YAML backend does not support referencing objects by
+attributes.
 
 The object definitions may include one-to-many relations.  In this
 case, the value for the relation name is a list of object definitions
 for the related objects.  These related objects will be created along
 with the parent in one single cascading call.  In the present example,
 the Grouping objects include their related UserGroup objects.  Note
-that these UserGroups include their relation to the User, but not
-their relation with Grouping.  The latter relationship is implied by
-the parent relation of the object in the file.
-
-As an alternative, in the present example, the UserGroups could have
-been added to the file as separate objects as in:
-
-.. code-block:: YAML
-
-  ---
-  grouping:
-    Grouping_name-investigation=5F10100601=2DST=5Fowner:
-      name: investigation_10100601-ST_owner
-  user:
-    User_name-db=2Fahau:
-      affiliation: Goethe University Frankfurt, Faculty of Philosophy and History
-      email: ahau@example.org
-      familyName: Hau
-      fullName: Arnold Hau
-      givenName: Arnold
-      name: db/ahau
-      orcidId: 0000-0002-3263
-  userGroup:
-    UserGroup_user-(name-db=2Fahau)_grouping-(name-investigation=5F10100601=2DST=5Fowner):
-      grouping: Grouping_name-investigation=5F10100601=2DST=5Fowner
-      user: User_name-db=2Fahau
-  ---
-
-Note that the entries in the mappings have no inherent order.  The
-:ref:`icatingest` script uses a predefined order to read the ICAT
+that these UserGroups include their relation to the User, but not with
+Grouping.  The latter relationship is implied by the parent relation
+of the object in the file.
+
+Note that the entries in the mappings in YAML have no inherent order.
+The :ref:`icatingest` script uses a predefined order to read the ICAT
 entity types in order to make sure that referenced objects are created
 before any object that may reference them.
 

From d8362555fb914b6484d18dab504d9afb23a5a728 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 20 Mar 2024 09:43:47 +0100
Subject: [PATCH 096/102] Update changelog

---
 CHANGES.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 5c6cb3ad..cc44a850 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -32,7 +32,7 @@ Incompatible changes
 Bug fixes and minor changes
 ---------------------------
 
-+ `#141`_, `#142`_: Review documentation.
++ `#141`_, `#142`_, `#150`_: Review documentation.
 
 + `#145`_: Review build tool chain.
 
@@ -45,6 +45,7 @@ Bug fixes and minor changes
 .. _#147: https://github.com/icatproject/python-icat/pull/147
 .. _#148: https://github.com/icatproject/python-icat/issues/148
 .. _#149: https://github.com/icatproject/python-icat/pull/149
+.. _#150: https://github.com/icatproject/python-icat/pull/150
 
 
 .. _changes-1_2_0:

From 0f2ee5ec4c114c56a159e20a25bc804e305dd96b Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 20 Mar 2024 17:16:55 +0100
Subject: [PATCH 097/102] Log (expected) exceptions in test_06_ingest.py

---
 tests/test_06_ingest.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index e0456d67..1d28f305 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -4,6 +4,7 @@
 from collections import namedtuple
 import datetime
 import io
+import logging
 import pytest
 pytest.importorskip("lxml")
 from lxml import etree
@@ -14,6 +15,7 @@
 from conftest import (getConfig, gettestdata, icat_version,
                       get_icatdata_schema, testdatadir)
 
+logger = logging.getLogger(__name__)
 
 def get_test_investigation(client):
     query = Query(client, "Investigation", conditions={
@@ -560,9 +562,10 @@ def test_ingest_error_invalid(client, investigation, schemadir, case):
     datasets = []
     for name in case.data:
         datasets.append(client.new("Dataset", name=name))
-    with pytest.raises(icat.InvalidIngestFileError):
+    with pytest.raises(icat.InvalidIngestFileError) as exc:
         reader = IngestReader(client, case.metadata, investigation)
         reader.ingest(datasets, dry_run=True, update_ds=True)
+    logger.info("Raised %s: %s", exc.type.__name__, exc.value)
 
 searcherr_attr_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
 <icatingest version="1.0">
@@ -621,9 +624,10 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case):
     datasets = []
     for name in case.data:
         datasets.append(client.new("Dataset", name=name))
-    with pytest.raises(icat.SearchResultError):
+    with pytest.raises(icat.SearchResultError) as exc:
         reader = IngestReader(client, case.metadata, investigation)
         reader.ingest(datasets, dry_run=True, update_ds=True)
+    logger.info("Raised %s: %s", exc.type.__name__, exc.value)
 
 
 customcases = [

From 3a4adb180db6e42366dc7041632dc8ead8ece865 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 20 Mar 2024 17:47:25 +0100
Subject: [PATCH 098/102] Call XMLSchema().assertValid() rather than
 XMLSchema().validate() which raises a more meaningful exception

---
 src/icat/ingest.py      | 6 ++++--
 tests/test_06_ingest.py | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 2540535f..0e513526 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -112,8 +112,10 @@ def __init__(self, client, metadata, investigation):
             raise InvalidIngestFileError(e)
         with self.get_xsd(ingest_data).open("rb") as f:
             schema = etree.XMLSchema(etree.parse(f))
-        if not schema.validate(ingest_data):
-            raise InvalidIngestFileError("validation failed")
+        try:
+            schema.assertValid(ingest_data)
+        except etree.DocumentInvalid as exc:
+            raise InvalidIngestFileError("DocumentInvalid: %s" % exc)
         self.add_environment(client, ingest_data)
         with self.get_xslt(ingest_data).open("rb") as f:
             xslt = etree.XSLT(etree.parse(f))
diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index 1d28f305..baa0c73c 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -357,7 +357,7 @@ def test_ingest_schema(client, investigation, schemadir, case):
     reader = IngestReader(client, case.metadata, investigation)
     with get_icatdata_schema().open("rb") as f:
         schema = etree.XMLSchema(etree.parse(f))
-    assert schema.validate(reader.infile)
+    schema.assertValid(reader.infile)
 
 @pytest.mark.parametrize("case", [
     pytest.param(c, id=c.metadata.name, marks=c.marks) for c in cases
@@ -735,7 +735,7 @@ def test_ingest_env(monkeypatch, client, investigation, schemadir, case):
     reader = IngestReader(client, case.metadata, investigation)
     with get_icatdata_schema().open("rb") as f:
         schema = etree.XMLSchema(etree.parse(f))
-    assert schema.validate(reader.infile)
+    schema.assertValid(reader.infile)
     version_elem = reader.infile.xpath("/icatdata/head/apiversion")
     assert version_elem
     assert version_elem[0].text == str(client.apiversion)

From 6393affbd2ffe749496d3773aa229d9c6d78969b Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 20 Mar 2024 18:02:15 +0100
Subject: [PATCH 099/102] Update changelog

---
 CHANGES.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index cc44a850..9685c9c0 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -32,6 +32,10 @@ Incompatible changes
 Bug fixes and minor changes
 ---------------------------
 
++ `#151`_: Provide a more meaningful error message if the input to
+  :class:`icat.ingest.IngestReader` fails validation against the XML
+  Schema Definition.
+
 + `#141`_, `#142`_, `#150`_: Review documentation.
 
 + `#145`_: Review build tool chain.
@@ -46,6 +50,7 @@ Bug fixes and minor changes
 .. _#148: https://github.com/icatproject/python-icat/issues/148
 .. _#149: https://github.com/icatproject/python-icat/pull/149
 .. _#150: https://github.com/icatproject/python-icat/pull/150
+.. _#151: https://github.com/icatproject/python-icat/pull/151
 
 
 .. _changes-1_2_0:

From 1525e1bf412ac8a906307fe3066870c8c5be722c Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 20 Mar 2024 21:05:26 +0100
Subject: [PATCH 100/102] Last minute changes: - change the project_url to the
 changelog in the package metadata - minor change in the changelog

---
 CHANGES.rst | 6 +-----
 setup.py    | 4 ++--
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 9685c9c0..4e5fd391 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -17,7 +17,7 @@ New features
 + `#148`_, `#149`_: Inject an additional element with environment
   information into the input data in :class:`icat.ingest.IngestReader`.
 
-+ `#146`_, `#147`_: Better error handling in
++ `#146`_, `#147`_, `#151`_: Better error handling in
   :class:`icat.ingest.IngestReader`.
 
 Incompatible changes
@@ -32,10 +32,6 @@ Incompatible changes
 Bug fixes and minor changes
 ---------------------------
 
-+ `#151`_: Provide a more meaningful error message if the input to
-  :class:`icat.ingest.IngestReader` fails validation against the XML
-  Schema Definition.
-
 + `#141`_, `#142`_, `#150`_: Review documentation.
 
 + `#145`_: Review build tool chain.
diff --git a/setup.py b/setup.py
index 9c3b1e17..ee7173e1 100755
--- a/setup.py
+++ b/setup.py
@@ -182,8 +182,8 @@ def run(self):
         Source="https://github.com/icatproject/python-icat/",
         Download=("https://github.com/icatproject/python-icat/releases/%s/"
                   % release),
-        Changes=("https://python-icat.readthedocs.io/en/%s/changelog.html"
-                 % release),
+        Changes=("https://python-icat.readthedocs.io/en/stable"
+                 "/changelog.html#changes-%s" % release.replace('.', '-')),
     ),
     packages = ["icat"],
     package_dir = {"": "src"},

From 8a3b2da9dfd6674896794d4764445978b74278e2 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Thu, 21 Mar 2024 07:26:14 +0100
Subject: [PATCH 101/102] Fix source link in spec file

---
 python-icat.spec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-icat.spec b/python-icat.spec
index 50edcbb7..d0a39b81 100644
--- a/python-icat.spec
+++ b/python-icat.spec
@@ -15,7 +15,7 @@ Url:		$url
 Summary:	$description
 License:	Apache-2.0
 Group:		Development/Libraries/Python
-Source:		https://github.com/icatproject/python-icat/releases/latest/download/python-icat-%{version}.tar.gz
+Source:		https://github.com/icatproject/python-icat/releases/download/%{version}/python-icat-%{version}.tar.gz
 BuildRequires:	python%{pyversfx}-base >= 3.4
 BuildRequires:	python%{pyversfx}-setuptools
 BuildRequires:	fdupes

From 45846c1f2fbb9dcb7c649a626ccc36688c7f0f62 Mon Sep 17 00:00:00 2001
From: Rolf Krahl <rolf.krahl@helmholtz-berlin.de>
Date: Wed, 20 Mar 2024 18:21:06 +0100
Subject: [PATCH 102/102] Prepare release 1.3.0

---
 CHANGES.rst     | 4 ++--
 README.rst      | 2 +-
 doc/src/conf.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 4e5fd391..2941612e 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,8 +4,8 @@ Changelog
 
 .. _changes-1_3_0:
 
-1.3.0 (not yet released)
-~~~~~~~~~~~~~~~~~~~~~~~~
+1.3.0 (2024-03-21)
+~~~~~~~~~~~~~~~~~~
 
 New features
 ------------
diff --git a/README.rst b/README.rst
index e32e8ce3..54cacb19 100644
--- a/README.rst
+++ b/README.rst
@@ -50,7 +50,7 @@ the reason why the example scripts require PyYAML.
 Copyright and License
 ---------------------
 
-Copyright 2013–2023
+Copyright 2013–2024
 Helmholtz-Zentrum Berlin für Materialien und Energie GmbH
 
 Licensed under the `Apache License`_, Version 2.0 (the "License"); you
diff --git a/doc/src/conf.py b/doc/src/conf.py
index 3b1fe261..bd8fdc6f 100644
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -20,7 +20,7 @@
 # -- Project information -----------------------------------------------------
 
 project = 'python-icat'
-copyright = ('2013–2023, '
+copyright = ('2013–2024, '
              'Helmholtz-Zentrum Berlin für Materialien und Energie GmbH')
 author = 'Rolf Krahl'