From b450b7b4291bf24b69f4b09ebd4fa4d7ce4f2581 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 9 Oct 2024 15:32:04 +0200 Subject: [PATCH 1/5] Add two notes relevant to the environment setting in IngestReader to the reference documentation --- src/icat/ingest.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/icat/ingest.py b/src/icat/ingest.py index 21c4097..9504afc 100644 --- a/src/icat/ingest.py +++ b/src/icat/ingest.py @@ -97,6 +97,11 @@ class IngestReader(XMLDumpFileReader): Dataset_complete = "false" """Value to prescribe in the `complete` attribute of datasets. + .. note:: + The value for this class attribute is subject to change in + version 2.0. You might want to override it in order to pin it + to a value that is suitable for you. + .. versionadded:: 1.5.0 """ DatasetType_name = "raw" @@ -198,6 +203,20 @@ def get_environment(self, client): Subclasses may override this method to control the attributes set in the environment. + .. note:: + If you override this method, it is advisable to call the + inherited method from the parent class and augment the + result. This avoids inadvertently dropping environment + settings added in future versions. E.g. do something + like the following in your subclass: + + .. code-block:: python + + def get_environment(self, client): + env = super().get_environment(client) + env['mykey'] = 'value' + return env + :param client: the client object being used by this IngestReader. :type client: :class:`icat.client.Client` From 895073592f974e69ee49015a780ab26d7ba892af Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 9 Oct 2024 16:35:12 +0200 Subject: [PATCH 2/5] Add checks to the tests in test_06_ingest.py to verify that the prescribed values are actually set as expected --- tests/test_06_ingest.py | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py index f823106..76c19e9 100644 --- a/tests/test_06_ingest.py +++ b/tests/test_06_ingest.py @@ -104,6 +104,10 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-4.4-inl.xml"), checks = { "testingest_inl_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -120,6 +124,10 @@ class MyIngestReader(IngestReader): 2.74103), ], "testingest_inl_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -143,6 +151,10 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-5.0-inl.xml"), checks = { "testingest_inl5_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -167,6 +179,10 @@ class MyIngestReader(IngestReader): 2.74103), ], "testingest_inl5_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -201,6 +217,10 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-4.4-sep.xml"), checks = { "testingest_sep_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -217,6 +237,10 @@ class MyIngestReader(IngestReader): 2.74103), ], "testingest_sep_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -240,6 +264,10 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-5.0-sep.xml"), checks = { "testingest_sep5_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -264,6 +292,10 @@ class MyIngestReader(IngestReader): 2.74103), ], "testingest_sep5_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -299,6 +331,10 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-sample.xml"), checks = { "testingest_sample_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "ab3465 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -313,6 +349,10 @@ class MyIngestReader(IngestReader): "ab3465"), ], "testingest_sample_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "ab3465 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -327,6 +367,10 @@ class MyIngestReader(IngestReader): "ab3465"), ], "testingest_sample_3": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "ab3466 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -341,6 +385,10 @@ class MyIngestReader(IngestReader): "ab3466"), ], "testingest_sample_4": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "reference"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", From b59bc2cde85c055d1ab02a4a1e40069dd2985312 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 11 Oct 2024 10:52:41 +0200 Subject: [PATCH 3/5] Add a test overriding the IngestReader class attributes to set the prescribed values in the XSLT transformation --- tests/test_06_ingest.py | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py index 76c19e9..33f6ba3 100644 --- a/tests/test_06_ingest.py +++ b/tests/test_06_ingest.py @@ -692,6 +692,63 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case): logger.info("Raised %s: %s", exc.type.__name__, exc.value) +classattr_metadata = NamedBytesIO(""" + + + 2024-10-11T10:51:26+02:00 + metadata-writer 0.27a + + + + testingest_classattr_1 + Auxiliary data + 2022-02-03T15:40:12+01:00 + 2022-02-03T17:04:22+01:00 + + + +""".encode("utf8"), "classattr_metadata") +classattr_cases = [ + Case( + data = ["testingest_classattr_1"], + metadata = classattr_metadata, + checks = { + "testingest_classattr_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + True), + ("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d", + "other"), + ], + }, + marks = (), + ), +] +@pytest.mark.parametrize("case", [ + pytest.param(c, id=c.metadata.name, marks=c.marks) for c in classattr_cases +]) +def test_ingest_classattr(monkeypatch, client, investigation, schemadir, case): + """Test overriding prescribed values set in IngestReader class attributes. + """ + monkeypatch.setattr(IngestReader, "Dataset_complete", "true") + monkeypatch.setattr(IngestReader, "DatasetType_name", "other") + datasets = [] + for name in case.data: + datasets.append(client.new("Dataset", name=name)) + reader = IngestReader(client, case.metadata, investigation) + reader.ingest(datasets, dry_run=True, update_ds=True) + for ds in datasets: + ds.create() + reader.ingest(datasets) + for name in case.checks.keys(): + query = Query(client, "Dataset", conditions={ + "name": "= '%s'" % name, + "investigation.id": "= %d" % investigation.id, + }) + ds = client.assertedSearch(query)[0] + for query, res in case.checks[name]: + assert client.assertedSearch(query % ds.id)[0] == res + + customcases = [ Case( data = ["testingest_custom_icatingest_1"], From d1208f75e60527fc7c5e76029ea1d2dc5113f0aa Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 11 Oct 2024 11:20:20 +0200 Subject: [PATCH 4/5] Extend test_ingest_env in test_06_ingest.py to also add a custom environment attribute --- tests/data/ingest-env.xslt | 4 +++- tests/test_06_ingest.py | 39 ++++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/tests/data/ingest-env.xslt b/tests/data/ingest-env.xslt index 8e0eb4e..26b858d 100644 --- a/tests/data/ingest-env.xslt +++ b/tests/data/ingest-env.xslt @@ -18,7 +18,9 @@ - ingest-env.xslt + + + diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py index 33f6ba3..9001642 100644 --- a/tests/test_06_ingest.py +++ b/tests/test_06_ingest.py @@ -69,10 +69,17 @@ def schemadir(monkeypatch): monkeypatch.setattr(IngestReader, "SchemaDir", testdatadir) -class CapturingIngestReader(IngestReader): - """Modified version of Ingest reader that captures ingest_data in - add_environment(). +class EnvironmentIngestReader(IngestReader): + """Modified version of IngestReader + - Allow custom environment settings to be included. + - Capture the ingest data after injection of the environment in an + attribute. """ + _add_env = dict() + def get_environment(self, client): + env = super().get_environment(client) + env.update(self._add_env) + return env def add_environment(self, client, ingest_data): super().add_environment(client, ingest_data) self._ingest_data = ingest_data @@ -414,7 +421,7 @@ def test_ingest_schema(client, investigation, schemadir, case): datasets = [] for name in case.data: datasets.append(client.new("Dataset", name=name)) - reader = CapturingIngestReader(client, case.metadata, investigation) + reader = EnvironmentIngestReader(client, case.metadata, investigation) print_xml(reader._ingest_data) print_xml(reader.infile) with get_icatdata_schema().open("rb") as f: @@ -839,22 +846,30 @@ def test_custom_ingest(client, investigation, samples, schemadir, case): def test_ingest_env(monkeypatch, client, investigation, schemadir, case): """Test using the _environment element. - Applying a custom XSLT that extracts an attribute from the - _environment element that is injected by IngestReader into the - input data and puts that values into the head element of the - transformed input. This is to test that adding the _environment - element works and it is in principle possible to make use of the - values in the XSLT. + Add a custom attribute to the _environment that is injected by + IngestReader into the input data. Apply a custom XSLT that + extracts attributes from the _environment element and puts the + values into the head element of the transformed input. This is to + test that adding the _environment element works and it is in + principle possible to make use of the values in the XSLT. """ - monkeypatch.setattr(IngestReader, + generator = "test_ingest_env (python-icat %s)" % icat.__version__ + monkeypatch.setattr(EnvironmentIngestReader, + "_add_env", dict(generator=generator)) + monkeypatch.setattr(EnvironmentIngestReader, "XSLT_Map", dict(icatingest="ingest-env.xslt")) datasets = [] for name in case.data: datasets.append(client.new("Dataset", name=name)) - reader = IngestReader(client, case.metadata, investigation) + reader = EnvironmentIngestReader(client, case.metadata, investigation) + print_xml(reader._ingest_data) + print_xml(reader.infile) with get_icatdata_schema().open("rb") as f: schema = etree.XMLSchema(etree.parse(f)) schema.assertValid(reader.infile) version_elem = reader.infile.xpath("/icatdata/head/apiversion") assert version_elem assert version_elem[0].text == str(client.apiversion) + generator_elem = reader.infile.xpath("/icatdata/head/generator") + assert generator_elem + assert generator_elem[0].text == generator From 6b521f5f51afb39d7a27d430c4be5aec194dd943 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 11 Oct 2024 11:44:52 +0200 Subject: [PATCH 5/5] Update changelog --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f7bed82..d879da3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,7 +10,7 @@ Changelog New features ------------ -+ `#160`_, `#161`_: Add class attributes to ++ `#160`_, `#161`_, `#163`_: Add class attributes to :class:`icat.ingest.IngestReader` to make some prescribed values in the transformation to ICAT data file format configurable. @@ -22,6 +22,7 @@ Bug fixes and minor changes .. _#160: https://github.com/icatproject/python-icat/issues/160 .. _#161: https://github.com/icatproject/python-icat/pull/161 .. _#162: https://github.com/icatproject/python-icat/pull/162 +.. _#163: https://github.com/icatproject/python-icat/pull/163 .. _changes-1_4_0: