diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 45ec580c..1926d95b 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -2,7 +2,6 @@ name: Deploy static content to Pages on: - # Runs on pushes targeting the default branch push: branches: - main @@ -23,6 +22,7 @@ concurrency: cancel-in-progress: false jobs: + # Single deploy job since we're just deploying deploy: environment: @@ -57,4 +57,3 @@ jobs: PASSWORD: ${{ secrets.PASSWORD }} run: | python3 ./stac/stac-generator/update_items.py - diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..c569e9df --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,26 @@ +name: Validate generated stac item in the PR + +on: + pull_request: + branches: + - main + push: + branches: + - '**' + - '!master' + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: install dependencies + run: | + pip install -r ./stac/stac-generator/requirements.txt + - name: validate stac items + + run: | + pytest --tb=no ./stac/stac-generator/test/validator.py diff --git a/CoverageEncoding/CategoryInformation.md b/CoverageEncoding/CategoryInformation.md new file mode 100644 index 00000000..4b671d3d --- /dev/null +++ b/CoverageEncoding/CategoryInformation.md @@ -0,0 +1,9 @@ +# Example codespace for SWE:Category + +Dominant Leaf Type. 
Details at https://land.copernicus.eu/pan-european/high-resolution-layers/forests/dominant-leaf-type/status-maps/dominant-leaf-type-2018 + +| Label | Value | Color | Color RGBA | +|-------|-------|-------|-----------| +| all non-tree covered areas | 0 | ![grafik](https://github.com/FAIRiCUBE/data-requests/assets/11915304/1d01084f-cd75-4052-8019-9e122270be47) | 240,240,240,255 | +| broadleaved trees | 1 | ![grafik](https://github.com/FAIRiCUBE/data-requests/assets/11915304/1254b337-78df-4560-bc35-b1ac5ceedc5f) | 70,158,74,255 | +| coniferous trees | 2 | ![grafik](https://github.com/FAIRiCUBE/data-requests/assets/11915304/cfdee270-398f-4d3f-901f-236e6ea6258e) | 28,92,36,255 | diff --git a/CoverageEncoding/rangeType.md b/CoverageEncoding/rangeType.md new file mode 100644 index 00000000..5e5ef06b --- /dev/null +++ b/CoverageEncoding/rangeType.md @@ -0,0 +1,122 @@ +# Specification for provision of rangeType information + +OGC CIS uses [SWE Common](https://portal.ogc.org/files/?artifact_id=41157) DataRecord types for the provision of rangeType (description of what the numbers provided in the coverage range actually mean) +To date, this element has often been filled with erroneous values, not taking requirements from the SWE Common specification into account. +In order to ensure correct provision of CIS encoded data, we will describe the requirements here, together with examples. + +## SWE Common +The OGC Sensor Web Enablement Suite (SWE) aims to cover requirements pertaining to measured or observed data. +For CIS encodings, the following requirements classes are of relevance: +- Record Components Package: `The “DataRecord” class is modeled on the definition of ‘Record’ from ISO 11404. 
In this definition, a record is a composite data type composed of one to many fields, each of which having its own name and type definition.` +- Basic Types and Simple Components Schemas: `XML Schema elements and types defined in the “basic_types.xsd” and “simple_components.xsd” schema files implement all classes defined respectively in the “Basic Types” and “Simple Components” UML packages.` + +From the Basic Types and Simple Components, we rely on the Quantity and Category Elements. +While the Count Element would also be applicable, for the moment we will handle Counts as Quantities with a uom of "1" for "unitless". + +## SWE:DataRecord +SWE:DataRecord, derived from the SWE Common AbstractDataComponent, can be used to group multiple components via the `field` attribute. +Each field describes one band of the described Coverage. + +![grafik](https://github.com/FAIRiCUBE/data-requests/assets/11915304/f943b189-cc85-4ccf-a39e-d85cad4a3e6f) + +The name attribute of the swe:field must be provided, whereby Req 39 stipulates that `Each “field” attribute in a given instance of the “DataRecord” class shall be identified by a name that is unique to this instance.` + +Depending on the nature of the data provided in each band, the correct Simple Component must be provided in the corresponding DataRecord.field. + +## SWE:SimpleComponent Elements +All SWE:SimpleComponent types are derived from the SWE:AbstractSimpleComponent. Elements defined for SWE:AbstractSimpleComponent apply to all SWE:SimpleComponent types, thus are described jointly in this section. + +![grafik](https://github.com/FAIRiCUBE/data-requests/assets/11915304/04029405-8c1d-4b67-a210-7ce1288423c4) + +### definition +`definition` is a mandatory attribute of the SWE:SimpleComponent types. + +Requirement - http://www.opengis.net/spec/SWE/2.0/req/xsd-simple-components/definition-resolvable +`Req 62. 
The “definition” attribute shall contain a URI that can be resolved to the complete human readable definition of the property that is represented by the data component.` + +Additional information on the definition attribute is provided in clause 7.2.2 as follows: +`The “definition” attribute identifies the property (often an observed property in our context) that the data component represents by using a scoped name. It should map to a controlled term defined in an (web accessible) dictionary, registry or ontology. Such terms provide the formal textual definition agreed upon by one or more communities, eventually illustrated by pictures and diagrams as well as additional semantic information such as relationships to units and other concepts, ontological mappings, etc. ` + +As Observed Property registers covering FAIRiCUBE requirements have not been identified, alternative solutions may be required. A simple solution would be the use of the [QUDT Quantity Kind Vocabulary](http://qudt.org/2.1/vocab/quantitykind), e.g. [velocity](https://qudt.org/vocab/quantitykind/Velocity) or [radiance](https://qudt.org/vocab/quantitykind/Radiance) in order to provide basic information on the property being conveyed. When re-providing data from Copernicus Services, a reference to the page describing this dataset can be used, e.g. [High Resolution Layer Dominant Leaf Type](https://land.copernicus.eu/en/products/high-resolution-layer-dominant-leaf-type). +If a reference to an Observed Property register is available, this should be used. + +When working from a data request, the `Definition` field in the Bands section provides the required content. +If this is missing, and there is only one band, the text from the dataset `Documentation Link` or `Data Source` may be used. + +### label +The `label` element is a short descriptive human readable label describing what property the component represents. 
+ +When working from a data request, the `cell components` field in the Bands section provides the required content. +If this is missing, and there is only one band, the text from the dataset `Title` may be used. + +### description +The `description` element is a longer more descriptive human readable description describing what property the component represents. + +When working from a data request, the `Description` field in the Bands section provides the required content. +If this is missing, and there is only one band, the text from the dataset `Description` may be used. + +### nilValues +The `nilValues` element is defined on the SWE:AbstractSimpleComponentType. If nil values are used in the coverage, they must be provided here, together with a reason. The swe:NilValuesType must be used. + +When working from a data request, the `Null values` field in the Bands section provides the required content. + +## SWE:Quantity +The SWE:Quantity type adds a mandatory `Unit of Measure` element of type swe:UnitReference. + +When working from a data request, the `Unit of Measure` field in the Bands section provides the required content. + +Requirement - http://www.opengis.net/spec/SWE/2.0/req/xsd-simple-components/ucum-code-used +Req 64. The UCUM code for a unit of measure shall be used as the value of the “code” XML attribute whenever it can be constructed using the UCUM 1.8 specification. Otherwise the “href” XML attribute shall be used to reference an external unit definition. + +Note: UCUM 1.8 has been deprecated, current version is [UCUM 2.1](https://ucum.org/ucum). Thus, we will use UCUM 2.1 in FAIRiCUBE. + +The following shows an example of a Quantity rangeType taken from the Demography dataset. Note that ideally we would use the swe:Count type for this purpose. + +``` + + + + + Population total + The 2021 census contained a major innovation with the presentation of key census topics on an EU-wide 1 km² grid. 
+ + + 65535 + + + + + + + +``` + +## SWE:Category +The SWE:Category type adds a `codeSpace` element of type swe:UnitReference. This element utilizes xlink:href to reference an external dictionary, taxonomy or ontology representing the code space. This element is implicitly mandatory, as Req 25 stipulates the alternative provision of a `constraint` with a list of allowed values; as the constraint alternative does not allow for additional semantics on the individual entries, the `codeSpace` approach is preferred. + +However, the `codeSpace` approach requires the availability of a URI that resolves to information on the categorization used in the data, often not the case. While a machine readable approach would be preferable, in lieu of identified standards in this area, a simple [GitHub page](https://github.com/FAIRiCUBE/data-requests/blob/main/CoverageEncoding/CategoryInformation.md) can suffice. + +When working from a data request, the `Category List` field in the Bands section provides the required content. + +The following shows an example of a Category rangeType taken from the DominantLeafType dataset. + +``` + + + + + Dominant Leaf Type + The HRL Forest 2018 primary status layer Dominant Leaf Type (DLT) has been created in frame of the tender “EEA/IDM/R0/18/009 - High Resolution land cover characteristics for the 2018 reference year” as part of the EEA Copernicus Land Monitoring Service (CLMS, https://land.copernicus.eu). The DLT raster product provides a basic land cover classification with 3 thematic classes (all non-tree covered areas / broadleaved / coniferous) at 10m spatial resolution and covers the full of EEA39 area. More about the High Resolution Layers and CLMS datasets can be found at https://land.copernicus.eu/pan-european. 
+ + + 240 + 255 + + + + + + + +``` + diff --git a/README.md b/README.md index c40dba0f..6d8460e9 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ Contents of this space: - [How to Get Data Added](https://github.com/FAIRiCUBE/data-requests/wiki/How-to-Add-Data) - [Choosing the right pixel type](https://github.com/FAIRiCUBE/data-requests/wiki/Choosing-the-Right-Pixel-Type) +- [Details on the Coverage range type, as inherited from SWE Common](https://github.com/FAIRiCUBE/data-requests/blob/main/CoverageEncoding/rangeType.md) - [Connecting catalog with datacubes](https://github.com/FAIRiCUBE/data-requests/wiki/Connection-Catalog-Datacubes) - [Finding data ingested, datacube access how-to](https://github.com/FAIRiCUBE/data-requests/wiki) - [Use case specific modeling and access](https://github.com/FAIRiCUBE/data-requests/wiki/Data-Overview) @@ -15,6 +16,8 @@ Contents of this space: As data ingest is tightly connected with metadata management, use of data, etc., consider also these related spaces: +- [metadata-editor WebGUI](https://catalog-editor.eoxhub.fairicube.eu/): to provide and edit metadata to be shown in the [data catalog (STAC-fastapi)](https://catalog.eoxhub.fairicube.eu/?.language=en) + - [resource-metadata](https://github.com/FAIRiCUBE/resource-metadata): in addition to the issues providing metadata for resources, also used to discuss technical details on resource metadata - [Fairicube Hub](https://github.com/FAIRiCUBE/FAIRiCUBE-Hub-issue-tracker): for general FAIRiCUBE topics diff --git a/encoding-examples/readme.md b/encoding-examples/readme.md index 90804739..ae369f6d 100644 --- a/encoding-examples/readme.md +++ b/encoding-examples/readme.md @@ -1,3 +1,3 @@ # Coverage encoding examples -Note that the errors were corrected in November 2023, the examples should be updated by their creator to serve as a good basis. +On this page, we provide encoding examples linked from issues. 
import pystac
import pytest
import os
import json

from typing import Any


def _is_number(value: Any) -> bool:
    """Return True when ``value`` can be converted with ``float()``."""
    try:
        float(value)
    except (TypeError, ValueError):
        return False
    return True


def validate_item(item: pystac.item.Item):
    """Check that a STAC item carries the fields required by the data inventory.

    Args:
        item: the STAC item to validate. Its ``links``, ``properties``,
            ``bbox``, ``geometry`` and ``id`` attributes are read.

    Raises:
        AssertionError: on the first missing or malformed field; the message
            names the offending field.
    """
    item_is_EDC: bool = False
    # for now exempt edc items from the inventory required fields
    for link in item.links:
        # Only string targets can be compared against the EDC URL prefix.
        if (link.rel == "about"
                and isinstance(link.target, str)
                and link.target.startswith("https://collections.eurodatacube.com")):
            item_is_EDC = True
            break

    properties: dict[str, Any] = item.properties

    # NOTE(review): the diff this code was taken from lost its indentation.
    # The extent of this branch (Data Source through Temporal) is a
    # reconstruction -- confirm against the repository before merging.
    if not item_is_EDC:
        # validate Data Source
        assert "dataSource" in properties, "No dataSource in the stac item"
        assert isinstance(properties["dataSource"], str), "dataSource must be a string"
        assert len(properties["dataSource"]) > 0, "dataSource string must not be empty"

        # validate Owner/Organisation
        assert "providers" in properties, "No providers in the stac item"
        assert isinstance(properties["providers"], list), "providers must be a list"
        for provider in properties["providers"]:
            assert "organization" in provider or "name" in provider, \
                "provider must have an organization or a name"
            if "organization" in provider:
                assert isinstance(provider["organization"], str), "provider's organization must be a string"
                assert len(provider["organization"]) > 0, " provider's organization must not be empty"
            if "name" in provider:
                assert isinstance(provider["name"], str), "provider name must be a string"
                assert len(provider["name"]) > 0, "provider name string must not be empty"

        # validate Horizontal section
        assert isinstance(item.bbox, list), "bbox must be a list"
        assert len(item.bbox) == 4, "bbox must contain a 4 coordinates"
        assert isinstance(item.geometry, dict), "geometry must be an object"

        # Resolution of Horizontal Axis
        # .get() so that a missing key produces the assertion message
        # instead of a bare KeyError.
        dimensions = properties.get("cube:dimensions")
        assert isinstance(dimensions, dict), "No dimensions in the stac item"
        assert "x" in dimensions, "No x dimension in the stac item"
        assert "y" in dimensions, "No y dimension in the stac item"
        x = dimensions["x"]
        y = dimensions["y"]
        assert "step" in x and x["step"] is not None, "No step in the x dimension"
        assert _is_number(x["step"]), "x step must be float"
        assert "step" in y and y["step"] is not None, "No step in the y dimension"
        assert _is_number(y["step"]), "y step must be float"

        # Units of Measurement
        assert "unit" in x, "No unit in x dimensions"
        assert isinstance(x["unit"], str), "x dimension unit must be a string"
        assert "unit" in y, "No unit in y dimensions"
        assert isinstance(y["unit"], str), "y dimension unit must be a string"

        # Horizontal CRS
        assert "reference_system" in x, "No reference_system in x dimensions"
        assert isinstance(x["reference_system"], str), "x dimension reference_system must be a string"
        assert "reference_system" in y, "No reference_system in y dimensions"
        assert isinstance(y["reference_system"], str), "y dimension reference_system must be a string"

        # Temporal
        assert "t" in dimensions or "time" in dimensions, "No t or time dimension in the stac item"
        # Prefer "t"; fall back to "time" when only that key is present.
        time = dimensions["t"] if "t" in dimensions else dimensions["time"]

        # Time (Begin/End)
        assert "extent" in time or "values" in time, "No extent or values in time dimensions"
        # Resolution of Time Axis (Interval)
        if "values" in time:
            assert "step" in time, "No step in time dimensions"
            assert isinstance(time["step"], str), "time's step must be a string"
        # Unit of measure
        # NOTE(review): assumed to apply to every time dimension, not only to
        # those listing "values" -- confirm.
        assert isinstance(time.get("unit"), str), "time's unit must be a string"

    # Range Data validation
    assert "raster:bands" in properties or "bands" in properties, \
        "No raster:bands or bands in the stac item"

    # TODO figure out a way to validate edc items , the ones with "bands"

    if "raster:bands" in properties:
        for band in properties["raster:bands"]:
            # Range Data Type
            assert "data_type" in band, "No data_type in band"
            assert isinstance(band["data_type"], str), "band's data_type must be a string"
            assert len(band["data_type"]) > 0, "band's data_type string must not be empty"

            # Range Definition
            assert "definition" in band, "No definition in band"
            assert isinstance(band["definition"], str), "band's definition must be a string"
            assert len(band["definition"]) > 0, "band's definition string must not be empty"

            # Range Description
            assert "description" in band, "No description in band"
            assert isinstance(band["description"], str), "band's description must be a string"
            assert len(band["description"]) > 0, "band's description string must not be empty"

            # Null values
            assert "nodata" in band and band["nodata"] is not None, "No nodata in band"
            assert _is_number(band["nodata"]), "band's nodata must be float"

    # validate ID
    assert isinstance(item.id, str), "id must be a string"
    assert len(item.id) > 0, "item id string must not be empty"

    # validate Description
    assert "description" in properties, "No description in the stac item"
    assert isinstance(properties["description"], str), "description must be a string"
    assert len(properties["description"]) > 0, "description string must not be empty"

    # Legal - License
    assert "license" in properties, "No license in the stac item"
    assert isinstance(properties["license"], str), "license must be a string"

    # TODO: keywords must be a list
    # Keywords
    assert "keywords" in properties, "No keywords in the stac item"
    keywords = properties["keywords"]
    # A comma-separated string is still tolerated alongside a real list.
    assert isinstance(keywords, (list, str)), "keywords is not a valid list"
    assert len(keywords) > 0, "keywords must not be empty"


@pytest.mark.parametrize("dir", sorted(
    os.path.join('stac_dist', f) for f in os.listdir('stac_dist')
    if os.path.isdir(os.path.join('stac_dist', f))))
def test_items(dir):
    """Validate every STAC item JSON file found directly inside ``dir``.

    Files whose name ends with ``catalog.json`` are skipped. A failure is
    re-raised with the item path prepended so the file is identifiable even
    when pytest tracebacks are disabled.
    """
    items = sorted(
        f for f in os.listdir(dir)
        if f.endswith('.json') and not f.endswith('catalog.json'))

    for item in items:
        item_path = os.path.join(dir, item)
        stac_item = pystac.Item.from_file(item_path)
        try:
            validate_item(stac_item)
        except AssertionError as exc:
            raise AssertionError(f"{item_path}: {exc}") from exc


# In addition, some fields can be filled with defaults, e.g.
+# Metadata Standard: STAC +# Provision Date: Date being provided \ No newline at end of file diff --git a/stac_dist/OSM_OpenStreetMap/OSM_OpenStreetMap.json b/stac_dist/OSM_OpenStreetMap/OSM_OpenStreetMap.json deleted file mode 100644 index 042634a9..00000000 --- a/stac_dist/OSM_OpenStreetMap/OSM_OpenStreetMap.json +++ /dev/null @@ -1,112 +0,0 @@ -{ - "type": "Feature", - "stac_version": "2.2.0", - "id": "OSM_OpenStreetMap", - "properties": { - "license": "ODbL-1.0", - "description": "OSM provides 2D maps", - "providers": [ - { - "name": "Open Street Map", - "organization": "OSM", - "description": "OSM provides 2D maps", - "url": "https://www.openstreetmap.org", - "project_purpose": "OSM provides 2D-level data for buildings" - } - ], - "dataSource": "", - "cube:dimensions": { - "x": { - "axis": "x", - "extent": [ - -180, - 180 - ], - "reference_system": "4326", - "type": "spatial" - }, - "y": { - "axis": "y", - "extent": [ - -90, - 90 - ], - "reference_system": "4326", - "type": "spatial" - }, - "time": { - "extent": [], - "type": "temporal", - "values": [ - "1900-01-01T00:00:00Z", - "2999-01-01T00:00:00Z" - ], - "step": "" - }, - "z": {} - }, - "raster:bands": [], - "title": "OSM_OpenStreetMap", - "datasource_type": "vector", - "area_cover": "World", - "start_datetime": "1900-01-01T00:00:00Z", - "end_datetime": "2999-01-01T00:00:00Z", - "use_case_S4E": "two", - "use_case_WER": "two", - "use_case_NHM": "one", - "use_case_NILU": "one", - "platform": "other" - }, - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [ - -180, - -90 - ], - [ - -180, - 90 - ], - [ - 180, - 90 - ], - [ - 180, - -90 - ], - [ - -180, - -90 - ] - ] - ] - }, - "links": [ - { - "rel": "root", - "href": "../catalog.json", - "type": "application/json", - "title": "data-access catalog" - }, - { - "rel": "parent", - "href": "../catalog.json", - "type": "application/json", - "title": "data-access catalog" - } - ], - "assets": {}, - "bbox": [ - -180, - -90, - 180, - 90 - ], - 
"stac_extensions": [ - "https://stac-extensions.github.io/raster/v1.1.0/schema.json", - "https://stac-extensions.github.io/datacube/v2.0.0/schema.json" - ] -} \ No newline at end of file diff --git a/stac_dist/eu_demography/eu_demography.json b/stac_dist/eu_demography/eu_demography.json deleted file mode 100644 index d3589c82..00000000 --- a/stac_dist/eu_demography/eu_demography.json +++ /dev/null @@ -1,125 +0,0 @@ -{ - "type": "Feature", - "stac_version": "2.2.0", - "id": "eu_demography", - "properties": { - "description": "European Demography on a 1km grid\n2018 - eu_demography_2018_index\n2021 - eu_demography_2021_index\nvirtual coverage - eu_demography_virtual_coverage_index", - "providers": [], - "dataSource": "", - "cube:dimensions": { - "x": { - "axis": "x", - "extent": [ - 944000, - 6505000 - ], - "reference_system": "3035", - "type": "spatial", - "unit_of_measure": "meter", - "step": "1000" - }, - "y": { - "axis": "y", - "extent": [ - 942000, - 5414000 - ], - "reference_system": "3035", - "type": "spatial", - "unit_of_measure": "meter", - "step": "1000" - }, - "time": { - "extent": [ - "2018-01-01T00:00Z", - "2020-12-31T23:59:59Z" - ], - "type": "temporal", - "step": "P3Y" - }, - "z": {} - }, - "datetime": "2021-02-17T15:13:53Z", - "raster:bands": [], - "title": "EU Demography", - "datasource_type": "eu_demography_2018_index", - "keywords": "These data shall not be used for commercial purposes (even within the user's organisation) without prior approval by the owner. 
Users may not: - disseminate the dataset to clients outside their own organisation - sell the dataset \u2013 in whole or in part \u2013 to parties outside their organisation - use it for any other commercial purpose.", - "start_datetime": "2018-01-01T00:00Z", - "end_datetime": "2020-12-31T23:59:59Z", - "personalData": "https://ec.europa.eu/eurostat/web/main/about/policies/copyright", - "Provenance_name": "https://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/population-distribution-demography/geostat", - "use_case_NHM": "two", - "platform": "rasdaman" - }, - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [ - -54.896608380235485, - 24.805697480738544 - ], - [ - -54.896608380235485, - 64.33852817800617 - ], - [ - 59.87641607130845, - 64.33852817800617 - ], - [ - 59.87641607130845, - 24.805697480738544 - ], - [ - -54.896608380235485, - 24.805697480738544 - ] - ] - ] - }, - "links": [ - { - "rel": "root", - "href": "../catalog.json", - "type": "application/json", - "title": "data-access catalog" - }, - { - "rel": "parent", - "href": "../catalog.json", - "type": "application/json", - "title": "data-access catalog" - }, - { - "href": "https://fairicube.rasdaman.com/rasdaman/ows?&SERVICE=WCS&VERSION=2.1.0&REQUEST=DescribeCoverage&COVERAGEID=eu_demography&outputType=GeneralGridCoverage", - "rel": "about", - "type": "text/xml", - "title": "Link to the coverage description in XML" - }, - { - "href": "https://fairicube.rasdaman.com/rasdaman-dashboard/?layers=eu_demography", - "rel": "service", - "type": "text/html", - "title": "Link to the web application to Access, process gridded data" - } - ], - "assets": { - "data": { - "href": "https://fairicube.rasdaman.com/rasdaman/ows?SERVICE=WCS&VERSION=2.0.1&REQUEST=GetCoverage&COVERAGEID=eu_demography&FORMAT=application/netcdf", - "roles": [ - "data" - ] - } - }, - "bbox": [ - -54.896608380235485, - 24.805697480738544, - 59.87641607130845, - 64.33852817800617 - ], - "stac_extensions": [ - 
"https://stac-extensions.github.io/raster/v1.1.0/schema.json", - "https://stac-extensions.github.io/datacube/v2.0.0/schema.json" - ] -} \ No newline at end of file