Skip to content

Commit

Permalink
geojsontojson: Report on inconsistent networks
Browse files Browse the repository at this point in the history
#26

tests/fixtures/geojson_to_json/phases_inconsistent*:
Added new inconsistent phrase in contract in network
Make sure the phase_id is reported in meta output
  • Loading branch information
odscjames committed Nov 25, 2022
1 parent e6cf42c commit cad4463
Show file tree
Hide file tree
Showing 17 changed files with 427 additions and 53 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- GeoJSON to JSON:
- Include name in organisation and phase references
- Process funders/organisations in phases correctly, including spotting inconsistent funders/organisations
- Report on inconsistent networks

## [0.4.0] - 2022-11-09

Expand Down
125 changes: 91 additions & 34 deletions libcoveofds/geojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ def __init__(self):
self._inconsistent_organisation_ids_by_network_id: defaultdict = defaultdict(
set
)
self._inconsistent_network_ids_seen: set = set()

def process_data(self, nodes_data: dict, spans_data: dict) -> None:
"""Process data. Results are stored on object to get with other methods."""
Expand All @@ -223,32 +224,86 @@ def _process_network(self, geojson_feature_node_or_span: dict) -> None:
and "network" in geojson_feature_node_or_span["properties"]
):
network = geojson_feature_node_or_span["properties"]["network"]
if network.get("id"):
# TODO check for inconsistent data here!
self._networks[network.get("id")] = copy.deepcopy(network)
self._networks[network.get("id")]["nodes"] = []
self._networks[network.get("id")]["spans"] = []
self._networks[network.get("id")]["phases"] = {}
self._networks[network.get("id")]["organisations"] = {}

# Sort references to phases in contracts
if "contracts" in self._networks[network.get("id")] and isinstance(
self._networks[network.get("id")]["contracts"], list
):
for contract in self._networks[network.get("id")]["contracts"]:
if "relatedPhases" in contract and isinstance(
contract["relatedPhases"], list
if isinstance(network, dict):
network_id = network.get("id")
if isinstance(network_id, str) and network_id:

# TODO nodes/spans/phases/organisations should not be set in network - check this and warn if so

# Is data already seen?
if network_id in self._networks:
# Is it inconsistent with what we have seen before?
if json_diff_function(
self._networks[network_id]["network_data_original"], network
):
# record error for later
self._inconsistent_network_ids_seen.add(network_id)

# Check references to phases in contracts are consistent with what we have seen before.
# This will give a more specific error on the phase_id.
# (However, 2 errors will be recorded - one for network_id and one for phrase_id!
# Can we make that tidier?
# Suspect as we work to make errors more informative that will happen anyway #TODO)
if "contracts" in self._networks[network_id][
"network_data_output"
] and isinstance(
self._networks[network_id]["network_data_output"][
"contracts"
],
list,
):
for contract in self._networks[network_id][
"network_data_output"
]["contracts"]:
if "relatedPhases" in contract and isinstance(
contract["relatedPhases"], list
):
for phase_reference in contract[
"relatedPhases"
]:
self._process_phase(
network_id, phase_reference
)
else:

# Not seen this before!

# Store it
self._networks[network_id] = {
"network_data_original": copy.deepcopy(network),
"network_data_output": copy.deepcopy(network),
"nodes": [],
"spans": [],
"phases": {},
"organisations": {},
}

# Sort references to phases in contracts
# (Must do after storing, as this writes data to the stored network)
if "contracts" in self._networks[network_id][
"network_data_output"
] and isinstance(
self._networks[network_id]["network_data_output"][
"contracts"
],
list,
):
out: list = []
for phase_reference in contract["relatedPhases"]:
phase_data = self._process_phase(
network.get("id"), phase_reference
)
if phase_data:
out.append(phase_data)
else:
out.append(phase_reference)
contract["relatedPhases"] = out
for contract in self._networks[network_id][
"network_data_output"
]["contracts"]:
if "relatedPhases" in contract and isinstance(
contract["relatedPhases"], list
):
out: list = []
for phase_reference in contract["relatedPhases"]:
phase_data = self._process_phase(
network_id, phase_reference
)
if phase_data:
out.append(phase_data)
else:
out.append(phase_reference)
contract["relatedPhases"] = out

def _process_node(self, geojson_feature_node: dict) -> None:
node = copy.deepcopy(geojson_feature_node.get("properties", {}))
Expand Down Expand Up @@ -380,16 +435,17 @@ def get_json(self) -> dict:
out: dict = {"networks": []}
for network in self._networks.values():
# We are going to change network, so we need to take a copy
network = copy.deepcopy(network)
# Arrays have minItems: 1 set - so if no content, remove the empty array
for key in ["nodes", "spans", "phases", "organisations", "contracts"]:
if key in network and not network[key]:
del network[key]
# Sometimes we store these things in dicts - turn to lists
network_data = copy.deepcopy(network["network_data_output"])
# Copy other data we have built to network_data
# Arrays have minItems: 1 set - so only add if we actually have content
for key in ["nodes", "spans"]:
if network[key]:
network_data[key] = network[key]
for key in ["phases", "organisations"]:
if key in network:
network[key] = list(network[key].values())
out["networks"].append(network)
if network[key]:
network_data[key] = list(network[key].values())
# build return
out["networks"].append(network_data)
return out

def get_meta_json(self) -> dict:
Expand All @@ -410,5 +466,6 @@ def get_meta_json(self) -> dict:
k: {"organisation_ids": sorted(list(v))}
for k, v in self._inconsistent_organisation_ids_by_network_id.items()
}
out["inconsistent_network_ids"] = list(self._inconsistent_network_ids_seen)
# return
return out
1 change: 1 addition & 0 deletions make_expected_test_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/organisati
libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/phase_funders_1.meta.expected.json tests/fixtures/geojson_to_json/phase_funders_1.nodes.geo.json tests/fixtures/geojson_to_json/phase_funders_1.spans.geo.json tests/fixtures/geojson_to_json/phase_funders_1.expected.json
libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/phases_inconsistent_1.meta.expected.json tests/fixtures/geojson_to_json/phases_inconsistent_1.nodes.geo.json tests/fixtures/geojson_to_json/phases_inconsistent_1.spans.geo.json tests/fixtures/geojson_to_json/phases_inconsistent_1.expected.json
libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/organisations_inconsistent_1.meta.expected.json tests/fixtures/geojson_to_json/organisations_inconsistent_1.nodes.geo.json tests/fixtures/geojson_to_json/organisations_inconsistent_1.spans.geo.json tests/fixtures/geojson_to_json/organisations_inconsistent_1.expected.json
libcoveofds gjtoj --outputmetafilename tests/fixtures/geojson_to_json/network_inconsistent_1.meta.expected.json tests/fixtures/geojson_to_json/network_inconsistent_1.nodes.geo.json tests/fixtures/geojson_to_json/network_inconsistent_1.spans.geo.json tests/fixtures/geojson_to_json/network_inconsistent_1.expected.json

# JSON to GeoJSON
libcoveofds jtogj --outputmetafilename tests/fixtures/json_to_geojson/basic_1.expected.meta.json tests/fixtures/json_to_geojson/basic_1.json tests/fixtures/json_to_geojson/basic_1.expected.nodes.geo.json tests/fixtures/json_to_geojson/basic_1.expected.spans.geo.json
Expand Down
3 changes: 2 additions & 1 deletion tests/fixtures/geojson_to_json/basic_1.meta.expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,6 @@
}
},
"inconsistent_phases_by_network_id": {},
"inconsistent_organisations_by_network_id": {}
"inconsistent_organisations_by_network_id": {},
"inconsistent_network_ids": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
{
"networks": [
{
"id": "a096d627-72e1-4f9b-b129-951b1737bff4",
"name": "Ghana Fibre Network",
"nodes": [
{
"id": "1",
"name": "Accra",
"location": {
"type": "Point",
"coordinates": [
-0.174,
5.625
]
}
},
{
"id": "2",
"name": "Kumasi",
"status": "operational",
"location": {
"type": "Point",
"coordinates": [
-1.628,
6.711
]
}
}
],
"spans": [
{
"id": "1",
"name": "Accra to Kumasi",
"start": "1",
"end": "2",
"route": {
"type": "LineString",
"coordinates": [
[
-0.173,
5.626
],
[
-0.178,
5.807
],
[
-0.112,
5.971
],
[
-0.211,
5.963
],
[
-0.321,
6.17
],
[
-0.488,
6.29
],
[
-0.56,
6.421
],
[
-0.752,
6.533
],
[
-0.867,
6.607
],
[
-1.101,
6.585
],
[
-1.304,
6.623
],
[
-1.461,
6.727
],
[
-1.628,
6.713
]
]
}
}
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"output_field_coverage": {
"/networks": {
"count": 1
},
"/networks/id": {
"count": 1
},
"/networks/name": {
"count": 1
},
"/networks/nodes": {
"count": 1
},
"/networks/nodes/id": {
"count": 2
},
"/networks/nodes/name": {
"count": 2
},
"/networks/nodes/location": {
"count": 2
},
"/networks/nodes/location/type": {
"count": 2
},
"/networks/nodes/location/coordinates": {
"count": 2
},
"/networks/nodes/status": {
"count": 1
},
"/networks/spans": {
"count": 1
},
"/networks/spans/id": {
"count": 1
},
"/networks/spans/name": {
"count": 1
},
"/networks/spans/start": {
"count": 1
},
"/networks/spans/end": {
"count": 1
},
"/networks/spans/route": {
"count": 1
},
"/networks/spans/route/type": {
"count": 1
},
"/networks/spans/route/coordinates": {
"count": 1
}
},
"inconsistent_phases_by_network_id": {},
"inconsistent_organisations_by_network_id": {},
"inconsistent_network_ids": [
"a096d627-72e1-4f9b-b129-951b1737bff4"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
-0.174,
5.625
]
},
"properties": {
"id": "1",
"name": "Accra",
"network": {
"id": "a096d627-72e1-4f9b-b129-951b1737bff4",
"name": "Ghana Fibre Network"
}
}
},
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
-1.628,
6.711
]
},
"properties": {
"id": "2",
"name": "Kumasi",
"status": "operational",
"network": {
"id": "a096d627-72e1-4f9b-b129-951b1737bff4",
"name": "Ghana Fibre Network"
}
}
}
]
}
Loading

0 comments on commit cad4463

Please sign in to comment.