Skip to content

Commit

Permalink
Add levels to chart and run `black .`
Browse files Browse the repository at this point in the history
  • Loading branch information
JessyBarrette committed Feb 13, 2024
1 parent b773e42 commit deca8d8
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 15 deletions.
40 changes: 26 additions & 14 deletions hakai_ckan_records_checks/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

import click
import pandas as pd
import plotly.express as px
from jinja2 import Environment, FileSystemLoader
from loguru import logger
from tqdm import tqdm
import plotly.express as px

from hakai_ckan_records_checks import hakai
from hakai_ckan_records_checks.ckan import CKAN
Expand Down Expand Up @@ -115,10 +115,10 @@ def main(ckan_url, api_key, output, max_workers, log_level, cache):
results = pickle.load(file)
else:
results = review_records(ckan, max_workers)
if cache:
with open(CACHE_FILE, "wb") as file:
logger.info("Caching results")
pickle.dump(results, file)

with open(CACHE_FILE, "wb") as file:
logger.info("Caching results")
pickle.dump(results, file)

if not output:
return
Expand All @@ -127,22 +127,33 @@ def main(ckan_url, api_key, output, max_workers, log_level, cache):
results["catalog_summary"] = format_summary(results["catalog_summary"])

# Combine summary and issues
combined_issues = results['test_results'].merge(results['catalog_summary'], left_on='record_id', right_on='id').drop(columns=['id'])
combined_issues = (
results["test_results"]
.merge(results["catalog_summary"], left_on="record_id", right_on="id")
.drop(columns=["id"])
)
standardized_issues = combined_issues.copy()
standardized_issues['message'] = standardized_issues['message'].str.replace('resources\[[0-9]+\]','resources[...]',regex=True)
standardized_issues["message"] = standardized_issues["message"].str.replace(
"resources\[[0-9]+\]", "resources[...]", regex=True
)

# Generate figures
pie_chart = px.pie(standardized_issues, names='message',title=f'Hakai Records Issues Distribution: {len(standardized_issues)} issues detected')
pie_chart.update_traces(textposition='inside')
pie_chart.update_layout(uniformtext_minsize=12, uniformtext_mode='hide')
pie_chart = px.pie(
standardized_issues,
names="message",
title=f"Hakai Records Issues Distribution: {len(standardized_issues)} issues detected",
facet_col="level",
)
pie_chart.update_traces(textposition="inside")
pie_chart.update_layout(uniformtext_minsize=12, uniformtext_mode="hide")
pie_chart_html = pie_chart.to_html(full_html=False)

# save results
Path(output).mkdir(parents=True, exist_ok=True)
environment.get_template("index.html.jinja").stream(
catalog_summary=format_summary(results["catalog_summary"]),
issues_pie_chart=pie_chart_html,
issues_table = combined_issues,
issues_table=combined_issues,
time=pd.Timestamp.utcnow(),
ckan_url=ckan_url,
).dump(f"{output}/index.html")
Expand All @@ -156,9 +167,10 @@ def main(ckan_url, api_key, output, max_workers, log_level, cache):
time=pd.Timestamp.utcnow(),
).dump(f"{output}/issues/{record_id}.html")

# save results
results['catalog_summary'].to_csv(f"{output}/catalog_summary.csv", index=False)
results['test_results'].to_csv(f"{output}/test_results.csv", index=False)
# save results
results["catalog_summary"].to_csv(f"{output}/catalog_summary.csv", index=False)
results["test_results"].to_csv(f"{output}/test_results.csv", index=False)


if __name__ == "__main__":
main()
14 changes: 13 additions & 1 deletion hakai_ckan_records_checks/hakai.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,18 @@ def _test(condition, level, message):
f"Invalid distributor organisation-name={record['distributor'][0]['organisation-name']}",
)

# Review funder
funders = [
item for item in record["cited-responsible-party"] if "funder" in item["role"]
]
_test(len(funders) > 0, "WARNING", "No funder")
if funders:
_test(
[funder["organisation-name"] == "Hakai Institute" for funder in funders],
"WARNING",
f"'Hakai Institute' isn't listed as funder in record",
)

# Review publisher

# Review resources
Expand All @@ -61,7 +73,7 @@ def _test(condition, level, message):
_test(resource["url"] != "", "ERROR", "Empty resource url")
_test(resource["format"] != "", "ERROR", "Empty resource format")
_test(
resource["format"] in ["HTML","ERDDAP","OBIS"],
resource["format"] in ["HTML", "ERDDAP", "OBIS"],
"ERROR",
f"Invalid resource format: resources[{index}].format={resource['format']}",
)
Expand Down

0 comments on commit deca8d8

Please sign in to comment.