Skip to content

Commit

Permalink
Merge branch 'fix/firebase-to-xml' into migrate-cioos-form
Browse files Browse the repository at this point in the history
  • Loading branch information
JessyBarrette committed Nov 1, 2024
2 parents f9dba74 + 4c6f7d0 commit a204681
Show file tree
Hide file tree
Showing 8 changed files with 555 additions and 97 deletions.
1 change: 1 addition & 0 deletions firebase_to_xml/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12
55 changes: 34 additions & 21 deletions firebase_to_xml/firebase_to_xml/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,23 @@
"""
Command line interface to part of firebase_to_xml
"""

import argparse
import os
import traceback
from pathlib import Path

import yaml
from dotenv import load_dotenv
from get_records_from_firebase import get_records_from_firebase
from loguru import logger
from metadata_xml.template_functions import metadata_to_xml

from firebase_to_xml.get_records_from_firebase import get_records_from_firebase
from firebase_to_xml.record_json_to_yaml import record_json_to_yaml


def parse_status(status: str):
    """Split a comma-separated status selection into a list of statuses.

    A value that contains no comma yields a one-element list holding the
    value unchanged.
    """
    # str.split already returns [status] when the separator is absent, so a
    # separate single-value branch is not needed.
    return status.split(",")
from record_json_to_yaml import record_json_to_yaml
from tqdm import tqdm


def get_filename(record):
"""Creates a filename by combinig the title and UUID """
"""Creates a filename by combinig the title and UUID"""
name = record["title"][record["language"]][0:30] + "_" + record["identifier"][0:5]
char_list = [
character if character.isalnum() else "_" for character in name.strip().lower()
Expand All @@ -48,9 +42,7 @@ def main():
parser.add_argument(
"--yaml", action="store_true", help="Whether to output yaml file as well as xml"
)
parser.add_argument(
"--region", required=True, help="Eg pacific/stlaurent/atlantic"
)
parser.add_argument("--region", required=True, help="Eg pacific/stlaurent/atlantic")
parser.add_argument(
"--status",
default="published",
Expand All @@ -63,7 +55,18 @@ def main():
],
)
parser.add_argument("--record_url", required=False)

parser.add_argument(
"--database_url",
default=os.getenv("DATABASE_URL"),
required=False,
help="Firebase database URL (default: %(default)s)",
)
parser.add_argument(
"--encoding",
default="utf-8",
required=False,
help="Encoding of the output files",
)
args = vars(parser.parse_args())

region = args["region"]
Expand All @@ -73,10 +76,15 @@ def main():

firebase_auth_key_file = args["key"]
also_save_yaml = args["yaml"]
encoding = args["encoding"]

# get list of records from Firebase
record_list = get_records_from_firebase(
region, firebase_auth_key_file, record_url, record_status
region=args["region"],
firebase_auth_key_file=args["key"],
record_url=record_url,
record_status=args["status"].split(","),
database_url=args["database_url"],
)

# translate each record to YAML and then to XML
Expand Down Expand Up @@ -107,9 +115,11 @@ def main():

# output yaml
if also_save_yaml:
filename = f"{xml_directory}/{name}.yaml"
file = open(filename, "w")
file.write(yaml.dump(record_yaml, allow_unicode=True, sort_keys=False))
yaml_file = output_directory / f"{filename}.yaml"
yaml_file.write_text(
yaml.dump(record_yaml, allow_unicode=True, sort_keys=False),
encoding=encoding,
)

# render xml template and write to file
xml = metadata_to_xml(record_yaml)
Expand All @@ -121,6 +131,9 @@ def main():
file.write(xml)
print("Wrote " + file.name)

xml_file = output_directory / f"{filename}.xml"
xml_file.write_text(xml, encoding=encoding)

except Exception:
print(traceback.format_exc())

Expand Down
21 changes: 11 additions & 10 deletions firebase_to_xml/firebase_to_xml/get_records_from_firebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,16 @@

from google.auth.transport.requests import AuthorizedSession
from google.oauth2 import service_account
from loguru import logger


def get_records_from_firebase(
region, firebase_auth_key_file, record_url, record_status, firebase_auth_key_json=None
region: str,
firebase_auth_key_file: str,
record_url: str,
record_status: list,
database_url: str,
firebase_auth_key_json: str = None,
):
"""
Returns list of records from firebase for this region,
Expand Down Expand Up @@ -43,23 +49,18 @@ def get_records_from_firebase(
records = []

if record_url:
response = authed_session.get(
f"https://cioos-metadata-form.firebaseio.com/{record_url}.json"
)
response = authed_session.get(f"{database_url}{record_url}.json")
body = json.loads(response.text)
records.append(body)
return records

else:
response = authed_session.get(
f"https://cioos-metadata-form.firebaseio.com/{region}/users.json"
)
response = authed_session.get(f"{database_url}{region}/users.json")
body = json.loads(response.text)

# Parse response
if not body or not isinstance(body, dict):
    # loguru builds the final text with message.format(*args); without "{}"
    # placeholders the extra positional arguments are silently dropped and
    # only "Region" would be logged.
    logger.warning("Region {} not found?", region)
    sys.exit()

for users_tree in body.values():
Expand Down
109 changes: 70 additions & 39 deletions firebase_to_xml/firebase_to_xml/record_json_to_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,52 @@
"""

import json
from firebase_to_xml.scrubbers import scrub_dict, scrub_keys, remove_nones
import os

from firebase_to_xml.scrubbers import remove_nones, scrub_dict, scrub_keys

# Absolute directory containing this module; the loaders below use it to
# locate the bundled resources/*.json files.
# NOTE(review): this name shadows the builtin dir() inside this module.
dir = os.path.dirname(os.path.realpath(__file__))


def get_licenses():
    """Load and return the parsed contents of ``resources/licenses.json``.

    The file is looked up relative to this module's directory (``dir``).
    """
    # JSON text is UTF-8; without an explicit encoding open() falls back to
    # the platform locale and can mis-decode non-ASCII characters.
    with open(dir + "/resources/licenses.json", encoding="utf-8") as f:
        return json.load(f)


def get_eov_translations():
    """Build a lookup from each EOV ``value`` to its ``label FR`` text.

    Reads ``resources/eov.json`` relative to this module's directory
    (``dir``) and iterates its entries, each of which must provide the
    ``value`` and ``label FR`` keys.

    Raises:
        KeyError: if an entry lacks ``value`` or ``label FR``.
    """
    # The French labels are non-ASCII, so decode explicitly as UTF-8 rather
    # than relying on the platform's locale encoding.
    with open(dir + "/resources/eov.json", encoding="utf-8") as f:
        eovs = json.load(f)
    return {eov["value"]: eov["label FR"] for eov in eovs}


def get_epsg():
    """Return the entries of ``resources/epsg.json`` keyed by their code.

    Each entry's ``Code`` is converted to ``str`` to form the key and the
    whole entry is kept as the value. A falsy payload (for example an empty
    list) yields an empty dict.
    """
    # Decode explicitly as UTF-8 instead of the platform locale encoding.
    with open(dir + "/resources/epsg.json", encoding="utf-8") as f:
        epsg_json = json.load(f)
    if not epsg_json:
        return {}
    return {str(entry["Code"]): entry for entry in epsg_json}

licenses=get_licenses()
eov_translations=get_eov_translations()

# Lookup tables read from the bundled JSON resources when this module is
# imported; the functions below consult them instead of re-reading the files.
licenses = get_licenses()
eov_translations = get_eov_translations()
epsg = get_epsg()


def eovs_to_fr(eovs_en):
    """Translate a list of EOVs in english to a list in french.

    Falsy entries are skipped; an EOV with no entry in ``eov_translations``
    is translated to an empty string.
    """
    translated = []
    for name in eovs_en:
        if not name:
            continue
        translated.append(eov_translations.get(name, ""))
    return translated


def verify_translation(verified, message):
    """Return ``message`` when ``verified`` is falsy, otherwise ``""``."""
    return "" if verified else message


def strip_keywords(keywords):
"""Strips whitespace from each keyword in either language"""
stripped = {
Expand All @@ -61,33 +68,40 @@ def date_from_datetime_str(datetime_str):
return None
return (datetime_str or "")[:10]


def fix_lat_long_polygon(polygon):
    """Change lat,long to long,lat, which is what is expected in the XML.

    Args:
        polygon: Whitespace-separated ``lat,long`` pairs. A single space
            after the comma inside a pair is tolerated.

    Returns:
        The pairs, in input order, rewritten as ``long,lat`` and joined by
        single spaces; ``""`` when ``polygon`` is empty or ``None``.

    Raises:
        ValueError: if a pair does not hold exactly two comma-separated
            values.
    """
    if not polygon:
        return ""
    # Collapse "lat, long" to "lat,long" so whitespace only separates pairs.
    # split() with no argument ignores leading/trailing and repeated
    # whitespace, which split(" ") turned into empty, unparsable tokens.
    pairs = polygon.replace(", ", ",").split()
    fixed = []
    for pair in pairs:
        lat, lon = pair.split(",")
        fixed.append(f"{lon},{lat}")
    return " ".join(fixed)


def format_taxa(taxa):
    """Flatten taxa entries into a flat list of taxonomic keywords.

    Args:
        taxa: An iterable of taxon mappings that may carry the keys
            ``kingdom``, ``phylum``, ``class``, ``order``, ``family``,
            ``genus`` and ``species``, or a single string.

    Returns:
        A list of keyword strings. Ranks are emitted in the order listed
        above for each taxon, values containing commas are split into
        separate keywords, and missing or empty values are skipped.
    """
    ranks = ("kingdom", "phylum", "class", "order", "family", "genus", "species")
    if isinstance(taxa, str):
        taxa = [taxa]

    keywords = []
    for taxon in taxa:
        if isinstance(taxon, str):
            # A bare string has no rank fields to look up; use its text as
            # the keyword source (calling .get on it raised AttributeError).
            values = (taxon,)
        else:
            values = (taxon.get(rank) for rank in ranks)
        for value in values:
            if value:
                # Dropping empty parts keeps a taxon without any rank value
                # from contributing a "" keyword.
                keywords.extend(part for part in value.split(",") if part)
    return keywords


Expand All @@ -102,16 +116,14 @@ def record_json_to_yaml(record):
base_url = "https://cioos-siooc.github.io/metadata-entry-form#"
full_url = f"{base_url}/{language}/{region}/{user_id}/{record_id}"


polygon = record.get("map", {}).get("polygon", "")

record_yaml = {
"metadata": {
"naming_authority": "ca.cioos",
"identifier": record.get("identifier"),
"language": record.get("language"),
"maintenance_note": "Generated from "
+ full_url,
"maintenance_note": "Generated from " + full_url,
"use_constraints": {
"limitations": record.get("limitations", "None"),
"licence": licenses.get(
Expand Down Expand Up @@ -139,13 +151,19 @@ def record_json_to_yaml(record):
else "",
"polygon": fix_lat_long_polygon(polygon),
"vertical": [
0 if record.get("noVerticalExtent") else float(
record.get("verticalExtentMin")),
0 if record.get("noVerticalExtent") else float(
record.get("verticalExtentMax")),
0
if record.get("noVerticalExtent")
else float(record.get("verticalExtentMin")),
0
if record.get("noVerticalExtent")
else float(record.get("verticalExtentMax")),
],
"vertical_positive": "heightPositive" if record.get("noVerticalExtent") else record.get("verticalExtentDirection"),
"vertical_epsg": epsg.get("5829") if record.get("noVerticalExtent") else epsg.get(record.get("verticalExtentEPSG")),
"vertical_positive": "heightPositive"
if record.get("noVerticalExtent")
else record.get("verticalExtentDirection"),
"vertical_epsg": epsg.get("5829")
if record.get("noVerticalExtent")
else epsg.get(record.get("verticalExtentEPSG")),
"description": record["map"].get("description"),
"descriptionIdentifier": record["map"].get("descriptionIdentifier"),
},
Expand All @@ -161,8 +179,14 @@ def record_json_to_yaml(record):
},
"keywords": {
"default": strip_keywords(record.get("keywords", {"en": [], "fr": []})),
"eov": {"en": record.get("eov",[]), "fr": eovs_to_fr(record.get("eov",[]))},
"taxa": {"en": format_taxa(record.get("taxa",[])), "fr": format_taxa(record.get("taxa",[]))}
"eov": {
"en": record.get("eov", []),
"fr": eovs_to_fr(record.get("eov", [])),
},
"taxa": {
"en": format_taxa(record.get("taxa", [])),
"fr": format_taxa(record.get("taxa", [])),
},
},
"temporal_begin": record.get("dateStart"),
"temporal_end": record.get("dateEnd"),
Expand All @@ -184,7 +208,14 @@ def record_json_to_yaml(record):
"ror": contact.get("orgRor"),
},
"individual": {
"name": ", ".join(remove_nones([contact.get("lastName") or None, contact.get("givenNames") or None])),
"name": ", ".join(
remove_nones(
[
contact.get("lastName") or None,
contact.get("givenNames") or None,
]
)
),
"position": contact.get("indPosition"),
"email": contact.get("indEmail"),
"orcid": contact.get("indOrcid"),
Expand All @@ -203,14 +234,14 @@ def record_json_to_yaml(record):
],
}

if record.get("noTaxa") and record_yaml.get("identification",{}).get("keywords"):
if record.get("noTaxa") and record_yaml.get("identification", {}).get("keywords"):
record_yaml["identification"]["keywords"].pop("taxa", None)

if record.get("noPlatform"):
record_yaml["instruments"] = record.get("instruments")
else:
instrumentsList = record.get("instruments",[])
platformList = record.get("platforms",[])
instrumentsList = record.get("instruments", [])
platformList = record.get("platforms", [])
# If platforms has only one element, add it to the platform dict and add all instruments as a key
if len(platformList) == 1:
record["platforms"][0]["instruments"] = instrumentsList
Expand Down
7 changes: 4 additions & 3 deletions firebase_to_xml/firebase_to_xml/scrubbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def scrub_dict(d_in):
val = scrub_dict(val)
if isinstance(val, list):
val = scrub_list(val)
if val not in (u"", None, {}):
if val not in ("", None, {}):
new_dict[key] = val
return new_dict

Expand All @@ -41,5 +41,6 @@ def scrub_keys(d_in):
scrubbed[k] = d_in[k]
return scrubbed

def remove_nones(items):
    """Return a new list with every element of ``items`` that is not ``None``.

    Other falsy values (``0``, ``""``, empty containers) are kept.
    """
    return [item for item in items if item is not None]
Loading

0 comments on commit a204681

Please sign in to comment.