From f361dc6c88361c2d2cfbbbaf204ea41cf6d5f14f Mon Sep 17 00:00:00 2001 From: HOC3426 Date: Tue, 30 May 2023 15:08:52 -0500 Subject: [PATCH 1/3] Create script.py This script adds MSNET IDs to INSPIRE records. --- scripts/msnet_add_id/script.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 scripts/msnet_add_id/script.py diff --git a/scripts/msnet_add_id/script.py b/scripts/msnet_add_id/script.py new file mode 100644 index 0000000..2b5518c --- /dev/null +++ b/scripts/msnet_add_id/script.py @@ -0,0 +1,34 @@ +import requests + +from inspirehep.curation.search_check_do import SearchCheckDo + +URL = 'https://cernbox.cern...msnet_add_id.txt' +MSNET_IDS = requests.get(URL) + +ELEMENT = 'external_system_identifiers' + +class AddMsnetIds(SearchCheckDo): + """Add MSNET IDs to INSPIRE records.""" + + query = f'tc:p -{ELEMENT}.schema:MSNET' + + @staticmethod + def check(record, logger, state): + if record.get_value('control_number') not in MSNET_IDS: + return True + for schema in record.get_value(f'{ELEMENT}.schema', []): + if schema == 'MSNET': + return True + return False + + @staticmethod + def do(record, logger, state): + record.setdefault(ELEMENT, []).append( + { + "value": MSNET_IDS[record.get_value('control_number')], + "schema": "MSNET", + } + ) + + +AddMsnetIds() From 83328ea15f75b69dfc1d40fce978b4448165fe22 Mon Sep 17 00:00:00 2001 From: hoc3426 Date: Tue, 30 May 2023 20:10:04 +0000 Subject: [PATCH 2/3] Auto-format python code --- scripts/msnet_add_id/script.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/msnet_add_id/script.py b/scripts/msnet_add_id/script.py index 2b5518c..64cff0e 100644 --- a/scripts/msnet_add_id/script.py +++ b/scripts/msnet_add_id/script.py @@ -2,22 +2,23 @@ from inspirehep.curation.search_check_do import SearchCheckDo -URL = 'https://cernbox.cern...msnet_add_id.txt' +URL = "https://cernbox.cern...msnet_add_id.txt" MSNET_IDS = requests.get(URL) -ELEMENT = 'external_system_identifiers' +ELEMENT = "external_system_identifiers" + class AddMsnetIds(SearchCheckDo): """Add MSNET IDs to INSPIRE records.""" - query = f'tc:p -{ELEMENT}.schema:MSNET' + query = f"tc:p -{ELEMENT}.schema:MSNET" @staticmethod def check(record, logger, state): - if record.get_value('control_number') not in MSNET_IDS: + if record.get_value("control_number") not in MSNET_IDS: return True - for schema in record.get_value(f'{ELEMENT}.schema', []): - if schema == 'MSNET': + for schema in record.get_value(f"{ELEMENT}.schema", []): + if schema == "MSNET": return True return False @@ -25,7 +26,7 @@ def check(record, logger, state): def do(record, logger, state): record.setdefault(ELEMENT, []).append( { - "value": MSNET_IDS[record.get_value('control_number')], + "value": MSNET_IDS[record.get_value("control_number")], "schema": "MSNET", } ) From 4aec570b481b572d054e3c6b9a26f52b51a9dcaa Mon Sep 17 00:00:00 2001 From: Micha Moskovic Date: Thu, 28 Sep 2023 09:03:40 +0200 Subject: [PATCH 3/3] Fix script to add MSNET ids to papers --- .../{msnet_add_id => msnet-add-id}/script.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) rename scripts/{msnet_add_id => msnet-add-id}/script.py (63%) diff --git a/scripts/msnet_add_id/script.py b/scripts/msnet-add-id/script.py similarity index 63% rename from scripts/msnet_add_id/script.py rename to scripts/msnet-add-id/script.py index 64cff0e..71bd480 100644 --- a/scripts/msnet_add_id/script.py +++ b/scripts/msnet-add-id/script.py @@ -2,8 +2,11 @@ from inspirehep.curation.search_check_do import SearchCheckDo -URL = "https://cernbox.cern...msnet_add_id.txt" -MSNET_IDS = requests.get(URL) +URL = ( + "https://cernbox.cern.ch/remote.php/dav/public-files/" + "DgV3O0I8D8haXMZ/msnet_add_id.json" +) +MSNET_IDS = requests.get(URL).json() ELEMENT = "external_system_identifiers" @@ -15,12 +18,10 @@ class AddMsnetIds(SearchCheckDo): @staticmethod def check(record, logger, state): - if record.get_value("control_number") not in MSNET_IDS: - return True - for schema in record.get_value(f"{ELEMENT}.schema", []): - if schema == "MSNET": - return True - return False + return record["control_number"] in MSNET_IDS and not any( + id_["schema"] == "MSNET" + for id_ in record.get("external_system_identifiers", []) + ) @staticmethod def do(record, logger, state):