forked from slub/efre-lod-elasticsearch-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'v0.6-isql' into Imperator
Merging of big update v0.6 into current Main Branch
- Loading branch information
Showing
13 changed files
with
2,415 additions
and
885 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#!/usr/bin/env python | ||
# coding: utf-8 | ||
|
||
# Copyright 2021 by Leipzig University Library, http://ub.uni-leipzig.de | ||
# JP Kanter, <[email protected]> | ||
# | ||
# This file is part of the Solr2Triplestore Tool. | ||
# | ||
# This program is free software: you can redistribute | ||
# it and/or modify it under the terms of the GNU General Public | ||
# License as published by the Free Software Foundation, either | ||
# version 3 of the License, or (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will | ||
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with Solr2Triplestore Tool. If not, see <http://www.gnu.org/licenses/>. | ||
# | ||
# @license GPL-3.0-only <https://www.gnu.org/licenses/gpl-3.0.en.html> | ||
|
||
""" | ||
I have read that its the pythonic way to introduce your own set of errors and exceptions to be more | ||
specific about what has happened, i am a bit late to the party in that regard, only adding this many | ||
months after i first started working on this projects, this makes the whole code unfortunatly to a | ||
jumpled mess of standard exceptions and my own that i later created | ||
""" | ||
|
||
|
||
class WorkOrderInconsitencyError(Exception):
    """Raised when a change contradicts the internal logic of a work order.

    Example: updating a work order status to a lower level than the one
    it already holds.

    NOTE(review): the class name misspells "Inconsistency"; it is kept
    as-is because callers elsewhere reference this exact name.
    """

    def __repr__(self):
        # Fixed typo in the description: "previos" -> "previous".
        return "A change is inconsistent with the logic of a work order, like updating a status to a lower level than the previous one"
|
||
|
||
class WorkOrderError(Exception):
    """Generic failure concerning a given work order."""

    # Human-readable description returned by repr().
    _DESCRIPTION = "Generic error with the given work order"

    def __repr__(self):
        return self._DESCRIPTION
|
||
|
||
class WorkOrderTypeError(Exception):
    """Signals an incorrect file type inside work order parameters."""

    def __repr__(self) -> str:
        description = "For incorrect file types in work order parameters"
        return description
|
||
|
||
class ParameterError(Exception):
    """Raised when a supplied parameter produced an outcome that did not work."""

    def __repr__(self):
        # Fixed grammar in the description: "lead to" -> "led to".
        return "The given parameter led to an outcome that did not work"
|
||
|
||
class OperationalError(Exception):
    """Raised for conditions that halt the overall operation."""

    def __repr__(self) -> str:
        reason = "Something that stops the overall operation from proceeding"
        return reason
|
||
|
||
class RequestError(ConnectionError):
    """Covers requests that may fail for assorted reasons deep inside the script."""

    # Description text surfaced by repr(); reproduced verbatim.
    _TEXT = "For requests that might fail for this or that reason within the bellows of the script"

    def __repr__(self):
        return self._TEXT
|
||
|
||
class ParsingError(Exception):
    """Raised while trying to interpret or parse some kind of data."""

    def __repr__(self) -> str:
        what_happened = "an Exception that occurs when trying to interpret or parse some kind of data"
        return what_happened
|
||
|
||
class Unexpected(Exception):
    """Raised for situations that should never occur but were guarded against anyway."""

    def __repr__(self):
        # Fixed typo ("seomthing" -> "something") and grammar
        # ("should have not been happened" -> "should not have happened").
        return "an exception that should not have happened but was prepared in case something weird happened"
|
||
|
||
class MandatoryError(Exception):
    """Raised when a field classified as mandatory is absent, failing the entire chain."""

    # Message surfaced by repr(); reproduced verbatim from the original.
    _MSG = "a field that was classified as mandatory was not present, therefore failing the entire chain"

    def __repr__(self):
        return self._MSG
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,252 @@ | ||
{ | ||
"CreateOrder": | ||
{ | ||
"type": "str", | ||
"help": "Creates a blank order without executing it", | ||
"metavar": ["order_name", "fetch_method", "processing_type", "insert_method"], | ||
"nargs": 4 | ||
}, | ||
"CreateOrderPara": | ||
{ | ||
"action": "store_true", | ||
"help": "Creates a blank order with executing it with provided variables: --order_name, --fetch, --process and --insert" | ||
}, | ||
"order_name": | ||
{ | ||
"type": "str", | ||
"help": "name for a new order" | ||
}, | ||
"fetch": | ||
{ | ||
"type": "str", | ||
"help": "Type of fetch mechanismn for data: 'solr' or 'file'" | ||
}, | ||
"process": | ||
{ | ||
"type": "str", | ||
"help": "Processing type, either 'insert' or 'update'" | ||
}, | ||
"insert": | ||
{ | ||
"type": "str", | ||
"help": "method of inserting into triplestore: 'isql', 'obdc' or 'sparql'" | ||
}, | ||
"FetchSolrOrder": | ||
{ | ||
"type": "str", | ||
"help": "Executes a fetch order provided, if the work order file has that current status", | ||
"metavar": ["work_file", "solr_url", "query", "total_rows", "chunk_size", "spcht_descriptor", "save_folder"], | ||
"nargs": 7 | ||
}, | ||
"FetchSolrOrderPara": | ||
{ | ||
"action": "store_true", | ||
"help": "Executes a solr fetch work order, needs parameters --work_order_file, --solr_url, --query, --total_rows, --chunk_size, --spcht_descriptor, --save_folder" | ||
}, | ||
"work_order_file": | ||
{ | ||
"type": "str", | ||
"help": "Path to work order file" | ||
}, | ||
"solr_url": | ||
{ | ||
"type": "str", | ||
"help": "Url to a solr query endpoint" | ||
}, | ||
"query": | ||
{ | ||
"type": "str", | ||
"help": "Query for solr ['*' fetches everything]", | ||
"default": "*" | ||
}, | ||
"total_rows": | ||
{ | ||
"type": "int", | ||
"help": "Number of rows that are fetched in total from an external datasource", | ||
"default": 25000 | ||
}, | ||
"chunk_size": | ||
{ | ||
"type": "int", | ||
"help": "Size of a single chunk, determines the number of queries", | ||
"default": 5000 | ||
}, | ||
"max_age": | ||
{ | ||
"type": "int", | ||
"help": "Maximum age of a given entry in the source database, used for update operations as filter" | ||
}, | ||
"spcht_descriptor": | ||
{ | ||
"type": "str", | ||
"help": "Path to a spcht descriptor file, usually ends with '.spcht.json'" | ||
}, | ||
"save_folder": | ||
{ | ||
"type": "str", | ||
"help": "The folder were downloaded data is to be saved, will be referenced in work order", | ||
"default": "./" | ||
}, | ||
"SpchtProcessing": | ||
{ | ||
"type": "str", | ||
"help": "Processes the provided work order file", | ||
"metavar": ["work_file", "graph/subject", "spcht_descriptor"], | ||
"nargs": 3 | ||
}, | ||
"SpchtProcessingMulti": | ||
{ | ||
"type": "str", | ||
"help": "Processes the provided work order file in multiple threads", | ||
"metavar": ["work_file", "graph/subject", "spcht_descriptor", "processes"], | ||
"nargs": 4 | ||
}, | ||
"SpchtProcessingPara": | ||
{ | ||
"action": "store_true", | ||
"help": "Processes the given work_order file with parameters, needs: --work_order_file, --graph, --spcht_descriptor" | ||
}, | ||
"SpchtProcessingMultiPara": | ||
{ | ||
"action": "store_true", | ||
"help": "Procesesses the given order with multiple processes, needs: --work_order_file, --graph, --spcht_descriptor, --processes" | ||
}, | ||
"graph": | ||
{ | ||
"type": "str", | ||
"help": "URI of the subject part the graph gets mapped to in the <subject> <predicate> <object> triple" | ||
}, | ||
"processes": | ||
{ | ||
"type": "int", | ||
"help": "Number of parallel processes used, should be <= cpu_count", | ||
"default": 1 | ||
}, | ||
"InsertISQLOrder": | ||
{ | ||
"type": "str", | ||
"help": "Inserts the given work order via the isql interface of virtuoso, copies files in a temporary folder where virtuoso has access, needs credentials", | ||
"metavar": ["work_file", "named_graph", "isql_path", "user", "password", "virt_folder"], | ||
"nargs": 6 | ||
}, | ||
"InsertISQLOrderPara": | ||
{ | ||
"action": "store_true", | ||
"help": "Inserts the given order via the isql interace of virtuoso, copies files in a temporary folder, needs paramters: --isql_path, --user, --password, --named_graph, --virt_folder" | ||
}, | ||
"named_graph": | ||
{ | ||
"type": "str", | ||
"help": "In a quadstore this is the graph the processed triples are saved upon, might be different from the triple subject" | ||
}, | ||
"isql_path": | ||
{ | ||
"type": "str", | ||
"help": "File path to the OpenLink Virtuoso isql executable, usually 'isql-v' or 'isql-v.exe" | ||
}, | ||
"virt_folder": | ||
{ | ||
"type": "str", | ||
"help": "When inserting data via iSQL the ingested files must lay in a directory whitelisted by Virtuoso, usually this is /tmp/ in Linux systems, but can be anywhere if configured so. Script must have write access there." | ||
}, | ||
"user": | ||
{ | ||
"type": "str", | ||
"help": "Name of an authorized user for the desired operation" | ||
}, | ||
"password": | ||
{ | ||
"type": "str", | ||
"help": "Plaintext password for the defined --user, caution advised when saving cleartext passwords in config files or bash history" | ||
}, | ||
"isql_port": | ||
{ | ||
"type": "int", | ||
"help": "When using iSQL the corresponding database usually resides on port 1111, this parameter allows to adjust for changes in that regard", | ||
"default": 1111 | ||
}, | ||
"HandleWorkOrder": | ||
{ | ||
"type": "str", | ||
"help": "Takes any one work order and processes it to the next step, needs all parameters the corresponding steps requires", | ||
"metavar": ["work_order_file"], | ||
"nargs": 1 | ||
}, | ||
"FullOrder": | ||
{ | ||
"type": "str", | ||
"help": "Creates a new order with assigned methods, immediatly starts with --Parameters [or --config] to fullfill the created order", | ||
"metavar": ["work_order_name", "fetch", "type", "method"], | ||
"nargs": 4 | ||
}, | ||
"sparql_endpoint": | ||
{ | ||
"type": "str", | ||
"help": "URL to a sparql endpoint of any one triplestore, usually ends with /sparql or /sparql-auth for authenticated user" | ||
}, | ||
"CheckWorkOrder": | ||
{ | ||
"type": "str", | ||
"help": "Checks the status of any given work order and displays it in the console", | ||
"metavar": ["work_order_file"], | ||
"nargs": 1 | ||
}, | ||
"config": | ||
{ | ||
"type": "str", | ||
"help": "loads the defined config file, must be a json file containing a flat dictionary", | ||
"metavar": ["path/to/config.json"], | ||
"short": "-c" | ||
}, | ||
"UpdateData": | ||
{ | ||
"help": "Special form of full process, fetches data with a filter, deletes old data and inserts new ones", | ||
"action": "store_true" | ||
}, | ||
"environment": | ||
{ | ||
"action": "store_true", | ||
"help": "Prints all variables" | ||
}, | ||
"force": | ||
{ | ||
"action": "store_true", | ||
"help": "Ignores security checks in work order execution like only proceeding when the right meta status is present" | ||
}, | ||
"CleanUp": | ||
{ | ||
"type": "str", | ||
"help": "Deletes all temporary files of a given work order.", | ||
"metavar": ["work_order_file"] | ||
}, | ||
"CompileSpcht": | ||
{ | ||
"type": "str", | ||
"help": "Inserts all includes of a spcht descriptor in one file, resolving all include relations", | ||
"metavar": ["SPCHT_FILE", "FILEPATH"], | ||
"nargs": 2 | ||
}, | ||
"CheckFields": | ||
{ | ||
"type": "str", | ||
"help": "Loads a spcht file and displays all dictionary keys used in that descriptor", | ||
"metavar": ["SPCHT_FILE"] | ||
}, | ||
"debug": | ||
{ | ||
"action": "store_true", | ||
"help": "Sets the debug flag for CheckFields, CheckSpcht, CompileSpcht" | ||
}, | ||
"CheckSpcht": | ||
{ | ||
"help": "Tries to load and validate the specified Spcht JSON File", | ||
"type": "str", | ||
"metavar": ["SPCHT FILE"] | ||
}, | ||
"ContinueOrder": | ||
{ | ||
"help": "Continues a previously paused or interrupted work order, needs parameters", | ||
"type": "str", | ||
"metavar": ["WORK ORDER FILE"] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,18 @@ | ||
{ | ||
"errors": { | ||
"urls": "Die Sammlung der Serveradressen konnte nicht gefunden werden. Vorgang wird abgebrochen", | ||
"settings": "Die sonstigen Einstellungen konnten nicht gefunden werden. Vorgang wird abgebrochen", | ||
"nofile": "Die angegebene Datei konnte nicht gefunden werden", | ||
"graph_parser": "Der Graph Parser konnte den Eingang nicht interpretieren", | ||
"json_parser": "Beim Interpretieren des JSON Eingangs wurde ein Irrigularität entdeckt.", | ||
"@context": "Mapping unvollständig, unauflösbare Kurzform gefunden {}", | ||
"file": "Die Datei {} konnte nicht gefunden werden", | ||
"spcht_map": "Das Mapping muss eindimensional sein.", | ||
"spcht_ref": "Fehler beim Laden der referenzierten Mappings" | ||
}, | ||
"para": { | ||
"solr": "http://<fqdn>/solr/biblio/select", | ||
"sparql": "http://<fqdn>/sparql-auth/", | ||
"spcht": "default.spcht.json", | ||
"sparql_user": "<user>", | ||
"sparql_pw": "<password>", | ||
"solr_url": "http://<fqdn>/solr/biblio/select", | ||
"sparql_endpoint": "http://<fqdn>/sparql-auth/", | ||
"spcht_descriptor": "default.spcht.json", | ||
"user": "<user>", | ||
"password": "<password>", | ||
"graph": "<URI>", | ||
"named_graph": "<URI>", | ||
"query": "*:*", | ||
"rows": 20, | ||
"parts": 10000, | ||
"time": 2880 | ||
}, | ||
"settings": { | ||
"workers": 8 | ||
} | ||
"query_rows": 5000, | ||
"chunk_size": 1000, | ||
"save_folder": "./", | ||
"virt_folder": "/tmp/", | ||
"isql_path": "/usr/local/bin/isql-v", | ||
"isql_port": 1111, | ||
"processes": 4, | ||
"max_age": 5600 | ||
} |
Oops, something went wrong.