Merge branch 'v0.6-isql' into Imperator
Merging the big v0.6 update into the current main branch
jpkanter committed Jul 14, 2021
2 parents f128e3e + 7df601d commit 3547632
Showing 13 changed files with 2,415 additions and 885 deletions.
235 changes: 133 additions & 102 deletions SpchtDescriptorFormat.py

Large diffs are not rendered by default.

75 changes: 75 additions & 0 deletions SpchtErrors.py
@@ -0,0 +1,75 @@
#!/usr/bin/env python
# coding: utf-8

# Copyright 2021 by Leipzig University Library, http://ub.uni-leipzig.de
# JP Kanter, <[email protected]>
#
# This file is part of the Solr2Triplestore Tool.
#
# This program is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Solr2Triplestore Tool. If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0-only <https://www.gnu.org/licenses/gpl-3.0.en.html>

"""
I have read that its the pythonic way to introduce your own set of errors and exceptions to be more
specific about what has happened, i am a bit late to the party in that regard, only adding this many
months after i first started working on this projects, this makes the whole code unfortunatly to a
jumpled mess of standard exceptions and my own that i later created
"""


class WorkOrderInconsitencyError(Exception):
def __repr__(self):
return "A change is inconsistent with the logic of a work order, like updating a status to a lower level than the previos one"


class WorkOrderError(Exception):
def __repr__(self):
return "Generic error with the given work order"


class WorkOrderTypeError(Exception):
def __repr__(self):
return "For incorrect file types in work order parameters"


class ParameterError(Exception):
def __repr__(self):
return "The given parameter lead to an outcome that did not work"


class OperationalError(Exception):
def __repr__(self):
return "Something that stops the overall operation from proceeding"


class RequestError(ConnectionError):
def __repr__(self):
return "For requests that might fail for this or that reason within the bellows of the script"


class ParsingError(Exception):
def __repr__(self):
return "an Exception that occurs when trying to interpret or parse some kind of data"


class Unexpected(Exception):
def __repr__(self):
return "an exception that should have not been happened but was prepared in case seomthing weird happened"


class MandatoryError(Exception):
def __repr__(self):
return "a field that was classified as mandatory was not present, therefore failing the entire chain"

252 changes: 252 additions & 0 deletions argparse.json
@@ -0,0 +1,252 @@
{
"CreateOrder":
{
"type": "str",
"help": "Creates a blank order without executing it",
"metavar": ["order_name", "fetch_method", "processing_type", "insert_method"],
"nargs": 4
},
"CreateOrderPara":
{
"action": "store_true",
"help": "Creates a blank order with executing it with provided variables: --order_name, --fetch, --process and --insert"
},
"order_name":
{
"type": "str",
"help": "name for a new order"
},
"fetch":
{
"type": "str",
"help": "Type of fetch mechanismn for data: 'solr' or 'file'"
},
"process":
{
"type": "str",
"help": "Processing type, either 'insert' or 'update'"
},
"insert":
{
"type": "str",
"help": "method of inserting into triplestore: 'isql', 'obdc' or 'sparql'"
},
"FetchSolrOrder":
{
"type": "str",
"help": "Executes a fetch order provided, if the work order file has that current status",
"metavar": ["work_file", "solr_url", "query", "total_rows", "chunk_size", "spcht_descriptor", "save_folder"],
"nargs": 7
},
"FetchSolrOrderPara":
{
"action": "store_true",
"help": "Executes a solr fetch work order, needs parameters --work_order_file, --solr_url, --query, --total_rows, --chunk_size, --spcht_descriptor, --save_folder"
},
"work_order_file":
{
"type": "str",
"help": "Path to work order file"
},
"solr_url":
{
"type": "str",
"help": "Url to a solr query endpoint"
},
"query":
{
"type": "str",
"help": "Query for solr ['*' fetches everything]",
"default": "*"
},
"total_rows":
{
"type": "int",
"help": "Number of rows that are fetched in total from an external datasource",
"default": 25000
},
"chunk_size":
{
"type": "int",
"help": "Size of a single chunk, determines the number of queries",
"default": 5000
},
"max_age":
{
"type": "int",
"help": "Maximum age of a given entry in the source database, used for update operations as filter"
},
"spcht_descriptor":
{
"type": "str",
"help": "Path to a spcht descriptor file, usually ends with '.spcht.json'"
},
"save_folder":
{
"type": "str",
"help": "The folder were downloaded data is to be saved, will be referenced in work order",
"default": "./"
},
"SpchtProcessing":
{
"type": "str",
"help": "Processes the provided work order file",
"metavar": ["work_file", "graph/subject", "spcht_descriptor"],
"nargs": 3
},
"SpchtProcessingMulti":
{
"type": "str",
"help": "Processes the provided work order file in multiple threads",
"metavar": ["work_file", "graph/subject", "spcht_descriptor", "processes"],
"nargs": 4
},
"SpchtProcessingPara":
{
"action": "store_true",
"help": "Processes the given work_order file with parameters, needs: --work_order_file, --graph, --spcht_descriptor"
},
"SpchtProcessingMultiPara":
{
"action": "store_true",
"help": "Procesesses the given order with multiple processes, needs: --work_order_file, --graph, --spcht_descriptor, --processes"
},
"graph":
{
"type": "str",
"help": "URI of the subject part the graph gets mapped to in the <subject> <predicate> <object> triple"
},
"processes":
{
"type": "int",
"help": "Number of parallel processes used, should be <= cpu_count",
"default": 1
},
"InsertISQLOrder":
{
"type": "str",
"help": "Inserts the given work order via the isql interface of virtuoso, copies files in a temporary folder where virtuoso has access, needs credentials",
"metavar": ["work_file", "named_graph", "isql_path", "user", "password", "virt_folder"],
"nargs": 6
},
"InsertISQLOrderPara":
{
"action": "store_true",
"help": "Inserts the given order via the isql interace of virtuoso, copies files in a temporary folder, needs paramters: --isql_path, --user, --password, --named_graph, --virt_folder"
},
"named_graph":
{
"type": "str",
"help": "In a quadstore this is the graph the processed triples are saved upon, might be different from the triple subject"
},
"isql_path":
{
"type": "str",
"help": "File path to the OpenLink Virtuoso isql executable, usually 'isql-v' or 'isql-v.exe"
},
"virt_folder":
{
"type": "str",
"help": "When inserting data via iSQL the ingested files must lay in a directory whitelisted by Virtuoso, usually this is /tmp/ in Linux systems, but can be anywhere if configured so. Script must have write access there."
},
"user":
{
"type": "str",
"help": "Name of an authorized user for the desired operation"
},
"password":
{
"type": "str",
"help": "Plaintext password for the defined --user, caution advised when saving cleartext passwords in config files or bash history"
},
"isql_port":
{
"type": "int",
"help": "When using iSQL the corresponding database usually resides on port 1111, this parameter allows to adjust for changes in that regard",
"default": 1111
},
"HandleWorkOrder":
{
"type": "str",
"help": "Takes any one work order and processes it to the next step, needs all parameters the corresponding steps requires",
"metavar": ["work_order_file"],
"nargs": 1
},
"FullOrder":
{
"type": "str",
"help": "Creates a new order with assigned methods, immediatly starts with --Parameters [or --config] to fullfill the created order",
"metavar": ["work_order_name", "fetch", "type", "method"],
"nargs": 4
},
"sparql_endpoint":
{
"type": "str",
"help": "URL to a sparql endpoint of any one triplestore, usually ends with /sparql or /sparql-auth for authenticated user"
},
"CheckWorkOrder":
{
"type": "str",
"help": "Checks the status of any given work order and displays it in the console",
"metavar": ["work_order_file"],
"nargs": 1
},
"config":
{
"type": "str",
"help": "loads the defined config file, must be a json file containing a flat dictionary",
"metavar": ["path/to/config.json"],
"short": "-c"
},
"UpdateData":
{
"help": "Special form of full process, fetches data with a filter, deletes old data and inserts new ones",
"action": "store_true"
},
"environment":
{
"action": "store_true",
"help": "Prints all variables"
},
"force":
{
"action": "store_true",
"help": "Ignores security checks in work order execution like only proceeding when the right meta status is present"
},
"CleanUp":
{
"type": "str",
"help": "Deletes all temporary files of a given work order.",
"metavar": ["work_order_file"]
},
"CompileSpcht":
{
"type": "str",
"help": "Inserts all includes of a spcht descriptor in one file, resolving all include relations",
"metavar": ["SPCHT_FILE", "FILEPATH"],
"nargs": 2
},
"CheckFields":
{
"type": "str",
"help": "Loads a spcht file and displays all dictionary keys used in that descriptor",
"metavar": ["SPCHT_FILE"]
},
"debug":
{
"action": "store_true",
"help": "Sets the debug flag for CheckFields, CheckSpcht, CompileSpcht"
},
"CheckSpcht":
{
"help": "Tries to load and validate the specified Spcht JSON File",
"type": "str",
"metavar": ["SPCHT FILE"]
},
"ContinueOrder":
{
"help": "Continues a previously paused or interrupted work order, needs parameters",
"type": "str",
"metavar": ["WORK ORDER FILE"]
}
}
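
For orientation, a minimal sketch of how a specification like this might be fed into argparse;
the build_parser helper and its mapping rules are assumptions for illustration, not the
repository's actual bootstrap code:

import argparse
import json

# The spec stores types as strings; map them back to the builtins.
TYPE_MAP = {"str": str, "int": int}

def build_parser(spec_path="argparse.json"):
    with open(spec_path, "r", encoding="utf-8") as fh:
        spec = json.load(fh)
    parser = argparse.ArgumentParser(description="Solr2Triplestore Tool")
    for name, opts in spec.items():
        flags = ["--" + name]
        if "short" in opts:                  # e.g. "-c" as alias for --config
            flags.insert(0, opts["short"])
        kwargs = {"help": opts.get("help")}
        if "action" in opts:                 # boolean flags like --force, --debug
            kwargs["action"] = opts["action"]
        else:
            kwargs["type"] = TYPE_MAP[opts.get("type", "str")]
        for key in ("nargs", "default"):
            if key in opts:
                kwargs[key] = opts[key]
        if "metavar" in opts:                # argparse expects tuples for multi-value metavars
            kwargs["metavar"] = tuple(opts["metavar"])
        parser.add_argument(*flags, **kwargs)
    return parser

if __name__ == "__main__":
    print(vars(build_parser().parse_args()))

With such a parser, a hypothetical call like 'python main.py --CheckWorkOrder order.json'
(the script name is assumed) would surface the status check defined above.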
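
The InsertISQLOrder entries describe a two-step workflow: copy the processed files into a
folder Virtuoso is allowed to read, then trigger a bulk load through the isql executable. A
hedged sketch of that idea, based on Virtuoso's documented ld_dir()/rdf_loader_run() bulk
loader rather than the repository's actual implementation; all defaults here are placeholders:

import shutil
import subprocess

def isql_insert(data_file, named_graph, isql_path="/usr/local/bin/isql-v",
                user="dba", password="dba", virt_folder="/tmp/", port=1111):
    # Virtuoso only ingests files from whitelisted directories (DirsAllowed).
    shutil.copy(data_file, virt_folder)
    commands = (f"ld_dir('{virt_folder}', '*.ttl', '{named_graph}');"
                " rdf_loader_run(); checkpoint;")
    # documented CLI form: isql <port> <user> <password> exec="..."
    subprocess.run([isql_path, str(port), user, password,
                    f"exec={commands}"], check=True)
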
38 changes: 14 additions & 24 deletions config.example.json
@@ -1,28 +1,18 @@
 {
-  "errors": {
-    "urls": "The collection of server addresses could not be found. Aborting operation",
-    "settings": "The miscellaneous settings could not be found. Aborting operation",
-    "nofile": "The specified file could not be found",
-    "graph_parser": "The graph parser could not interpret the input",
-    "json_parser": "An irregularity was detected while interpreting the JSON input.",
-    "@context": "Mapping incomplete, unresolvable short form found {}",
-    "file": "The file {} could not be found",
-    "spcht_map": "The mapping must be one-dimensional.",
-    "spcht_ref": "Error while loading the referenced mappings"
-  },
-  "para": {
-    "solr": "http://<fqdn>/solr/biblio/select",
-    "sparql": "http://<fqdn>/sparql-auth/",
-    "spcht": "default.spcht.json",
-    "sparql_user": "<user>",
-    "sparql_pw": "<password>",
-    "graph": "<URI>",
-    "query": "*:*",
-    "rows": 20,
-    "parts": 10000,
-    "time": 2880
-  },
-  "settings": {
-    "workers": 8
-  }
+  "solr_url": "http://<fqdn>/solr/biblio/select",
+  "sparql_endpoint": "http://<fqdn>/sparql-auth/",
+  "spcht_descriptor": "default.spcht.json",
+  "user": "<user>",
+  "password": "<password>",
+  "graph": "<URI>",
+  "named_graph": "<URI>",
+  "query": "*:*",
+  "query_rows": 5000,
+  "chunk_size": 1000,
+  "save_folder": "./",
+  "virt_folder": "/tmp/",
+  "isql_path": "/usr/local/bin/isql-v",
+  "isql_port": 1111,
+  "processes": 4,
+  "max_age": 5600
 }
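
The reworked example is now the flat dictionary that the --config help above demands, with
keys mirroring the CLI parameter names. A brief sketch of how such a file could back-fill
parameters a user did not pass on the command line; the merge logic is an assumption, not
necessarily how the tool resolves precedence:

import json

def load_flat_config(path="config.example.json"):
    with open(path, "r", encoding="utf-8") as fh:
        config = json.load(fh)
    # enforce the flatness the --config option requires
    if any(isinstance(value, (dict, list)) for value in config.values()):
        raise ValueError(f"{path} must contain a flat dictionary")
    return config

# CLI values win; the config fills whatever was left unset (None).
cli = {"solr_url": None, "chunk_size": 2000, "query": None}
config = load_flat_config()
params = {key: val if val is not None else config.get(key) for key, val in cli.items()}
print(params)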