diff --git a/.github/workflows/test_file_reader.yml b/.github/workflows/test_file_reader.yml
index 1c3cdf8..a0dc0ae 100644
--- a/.github/workflows/test_file_reader.yml
+++ b/.github/workflows/test_file_reader.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
     - uses: actions/checkout@v4
diff --git a/.github/workflows/test_file_writer.yml b/.github/workflows/test_file_writer.yml
index 8b77f00..7e51dad 100644
--- a/.github/workflows/test_file_writer.yml
+++ b/.github/workflows/test_file_writer.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
     - uses: actions/checkout@v4
diff --git a/.github/workflows/test_plugin.yml b/.github/workflows/test_plugin.yml
index 8d02882..6d1b8e0 100644
--- a/.github/workflows/test_plugin.yml
+++ b/.github/workflows/test_plugin.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
      matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
     - uses: actions/checkout@v4
diff --git a/.github/workflows/test_sqlite.yml b/.github/workflows/test_sqlite.yml
index b251b9d..9fdd3b7 100644
--- a/.github/workflows/test_sqlite.yml
+++ b/.github/workflows/test_sqlite.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
     - uses: actions/checkout@v4
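All four CI workflows move the test matrix from Python 3.10 to 3.11. A plausible motivation (an assumption, but consistent with the `file_reader.py` change later in this diff) is that `tomllib` joined the standard library in Python 3.11, which lets the project drop the third-party `toml` package from requirements.txt. A minimal sketch of the version-dependent import this enables:

```python
import sys

if sys.version_info >= (3, 11):
    import tomllib  # stdlib TOML parser, available from Python 3.11 on
else:
    # Hypothetical fallback for older interpreters; the diff below instead
    # vendors pip._vendor.tomli, which exposes the same load/loads API.
    import tomli as tomllib
```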
diff --git a/dsi/backends/sqlite.py b/dsi/backends/sqlite.py
index 6c57156..8df382e 100644
--- a/dsi/backends/sqlite.py
+++ b/dsi/backends/sqlite.py
@@ -5,7 +5,6 @@
 import yaml
 import subprocess
 import os
-import toml
 
 from dsi.backends.filesystem import Filesystem
 
@@ -385,7 +384,7 @@ def get_artifact_list(self, query, isVerbose=False):
 
     # Returns reference from query
     def get_artifacts(self, query, isVerbose=False):
-        self.get_artifact_list(query, isVerbose)
+        return self.get_artifact_list(query, isVerbose)
 
     # Closes connection to server
     def close(self):
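The `get_artifacts` change is a one-line bug fix: the method previously ran `get_artifact_list` and discarded the result, so callers always got `None`. A hedged usage sketch, assuming the backend accepts a `filename` keyword the way the example script below passes it through `load_module`:

```python
from dsi.backends.sqlite import Sqlite

store = Sqlite(filename="data/data.db")
# Before this fix the call executed the query but returned None;
# now it returns the result of get_artifact_list.
tables = store.get_artifacts("SELECT * FROM sqlite_master WHERE type='table';")
print(tables)
store.close()
```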
@@ -575,161 +574,161 @@ def query_fctime(self, operator, ctime, isVerbose=False):
 
         return resout
 
-    def yamlDataToList(self, filenames):
-        """
-        Function that reads a YAML file or files into a list
-        """
+    # def yamlDataToList(self, filenames):
+    #     """
+    #     Function that reads a YAML file or files into a list
+    #     """
 
-        yamlData = []
-        for filename in filenames:
-            with open(filename, 'r') as yaml_file:
-                editedString = yaml_file.read()
-                editedString = re.sub('specification', r'columns:\n specification', editedString)
-                editedString = re.sub(r'(!.+)\n', r"'\1'\n", editedString)
-                yml_data = yaml.safe_load_all(editedString)
+    #     yamlData = []
+    #     for filename in filenames:
+    #         with open(filename, 'r') as yaml_file:
+    #             editedString = yaml_file.read()
+    #             editedString = re.sub('specification', r'columns:\n specification', editedString)
+    #             editedString = re.sub(r'(!.+)\n', r"'\1'\n", editedString)
+    #             yml_data = yaml.safe_load_all(editedString)
 
-            for table in yml_data:
-                yamlData.append(table)
+    #         for table in yml_data:
+    #             yamlData.append(table)
 
-        return yamlData
+    #     return yamlData
 
-    def yamlToSqlite(self, filenames, db_name, deleteSql=True):
-        """
-        Function that ingests a YAML file into a sqlite database based on the given database name
-
-        `filenames`: name of YAML file or a list of YAML files to be ingested
-
-        `db_name`: name of database that YAML file should be added to. Database will be created if it does not exist in local directory.
-
-        `deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
-        """
-
-        sql_statements = []
-        if isinstance(filenames, str):
-            filenames = [filenames]
-
-        with open(db_name+".sql", "w") as sql_file:
-            yml_list = self.yamlDataToList(filenames)
-            for table in yml_list:
-                tableName = table["segment"]
-
-                data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}
-                if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
-                    createStmt = f"CREATE TABLE IF NOT EXISTS {tableName} ( "
-                    createUnitStmt = f"CREATE TABLE IF NOT EXISTS {tableName}_units ( "
-                    insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "
-
-                    for key, val in table['columns'].items():
-                        createUnitStmt+= f"{key} VARCHAR, "
-                        if data_types[type(val)] == "VARCHAR" and self.check_type(val[:val.find(" ")]) in [" INT", " FLOAT"]:
-                            createStmt += f"{key}{self.check_type(val[:val.find(' ')])}, "
-                            insertUnitStmt+= f"'{val[val.find(' ')+1:]}', "
-                        else:
-                            createStmt += f"{key} {data_types[type(val)]}, "
-                            insertUnitStmt+= "NULL, "
-
-                    if createStmt not in sql_statements:
-                        sql_statements.append(createStmt)
-                        sql_file.write(createStmt[:-2] + ");\n\n")
-                    if createUnitStmt not in sql_statements:
-                        sql_statements.append(createUnitStmt)
-                        sql_file.write(createUnitStmt[:-2] + ");\n\n")
-                    if insertUnitStmt not in sql_statements:
-                        sql_statements.append(insertUnitStmt)
-                        sql_file.write(insertUnitStmt[:-2] + ");\n\n")
-
-                insertStmt = f"INSERT INTO {tableName} VALUES( "
-                for val in table['columns'].values():
-                    if data_types[type(val)] == "VARCHAR" and self.check_type(val[:val.find(" ")]) in [" INT", " FLOAT"]:
-                        insertStmt+= f"{val[:val.find(' ')]}, "
-                    elif data_types[type(val)] == "VARCHAR":
-                        insertStmt+= f"'{val}', "
-                    else:
-                        insertStmt+= f"{val}, "
-
-                if insertStmt not in sql_statements:
-                    sql_statements.append(insertStmt)
-                    sql_file.write(insertStmt[:-2] + ");\n\n")
+    # def yamlToSqlite(self, filenames, db_name, deleteSql=True):
+    #     """
+    #     Function that ingests a YAML file into a sqlite database based on the given database name
+
+    #     `filenames`: name of YAML file or a list of YAML files to be ingested
+
+    #     `db_name`: name of database that YAML file should be added to. Database will be created if it does not exist in local directory.
+
+    #     `deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
+    #     """
+
+    #     sql_statements = []
+    #     if isinstance(filenames, str):
+    #         filenames = [filenames]
+
+    #     with open(db_name+".sql", "w") as sql_file:
+    #         yml_list = self.yamlDataToList(filenames)
+    #         for table in yml_list:
+    #             tableName = table["segment"]
+
+    #             data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}
+    #             if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
+    #                 createStmt = f"CREATE TABLE IF NOT EXISTS {tableName} ( "
+    #                 createUnitStmt = f"CREATE TABLE IF NOT EXISTS {tableName}_units ( "
+    #                 insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "
+
+    #                 for key, val in table['columns'].items():
+    #                     createUnitStmt+= f"{key} VARCHAR, "
+    #                     if data_types[type(val)] == "VARCHAR" and self.check_type(val[:val.find(" ")]) in [" INT", " FLOAT"]:
+    #                         createStmt += f"{key}{self.check_type(val[:val.find(' ')])}, "
+    #                         insertUnitStmt+= f"'{val[val.find(' ')+1:]}', "
+    #                     else:
+    #                         createStmt += f"{key} {data_types[type(val)]}, "
+    #                         insertUnitStmt+= "NULL, "
+
+    #                 if createStmt not in sql_statements:
+    #                     sql_statements.append(createStmt)
+    #                     sql_file.write(createStmt[:-2] + ");\n\n")
+    #                 if createUnitStmt not in sql_statements:
+    #                     sql_statements.append(createUnitStmt)
+    #                     sql_file.write(createUnitStmt[:-2] + ");\n\n")
+    #                 if insertUnitStmt not in sql_statements:
+    #                     sql_statements.append(insertUnitStmt)
+    #                     sql_file.write(insertUnitStmt[:-2] + ");\n\n")
+
+    #             insertStmt = f"INSERT INTO {tableName} VALUES( "
+    #             for val in table['columns'].values():
+    #                 if data_types[type(val)] == "VARCHAR" and self.check_type(val[:val.find(" ")]) in [" INT", " FLOAT"]:
+    #                     insertStmt+= f"{val[:val.find(' ')]}, "
+    #                 elif data_types[type(val)] == "VARCHAR":
+    #                     insertStmt+= f"'{val}', "
+    #                 else:
+    #                     insertStmt+= f"{val}, "
+
+    #             if insertStmt not in sql_statements:
+    #                 sql_statements.append(insertStmt)
+    #                 sql_file.write(insertStmt[:-2] + ");\n\n")
 
-        subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))
+    #     subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))
 
-        if deleteSql == True:
-            os.remove(db_name+".sql")
+    #     if deleteSql == True:
+    #         os.remove(db_name+".sql")
 
-    def tomlDataToList(self, filenames):
-        """
-        Function that reads a TOML file or files into a list
-        """
+    # def tomlDataToList(self, filenames):
+    #     """
+    #     Function that reads a TOML file or files into a list
+    #     """
 
-        toml_data = []
-        for filename in filenames:
-            with open(filename, 'r') as toml_file:
-                data = toml.load(toml_file)
-                for tableName, tableData in data.items():
-                    toml_data.append([tableName, tableData])
-
-        return toml_data
+    #     toml_data = []
+    #     for filename in filenames:
+    #         with open(filename, 'r') as toml_file:
+    #             data = toml.load(toml_file)
+    #             for tableName, tableData in data.items():
+    #                 toml_data.append([tableName, tableData])
 
+    #     return toml_data
 
-    def tomlToSqlite(self, filenames, db_name, deleteSql=True):
-        """
-        Function that ingests a TOML file into a sqlite database based on the given database name
-
-        `filenames`: name of TOML file or a list of TOML files to be ingested
-
-        `db_name`: name of database that TOML file should be added to. Database will be created if it does not exist in local directory.
-
-        `deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
-        """
-
-        sql_statements = []
-        if isinstance(filenames, str):
-            filenames = [filenames]
-
-        with open(db_name+".sql", "w") as sql_file:
-            data = self.tomlDataToList(filenames)
-
-            for item in data:
-                tableName, tableData = item
-                data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}
-
-                if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
-                    createStmt = f"CREATE TABLE IF NOT EXISTS {tableName} ( "
-                    createUnitStmt = f"CREATE TABLE IF NOT EXISTS {tableName}_units ( "
-                    insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "
-
-                    for key, val in tableData.items():
-                        createUnitStmt+= f"{key} VARCHAR, "
-                        if type(val) == list and type(val[0]) == str and self.check_type(val[0]) in [" INT", " FLOAT"]:
-                            createStmt += f"{key}{self.check_type(val[0])}, "
-                            insertUnitStmt+= f"'{val[1]}', "
-                        else:
-                            createStmt += f"{key} {data_types[type(val)]}, "
-                            insertUnitStmt+= "NULL, "
-
-                    if createStmt not in sql_statements:
-                        sql_statements.append(createStmt)
-                        sql_file.write(createStmt[:-2] + ");\n\n")
-                    if createUnitStmt not in sql_statements:
-                        sql_statements.append(createUnitStmt)
-                        sql_file.write(createUnitStmt[:-2] + ");\n\n")
-                    if insertUnitStmt not in sql_statements:
-                        sql_statements.append(insertUnitStmt)
-                        sql_file.write(insertUnitStmt[:-2] + ");\n\n")
+    # def tomlToSqlite(self, filenames, db_name, deleteSql=True):
+    #     """
+    #     Function that ingests a TOML file into a sqlite database based on the given database name
+
+    #     `filenames`: name of TOML file or a list of TOML files to be ingested
+
+    #     `db_name`: name of database that TOML file should be added to. Database will be created if it does not exist in local directory.
+
+    #     `deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
+    #     """
+
+    #     sql_statements = []
+    #     if isinstance(filenames, str):
+    #         filenames = [filenames]
+
+    #     with open(db_name+".sql", "w") as sql_file:
+    #         data = self.tomlDataToList(filenames)
+
+    #         for item in data:
+    #             tableName, tableData = item
+    #             data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}
+
+    #             if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
+    #                 createStmt = f"CREATE TABLE IF NOT EXISTS {tableName} ( "
+    #                 createUnitStmt = f"CREATE TABLE IF NOT EXISTS {tableName}_units ( "
+    #                 insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "
+
+    #                 for key, val in tableData.items():
+    #                     createUnitStmt+= f"{key} VARCHAR, "
+    #                     if type(val) == list and type(val[0]) == str and self.check_type(val[0]) in [" INT", " FLOAT"]:
+    #                         createStmt += f"{key}{self.check_type(val[0])}, "
+    #                         insertUnitStmt+= f"'{val[1]}', "
+    #                     else:
+    #                         createStmt += f"{key} {data_types[type(val)]}, "
+    #                         insertUnitStmt+= "NULL, "
+
+    #                 if createStmt not in sql_statements:
+    #                     sql_statements.append(createStmt)
+    #                     sql_file.write(createStmt[:-2] + ");\n\n")
+    #                 if createUnitStmt not in sql_statements:
+    #                     sql_statements.append(createUnitStmt)
+    #                     sql_file.write(createUnitStmt[:-2] + ");\n\n")
+    #                 if insertUnitStmt not in sql_statements:
+    #                     sql_statements.append(insertUnitStmt)
+    #                     sql_file.write(insertUnitStmt[:-2] + ");\n\n")
 
-                insertStmt = f"INSERT INTO {tableName} VALUES( "
-                for val in tableData.values():
-                    if type(val) == list and type(val[0]) == str and self.check_type(val[0]) in [" INT", " FLOAT"]:
-                        insertStmt+= f"{val[0]}, "
-                    elif type(val) == str:
-                        insertStmt+= f"'{val}', "
-                    else:
-                        insertStmt+= f"{val}, "
-
-                if insertStmt not in sql_statements:
-                    sql_statements.append(insertStmt)
-                    sql_file.write(insertStmt[:-2] + ");\n\n")
-
-        subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))
-
-        if deleteSql == True:
-            os.remove(db_name+".sql")
+    #             insertStmt = f"INSERT INTO {tableName} VALUES( "
+    #             for val in tableData.values():
+    #                 if type(val) == list and type(val[0]) == str and self.check_type(val[0]) in [" INT", " FLOAT"]:
+    #                     insertStmt+= f"{val[0]}, "
+    #                 elif type(val) == str:
+    #                     insertStmt+= f"'{val}', "
+    #                 else:
+    #                     insertStmt+= f"{val}, "
+
+    #             if insertStmt not in sql_statements:
+    #                 sql_statements.append(insertStmt)
+    #                 sql_file.write(insertStmt[:-2] + ");\n\n")
+
+    #     subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))
+
+    #     if deleteSql == True:
+    #         os.remove(db_name+".sql")
\ No newline at end of file
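The commented-out `yamlDataToList`/`yamlToSqlite`/`tomlDataToList`/`tomlToSqlite` methods wrote a temporary `.sql` file and shelled out to the `sqlite3` CLI. The diff does not name their replacement explicitly, but the plugin readers exercised in `examples/coreterminal.py` cover the same ingest path; a sketch of that route, under the assumption that it is the intended substitute:

```python
from dsi.core import Terminal

term = Terminal()
# Reader plugins parse the files into active_metadata...
term.load_module('plugin', 'YAML', 'reader',
                 filenames=["data/student_test1.yml", "data/student_test2.yml"],
                 target_table_prefix="student")
# ...and a 'back-write' backend persists them, replacing the subprocess call.
term.load_module('backend', 'Sqlite', 'back-write', filename='data/data.db')
term.transload()
term.artifact_handler(interaction_type='put')
```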
""" BACKEND_PREFIX = ['dsi.backends'] @@ -26,10 +28,10 @@ class Terminal(): VALID_BACKENDS = ['Gufi', 'Sqlite', 'Parquet'] VALID_MODULES = VALID_PLUGINS + VALID_BACKENDS VALID_MODULE_FUNCTIONS = {'plugin': [ - 'writer', 'reader'], 'backend': ['front-end', 'back-end']} + 'writer', 'reader'], 'backend': ['back-read', 'back-write']} VALID_ARTIFACT_INTERACTION_TYPES = ['get', 'set', 'put', 'inspect'] - def __init__(self): + def __init__(self, debug_flag = False): # Helper function to get parent module names. def static_munge(prefix, implementations): return (['.'.join(i) for i in product(prefix, implementations)]) @@ -55,6 +57,16 @@ def static_munge(prefix, implementations): self.active_metadata = OrderedDict() self.transload_lock = False + self.logger = logging.getLogger(self.__class__.__name__) + + if debug_flag: + logging.basicConfig( + filename='logger.txt', # Name of the log file + filemode='a', # Append mode ('w' for overwrite) + format='%(asctime)s - %(levelname)s - %(message)s', # Log message format + level=logging.INFO # Minimum log level to capture + ) + def list_available_modules(self, mod_type): """ List available DSI modules of an arbitrary module type. @@ -82,16 +94,25 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs): We expect most users will work with module implementations rather than templates, but but all high level class abstractions are accessible with this method. """ + self.logger.info(f"-------------------------------------") + self.logger.info(f"Loading {mod_name} {mod_function} {mod_type}") + start = datetime.now() if self.transload_lock and mod_type == 'plugin': print('Plugin module loading is prohibited after transload. No action taken.') + end = datetime.now() + self.logger.info(f"Runtime: {end-start}") return if mod_function not in self.VALID_MODULE_FUNCTIONS[mod_type]: print( 'Hint: Did you declare your Module Function in the Terminal Global vars?') + end = datetime.now() + self.logger.info(f"Runtime: {end-start}") raise NotImplementedError if mod_name in [obj.__class__.__name__ for obj in self.active_modules[mod_function]]: print('{} {} already loaded as {}. Nothing to do.'.format( mod_name, mod_type, mod_function)) + end = datetime.now() + self.logger.info(f"Runtime: {end-start}") return # DSI Modules are Python classes. class_name = mod_name @@ -109,7 +130,11 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs): mod_name, mod_type, mod_function)) else: print('Hint: Did you declare your Plugin/Backend in the Terminal Global vars?') + end = datetime.now() + self.logger.info(f"Runtime: {end-start}") raise NotImplementedError + end = datetime.now() + self.logger.info(f"Runtime: {end-start}") def unload_module(self, mod_type, mod_name, mod_function): """ @@ -170,12 +195,20 @@ def transload(self, **kwargs): # Note this transload supports plugin.env Environment types now. 
diff --git a/dsi/plugins/file_reader.py b/dsi/plugins/file_reader.py
index eac0579..7c72382 100644
--- a/dsi/plugins/file_reader.py
+++ b/dsi/plugins/file_reader.py
@@ -6,8 +6,10 @@
 from pandas import DataFrame, read_csv, concat
 import re
 import yaml
-import toml
-import ast
+try: import tomllib
+except ModuleNotFoundError: import pip._vendor.tomli as tomllib
+
+# import ast
 
 from dsi.plugins.metadata import StructuredMetadata
 
@@ -284,59 +286,48 @@ def pack_header(self) -> None:
             table_info.append((self.target_table_prefix + "__" + table_name, list(self.toml_data[table_name].keys())))
         self.set_schema(table_info)
 
-    def check_type(self, text):
-        """
-        Tests input text and returns a predicted compatible SQL Type
-        `text`: text string
-        `return`: string returned as int, float or still a string
-        """
-        try:
-            _ = int(text)
-            return int(text)
-        except ValueError:
-            try:
-                _ = float(text)
-                return float(text)
-            except ValueError:
-                return text
-
     def add_rows(self) -> None:
         """
         Parses TOML data and creates an ordered dict whose keys are table names and values are an ordered dict for each table.
         """
         for filename in self.toml_files:
-            with open(filename, 'r+') as temp_file:
-                editedString = temp_file.read()
-                if '"{' not in editedString:
-                    editedString = re.sub('{', '"{', editedString)
-                    editedString = re.sub('}', '}"', editedString)
-                    temp_file.seek(0)
-                    temp_file.write(editedString)
-
-            with open(filename, 'r') as toml_file:
-                toml_load_data = toml.load(toml_file)
-
-            if not self.schema_is_set():
-                for tableName, tableData in toml_load_data.items():
-                    self.toml_data[tableName] = OrderedDict((key, []) for key in tableData.keys())
-                    self.toml_data[tableName + "_units"] = OrderedDict((key, []) for key in tableData.keys())
-                self.toml_data["dsi_relations"] = OrderedDict([('primary_key', []), ('foreign_key', [])])
-                self.pack_header()
+            # with open(filename, 'r+') as temp_file:
+            #     editedString = temp_file.read()
+            #     if '"{' not in editedString:
+            #         editedString = re.sub('{', '"{', editedString)
+            #         editedString = re.sub('}', '}"', editedString)
+            #         temp_file.seek(0)
+            #         temp_file.write(editedString)
+
+            toml_load_data = None
+            with open(filename, 'rb') as toml_file:
+                toml_load_data = tomllib.load(toml_file)
 
+            if not self.schema_is_set():
                 for tableName, tableData in toml_load_data.items():
-                    row = []
-                    unit_row = []
-                    for col_name, data in tableData.items():
-                        unit_data = "NULL"
-                        if isinstance(data, str) and data[0] == "{" and data[-1] == "}":
-                            data = ast.literal_eval(data)
-                            unit_data = data["units"]
-                            data = data["value"]
-                        self.toml_data[tableName][col_name].append(data)
-                        if len(self.toml_data[tableName + "_units"][col_name]) < 1:
-                            unit_row.append(unit_data)
-                            self.toml_data[tableName + "_units"][col_name].append(unit_data)
-                        row.append(data)
-                    self.add_to_output(row, self.target_table_prefix + "__" + tableName)
-                    if len(next(iter(self.output_collector[self.target_table_prefix + "__" + tableName + "_units"].values()))) < 1:
-                        self.add_to_output(unit_row, self.target_table_prefix + "__" + tableName + "_units")
\ No newline at end of file
+                    self.toml_data[tableName] = OrderedDict((key, []) for key in tableData.keys())
+                    self.toml_data[tableName + "_units"] = OrderedDict((key, []) for key in tableData.keys())
+                self.toml_data["dsi_relations"] = OrderedDict([('primary_key', []), ('foreign_key', [])])
+                self.pack_header()
+
+            for tableName, tableData in toml_load_data.items():
+                row = []
+                unit_row = []
+                for col_name, data in tableData.items():
+                    unit_data = "NULL"
+                    if isinstance(data, dict):
+                        unit_data = data["units"]
+                        data = data["value"]
+                    # IF statement for manual data parsing for python 3.10 and below
+                    # if isinstance(data, str) and data[0] == "{" and data[-1] == "}":
+                    #     data = ast.literal_eval(data)
+                    #     unit_data = data["units"]
+                    #     data = data["value"]
+                    self.toml_data[tableName][col_name].append(data)
+                    if len(self.toml_data[tableName + "_units"][col_name]) < 1:
+                        unit_row.append(unit_data)
+                        self.toml_data[tableName + "_units"][col_name].append(unit_data)
+                    row.append(data)
+                self.add_to_output(row, self.target_table_prefix + "__" + tableName)
+                if len(next(iter(self.output_collector[self.target_table_prefix + "__" + tableName + "_units"].values()))) < 1:
+                    self.add_to_output(unit_row, self.target_table_prefix + "__" + tableName + "_units")
\ No newline at end of file
diff --git a/examples/coreterminal.py b/examples/coreterminal.py
index e3a4d3e..6927c16 100644
--- a/examples/coreterminal.py
+++ b/examples/coreterminal.py
@@ -3,7 +3,7 @@
 
 '''This is an example workflow using core.py'''
 
-a=Terminal()
+a=Terminal(debug_flag=True)
 
 # a.list_available_modules('plugin')
 # ['GitInfo', 'Hostname', 'SystemKernel', 'Bueno', 'Csv']
@@ -17,17 +17,17 @@
 # a.list_available_modules('backend')
 # ['Gufi', 'Sqlite', 'Parquet']
 
-#a.load_module('plugin', 'YAML', 'reader', filenames=["data/schema.yml", "data/schema2.yml"], target_table_prefix = "schema")
+a.load_module('plugin', 'YAML', 'reader', filenames=["data/student_test1.yml", "data/student_test2.yml"], target_table_prefix = "student")
 #a.load_module('plugin', 'YAML', 'reader', filenames=["data/cmf.yml", "data/cmf.yml"], target_table_name = "cmf")
 # print(a.active_metadata)
 
-a.load_module('plugin', 'TOML', 'reader', filenames=["data/schema.toml", "data/schema2.toml"], target_table_prefix = "schema")
+a.load_module('plugin', 'TOML', 'reader', filenames=["data/results.toml"], target_table_prefix = "results")
 # print(a.active_metadata)
 
-a.load_module('backend','Sqlite','back-end', filename='data/data.db')
-#a.load_module('backend','Sqlite','back-end', filename='data/data2.db')
-# a.load_module('backend','Parquet','back-end',filename='./data/bueno.pq')
+a.load_module('backend','Sqlite','back-write', filename='data/data.db')
+#a.load_module('backend','Sqlite','back-write', filename='data/data2.db')
+# a.load_module('backend','Parquet','back-write',filename='./data/bueno.pq')
 
-a.load_module('plugin', "Table_Plot", "writer", table_name = "schema_physics", filename = "schema_physics")
+#a.load_module('plugin', "Table_Plot", "writer", table_name = "schema_physics", filename = "schema_physics")
 
 a.transload()
 a.artifact_handler(interaction_type='put')
@@ -35,12 +35,14 @@
 # {'writer': [],
 #  'reader': [],
 #  'front-end': [],
-#  'back-end': []}
+#  'back-write': []}
 
 # Example use
 # a.load_module('plugin','Bueno','reader',filenames='data/bueno1.data')
-# a.load_module('backend','Sqlite','back-end',filename='data/bueno.db')
+# a.load_module('backend','Sqlite','back-write',filename='data/bueno.db')
 # a.transload()
 
 # a.artifact_handler(interaction_type='put')
-# a.artifact_handler(interaction_type='get', query = "SELECT * FROM sqlite_master WHERE type='table';", isVerbose = True)
\ No newline at end of file
+data = a.artifact_handler(interaction_type='get', query = "SELECT * FROM sqlite_master WHERE type='table';")#, isVerbose = True)
+#CAN PRINT THE DATA OUTPUT
+# print(data)
\ No newline at end of file
diff --git a/examples/data/results.toml b/examples/data/results.toml
new file mode 100644
index 0000000..f22cbdd
--- /dev/null
+++ b/examples/data/results.toml
@@ -0,0 +1,5 @@
+
+[people]
+avg_height = {'value'= 5.5, 'units'= 'm'} # overall average height
+median_speed = {'value'= 6.95, 'units'= 's'} # overall median speed
+std_gravity = {'value'= 9.83, 'units'= 'm/s/s'} # overall std dev gravity
diff --git a/examples/data/schema.yml b/examples/data/student_test1.yml
similarity index 100%
rename from examples/data/schema.yml
rename to examples/data/student_test1.yml
diff --git a/examples/data/schema2.yml b/examples/data/student_test2.yml
similarity index 100%
rename from examples/data/schema2.yml
rename to examples/data/student_test2.yml
diff --git a/examples/data/schema.toml b/examples/data/teacher_test1.toml
similarity index 100%
rename from examples/data/schema.toml
rename to examples/data/teacher_test1.toml
diff --git a/examples/data/schema2.toml b/examples/data/teacher_test2.toml
similarity index 100%
rename from examples/data/schema2.toml
rename to examples/data/teacher_test2.toml
diff --git a/requirements.txt b/requirements.txt
index 897cf03..bca5e85 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,5 +4,4 @@ pydantic>=2.1.1
 nbconvert>=7.13.0
 gitpython>=3.0.0
 matplotlib>=3.6.0
-pyyaml>=6.0
-toml>=0.10.2
\ No newline at end of file
+pyyaml>=6.0
\ No newline at end of file