Merge branch 'main' into CI_testing
jpulidojr authored Sep 5, 2024
2 parents 28f97e4 + fd5402c commit 3b7b44a
Showing 9 changed files with 207 additions and 96 deletions.
158 changes: 113 additions & 45 deletions dsi/backends/sqlite.py
@@ -1,9 +1,11 @@
import csv
import sqlite3
-import yaml
import json
+import re
+import yaml
import subprocess
import os
+import toml

from dsi.backends.filesystem import Filesystem

@@ -13,6 +15,7 @@
STRING = "VARCHAR"
FLOAT = "FLOAT"
INT = "INT"
JSON = "TEXT"

# Holds table name and data properties

@@ -87,7 +90,6 @@ def put_artifact_type(self, types, isVerbose=False):
if isVerbose:
print(str_query)

-print(str_query)
self.cur.execute(str_query)
self.con.commit()

@@ -223,6 +225,53 @@ def put_artifacts_lgcy(self,artifacts, isVerbose=False):
self.cur.execute(str_query)
self.con.commit()

def put_artifacts_json(self, fname, tname, isVerbose=False):
"""
Function for insertion of Artifact metadata into a defined schema by using a JSON file
`fname`: filepath to the .json file to be read and inserted into the database
`tname`: String name of the table to be inserted
`return`: none
"""

json_str = None
try:
j = open(fname)
data = json.load(j)
json_str = json.dumps(data)
json_str = "'" + json_str + "'"
j.close()
except IOError as i:
print(i)
return
except ValueError as v:
print(v)
return

types = DataType()
types.properties = {}
types.name = tname

# Check if this has been defined from helper function
if self.types != None:
types.name = self.types.name

col_name = re.sub(r'.json', '', fname)
col_name = re.sub(r'.*/', '', col_name)
col_name = "'" + col_name + "'"
types.properties[col_name] = JSON

self.put_artifact_type(types)
col_names = ', '.join(types.properties.keys())
str_query = "INSERT INTO {} ({}) VALUES ({});".format(str(types.name), col_names, json_str)
if isVerbose:
print(str_query)

self.types = types
self.cur.execute(str_query)
self.con.commit()

# Adds columns and rows automatically based on a csv file
# [NOTE 3] This method should be deprecated in favor of put_artifacts.
def put_artifacts_csv(self, fname, tname, isVerbose=False):
@@ -499,31 +548,16 @@ def query_fctime(self, operator, ctime, isVerbose=False):

return resout

-class YamlReader():
-
-def __init__(self):
-pass
-
-def yamlToSqlite(self, filename, db_name):
+def yamlToSqlite(self, filename, db_name, deleteSql=True):
"""
-Function that should be called externally to create the sqlite database file and delete temporary sql file used to ingest data
+Function that ingests a YAML file into a sqlite database based on the given database name
`filename`: name of YAML file that is ingested
`db_name`: name of database that YAML file should be added to. Database will be created if it does not exist in local directory.
-"""
-self.yaml_to_db(filename, db_name)
-os.remove(db_name+".sql")
-
-def yaml_to_db(self, filename, db_name):
-"""
-DO NOT CALL EXTERNALLY EXCEPT FOR TESTING
-Function creates/adds to a sqlite db file from a given YAML file with specified database name
-`filename`: name of YAML file that is ingested
-`db_name`: name of database that YAML file should be added to. Database will be created if it does not exist in local directory.
+`deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
"""

with open(filename, 'r') as yaml_file, open(db_name+".sql", "w") as sql_file:
@@ -533,20 +567,18 @@ def yaml_to_db(self, filename, db_name):
yml_data = yaml.safe_load_all(editedString)

for table in yml_data:
-cols = table['columns'].keys()
-vals = table['columns'].values()
tableName = table["segment"]

-data_types = {float: "REAL", str: "TEXT", int: "INTEGER"}
-if not os.path.isfile(db_name+".db"):
+data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}
+if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
createStmt = f"CREATE TABLE {tableName} ( "
createUnitStmt = f"CREATE TABLE {tableName}_units ( "
-insertUnitStmt = f"INSERT INTO {tableName}_units {tuple(cols)} VALUES( "
+insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "

for key, val in table['columns'].items():
-createUnitStmt+= f"{key} TEXT, "
-if data_types[type(val)] == "TEXT" and self.check_type(val[:val.find(' ')]) in ["INTEGER", "REAL"]:
-createStmt += f"{key} {self.check_type(val[:val.find(' ')])}, "
+createUnitStmt+= f"{key} VARCHAR, "
+if data_types[type(val)] == "VARCHAR" and self.check_type(val[:val.find(' ')]) in [" INT", " FLOAT"]:
+createStmt += f"{key}{self.check_type(val[:val.find(' ')])}, "
insertUnitStmt+= f"'{val[val.find(' ')+1:]}', "
else:
createStmt += f"{key} {data_types[type(val)]}, "
@@ -556,11 +588,11 @@
sql_file.write(createUnitStmt[:-2] + ");\n\n")
sql_file.write(insertUnitStmt[:-2] + ");\n\n")

insertStmt = f"INSERT INTO {tableName} {tuple(cols)} VALUES( "
for val in vals:
if data_types[type(val)] == "TEXT" and self.check_type(val[:val.find(" ")]) in ["INTEGER", "REAL"]:
insertStmt = f"INSERT INTO {tableName} VALUES( "
for val in table['columns'].values():
if data_types[type(val)] == "VARCHAR" and self.check_type(val[:val.find(" ")]) in [" INT", " FLOAT"]:
insertStmt+= f"{val[:val.find(" ")]}, "
elif data_types[type(val)] == "TEXT":
elif data_types[type(val)] == "VARCHAR":
insertStmt+= f"'{val}', "
else:
insertStmt+= f"{val}, "
@@ -569,20 +601,56 @@

subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))

-def check_type(self, text):
+if deleteSql == True:
+os.remove(db_name+".sql")
+
+def tomlToSqlite(self, filename, db_name, deleteSql=True):
"""
-Tests input text and returns a predicted compatible SQL Type
+Function that ingests a TOML file into a sqlite database based on the given database name
-`text`: text string
+`filename`: name of TOML file that is ingested
-`return`: string description of a SQL data type
+`db_name`: name of database that TOML file should be added to. Database will be created if it does not exist in local directory.
+`deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
"""
-try:
-value = int(text)
-return "INTEGER"
-except ValueError:
-try:
-value = float(text)
-return "REAL"
-except ValueError:
-return "TEXT"
with open(filename, 'r') as toml_file, open(db_name+".sql", "w") as sql_file:
data = toml.load(toml_file)

for tableName, tableData in data.items():
data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}

if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
createStmt = f"CREATE TABLE {tableName} ( "
createUnitStmt = f"CREATE TABLE {tableName}_units ( "
insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "

for key, val in tableData.items():
createUnitStmt+= f"{key} VARCHAR, "
if type(val) == list and type(val[0]) == str and self.check_type(val[0]) in [" INT", " FLOAT"]:
createStmt += f"{key}{self.check_type(val[0])}, "
insertUnitStmt+= f"'{val[1]}', "
else:
createStmt += f"{key} {data_types[type(val)]}, "
insertUnitStmt+= "NULL, "

sql_file.write(createStmt[:-2] + ");\n\n")
sql_file.write(createUnitStmt[:-2] + ");\n\n")
sql_file.write(insertUnitStmt[:-2] + ");\n\n")

insertStmt = f"INSERT INTO {tableName} VALUES( "
for val in tableData.values():
if type(val) == list and type(val[0]) == str and self.check_type(val[0]) in [" INT", " FLOAT"]:
insertStmt+= f"{val[0]}, "
elif type(val) == str:
insertStmt+= f"'{val}', "
else:
insertStmt+= f"{val}, "

sql_file.write(insertStmt[:-2] + ");\n\n")

subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))

if deleteSql == True:
os.remove(db_name+".sql")
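
Both readers follow the same pattern: write a temporary <db_name>.sql script, pipe it through the sqlite3 command-line tool, and optionally delete the script. A minimal usage sketch mirroring the tests below (file names illustrative; assumes the sqlite3 CLI is on PATH):

from dsi.backends.sqlite import Sqlite

reader = Sqlite("schema-test.db")
# deleteSql=False keeps the generated .sql script around for inspection or diffing
reader.yamlToSqlite("schema.yml", "schema-test", deleteSql=False)
reader.tomlToSqlite("schema.toml", "schema-test")   # deleteSql defaults to True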

40 changes: 31 additions & 9 deletions dsi/backends/tests/test_sqlite.py
@@ -1,6 +1,6 @@
import git
from collections import OrderedDict
-from dsi.backends.sqlite import Sqlite, DataType, YamlReader
+from dsi.backends.sqlite import Sqlite, DataType
import os
import subprocess

@@ -38,6 +38,25 @@ def test_wildfiredata_artifact_put():
# No error implies success
assert True

def test_wildfiredata_artifact_put_t():
valid_middleware_datastructure = OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})
dbpath = 'test_wildfiredata_artifact.sqlite_data'
store = Sqlite(dbpath)
store.put_artifacts_t(valid_middleware_datastructure, tableName="Wildfire")
store.close()
# No error implies success
assert True

#Data from: https://microsoftedge.github.io/Demos/json-dummy-data/64KB.json
def test_jsondata_artifact_put():
jsonpath = '/'.join([get_git_root('.'), 'dsi/data/64KB.json'])
dbpath = "jsondata.db"
store = Sqlite(dbpath)
store.put_artifacts_json(jsonpath, tname="JSONData")
store.close()
# No error implies success
assert True

def test_yosemite_data_csv_artifact():
csvpath = '/'.join([get_git_root('.'), 'dsi/data/yosemite5.csv'])
dbpath = "yosemite.db"
@@ -56,18 +75,21 @@ def test_artifact_query():
data_type.name = "simulation"
result = store.sqlquery("SELECT *, MAX(wind_speed) AS max_windspeed FROM " +
str(data_type.name) + " GROUP BY safe_unsafe_fire_behavior")
-store.export_csv(result, "query.csv")
+store.export_csv(result, "TABLENAME", "query.csv")
store.close()
# No error implies success
assert True

def test_yaml_reader():
-reader = YamlReader()
-reader.yaml_to_db("../../../examples/data/schema.yml", "vedant-test")
-subprocess.run(["diff", "../../../examples/data/compare-yml.sql", "vedant-test.sql"], stdout=open("output.txt", "w"))
-file_size = os.path.getsize("output.txt")
-os.remove("output.txt")
-os.remove("vedant-test.sql")
-os.remove("vedant-test.db")
+reader = Sqlite("yaml-test.db")
+reader.yamlToSqlite("../../../examples/data/schema.yml", "yaml-test", deleteSql=False)
+subprocess.run(["diff", "../../../examples/data/compare-schema.sql", "yaml-test.sql"], stdout=open("compare_sql.txt", "w"))
+file_size = os.path.getsize("compare_sql.txt")
assert file_size == 0 #difference between sql files should be 0 characters

def test_toml_reader():
reader = Sqlite("toml-test.db")
reader.tomlToSqlite("../../../examples/data/schema.toml", "toml-test", deleteSql=False)
subprocess.run(["diff", "../../../examples/data/compare-schema.sql", "toml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")
assert file_size == 0 #difference between sql files should be 0 characters
7 changes: 1 addition & 6 deletions dsi/plugins/file_writer.py
@@ -40,14 +40,11 @@ def export_erd(self, dbname, fname):
`dbname`: database to create an ER diagram for
-`fname`: name (including path) of the png file that contains the generated ER diagram
+`fname`: name (including path) of the image file that contains the generated ER diagram - default png if not specified
`return`: none
"""
db = sqlite3.connect(dbname)

-# if fname[-4:] == ".dot":
-# fname = fname[:-4]

file_type = ".png"
if fname[-4:] == ".png" or fname[-4:] == ".pdf" or fname[-4:] == ".jpg":
@@ -57,8 +54,6 @@
file_type = fname[-5:]
fname = fname[:-5]

-# if fname[-4:] == ".dot":
-# fname = fname[:-4]
dot_file = open(fname + ".dot", "w")

numColsERD = 1
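
For reference, the call pattern exercised by the updated test below is roughly as follows (database and output names illustrative; assumes file_writer is imported as fw, as in the tests):

from dsi.plugins import file_writer as fw

erd = fw.ER_Diagram("test.db")
erd.export_erd("test.db", "test1")   # no recognized extension given, so test1.png is produced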
8 changes: 4 additions & 4 deletions dsi/tests/test_plugin.py
@@ -2,6 +2,7 @@
import cv2
import sqlite3
import numpy as np
import os

def test_export_db_erd():

@@ -14,10 +15,9 @@ def test_export_db_erd():
connection.commit()
connection.close()

erd = fw.ER_Diagram("test.db")
erd.export_erd("test.db", "test1")

er_image = cv2.imread("test1.png")
+pixel_mean = np.mean(er_image)

assert er_image is not None #check if image generated at all
-assert np.mean(er_image) != 255 #check if image is all white pixels (no diagram generated)
+os.remove("test1.png")
+assert pixel_mean != 255 #check if image is all white pixels (no diagram generated)
24 changes: 24 additions & 0 deletions examples/data/compare-schema.sql
@@ -0,0 +1,24 @@
CREATE TABLE math ( specification VARCHAR, a INT, b VARCHAR, c FLOAT, d INT, e FLOAT, f FLOAT);

CREATE TABLE math_units ( specification VARCHAR, a VARCHAR, b VARCHAR, c VARCHAR, d VARCHAR, e VARCHAR, f VARCHAR);

INSERT INTO math_units VALUES( NULL, NULL, NULL, 'cm', NULL, NULL, NULL);

INSERT INTO math VALUES( '!jack', 1, 'there is CM', 45.98, 2, 34.8, 0.0089);

CREATE TABLE address ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i INT, j INT, k INT, l FLOAT, m INT);

CREATE TABLE address_units ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i VARCHAR, j VARCHAR, k VARCHAR, l VARCHAR, m VARCHAR);

INSERT INTO address_units VALUES( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);

INSERT INTO address VALUES( '!sam', '/home/sam/lib/data', 'good memories', '556place street', 2, 3, 4, 1.0, 99);

CREATE TABLE physics ( specification VARCHAR, n FLOAT, o VARCHAR, p INT, q VARCHAR, r INT, s FLOAT);

CREATE TABLE physics_units ( specification VARCHAR, n VARCHAR, o VARCHAR, p VARCHAR, q VARCHAR, r VARCHAR, s VARCHAR);

INSERT INTO physics_units VALUES( NULL, 'm / s / s', NULL, 's', NULL, 'million grams', NULL);

INSERT INTO physics VALUES( '!amy', 9.8, 'gravity', 23, 'home 23', 1, -0.0012);

28 changes: 28 additions & 0 deletions examples/data/schema.toml
@@ -0,0 +1,28 @@
[math]
specification = "!jack"
a = 1
b = "there is CM"
c = ["45.98", "cm"]
d = 2
e = 34.8
f = 89.0e-4

[address]
specification = "!sam"
fileLoc = '/home/sam/lib/data'
g = "good memories"
h = "556place street"
i = 2
j = 3
k = 4
l = 10000.0e-4
m = 99

[physics]
specification = "!amy"
n = ["9.8", "m / s / s"]
o = "gravity"
p = ["23", "s"]
q = "home 23"
r = ['1', 'million grams']
s = -12.0e-4
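
Note the convention above: a two-element list such as c = ["45.98", "cm"] carries a value and its unit, which tomlToSqlite splits into a typed column and a matching row in the <table>_units table (compare the FLOAT column c and the 'cm' unit row in compare-schema.sql). A small sketch of that pairing, assuming the toml package:

import toml

data = toml.loads('c = ["45.98", "cm"]')
value, unit = data["c"]
# the reader types column c as FLOAT and records 'cm' in math_units
print(float(value), unit)   # 45.98 cm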