Merge branch 'main' into CI_testing
jpulidojr authored Sep 5, 2024
2 parents 28f97e4 + fd5402c commit 3b7b44a
Showing 9 changed files with 207 additions and 96 deletions.
158 changes: 113 additions & 45 deletions dsi/backends/sqlite.py
@@ -1,9 +1,11 @@
import csv
import sqlite3
-import yaml
import json
+import re
+import yaml
import subprocess
import os
+import toml

from dsi.backends.filesystem import Filesystem

@@ -13,6 +15,7 @@
STRING = "VARCHAR"
FLOAT = "FLOAT"
INT = "INT"
JSON = "TEXT"

# Holds table name and data properties

@@ -87,7 +90,6 @@ def put_artifact_type(self, types, isVerbose=False):
if isVerbose:
print(str_query)

-print(str_query)
self.cur.execute(str_query)
self.con.commit()

@@ -223,6 +225,53 @@ def put_artifacts_lgcy(self,artifacts, isVerbose=False):
self.cur.execute(str_query)
self.con.commit()

def put_artifacts_json(self, fname, tname, isVerbose=False):
"""
Function for insertion of Artifact metadata into a defined schema by using a JSON file
`fname`: filepath to the .json file to be read and inserted into the database
`tname`: String name of the table to be inserted
`return`: none
"""

json_str = None
try:
j = open(fname)
data = json.load(j)
json_str = json.dumps(data)
json_str = "'" + json_str + "'"
j.close()
except IOError as i:
print(i)
return
except ValueError as v:
print(v)
return

types = DataType()
types.properties = {}
types.name = tname

# Check if this has been defined from helper function
if self.types != None:
types.name = self.types.name

col_name = re.sub(r'.json', '', fname)
col_name = re.sub(r'.*/', '', col_name)
col_name = "'" + col_name + "'"
types.properties[col_name] = JSON

self.put_artifact_type(types)
col_names = ', '.join(types.properties.keys())
str_query = "INSERT INTO {} ({}) VALUES ({});".format(str(types.name), col_names, json_str)
if isVerbose:
print(str_query)

self.types = types
self.cur.execute(str_query)
self.con.commit()

# Adds columns and rows automatically based on a csv file
# [NOTE 3] This method should be deprecated in favor of put_artifacts.
def put_artifacts_csv(self, fname, tname, isVerbose=False):
@@ -499,31 +548,16 @@ def query_fctime(self, operator, ctime, isVerbose=False):

return resout

-class YamlReader():
-
-def __init__(self):
-pass
-
-def yamlToSqlite(self, filename, db_name):
+def yamlToSqlite(self, filename, db_name, deleteSql=True):
"""
-Function that should be called externally to create the sqlite database file and delete temporary sql file used to ingest data
+Function that ingests a YAML file into a sqlite database based on the given database name
`filename`: name of YAML file that is ingested
`db_name`: name of database that YAML file should be added to. Database will be created if it does not exist in local directory.
-"""
-self.yaml_to_db(filename, db_name)
-os.remove(db_name+".sql")
-
-def yaml_to_db(self, filename, db_name):
-"""
-DO NOT CALL EXTERNALLY EXCEPT FOR TESTING
-Function creates/adds to a sqlite db file from a given YAML file with specified database name
-`filename`: name of YAML file that is ingested
-`db_name`: name of database that YAML file should be added to. Database will be created if it does not exist in local directory.
+`deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
"""

with open(filename, 'r') as yaml_file, open(db_name+".sql", "w") as sql_file:
@@ -533,20 +567,18 @@ def yaml_to_db(self, filename, db_name):
yml_data = yaml.safe_load_all(editedString)

for table in yml_data:
-cols = table['columns'].keys()
-vals = table['columns'].values()
tableName = table["segment"]

-data_types = {float: "REAL", str: "TEXT", int: "INTEGER"}
-if not os.path.isfile(db_name+".db"):
+data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}
+if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
createStmt = f"CREATE TABLE {tableName} ( "
createUnitStmt = f"CREATE TABLE {tableName}_units ( "
-insertUnitStmt = f"INSERT INTO {tableName}_units {tuple(cols)} VALUES( "
+insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "

for key, val in table['columns'].items():
-createUnitStmt+= f"{key} TEXT, "
-if data_types[type(val)] == "TEXT" and self.check_type(val[:val.find(' ')]) in ["INTEGER", "REAL"]:
-createStmt += f"{key} {self.check_type(val[:val.find(' ')])}, "
+createUnitStmt+= f"{key} VARCHAR, "
+if data_types[type(val)] == "VARCHAR" and self.check_type(val[:val.find(' ')]) in [" INT", " FLOAT"]:
+createStmt += f"{key}{self.check_type(val[:val.find(' ')])}, "
insertUnitStmt+= f"'{val[val.find(' ')+1:]}', "
else:
createStmt += f"{key} {data_types[type(val)]}, "
@@ -556,11 +588,11 @@
sql_file.write(createUnitStmt[:-2] + ");\n\n")
sql_file.write(insertUnitStmt[:-2] + ");\n\n")

insertStmt = f"INSERT INTO {tableName} {tuple(cols)} VALUES( "
for val in vals:
if data_types[type(val)] == "TEXT" and self.check_type(val[:val.find(" ")]) in ["INTEGER", "REAL"]:
insertStmt = f"INSERT INTO {tableName} VALUES( "
for val in table['columns'].values():
if data_types[type(val)] == "VARCHAR" and self.check_type(val[:val.find(" ")]) in [" INT", " FLOAT"]:
insertStmt+= f"{val[:val.find(" ")]}, "
elif data_types[type(val)] == "TEXT":
elif data_types[type(val)] == "VARCHAR":
insertStmt+= f"'{val}', "
else:
insertStmt+= f"{val}, "
@@ -569,20 +601,56 @@

subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))

-def check_type(self, text):
+if deleteSql == True:
+os.remove(db_name+".sql")
+
+def tomlToSqlite(self, filename, db_name, deleteSql=True):
"""
-Tests input text and returns a predicted compatible SQL Type
+Function that ingests a TOML file into a sqlite database based on the given database name
-`text`: text string
+`filename`: name of TOML file that is ingested
-`return`: string description of a SQL data type
+`db_name`: name of database that TOML file should be added to. Database will be created if it does not exist in local directory.
+`deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
"""
-try:
-value = int(text)
-return "INTEGER"
-except ValueError:
-try:
-value = float(text)
-return "REAL"
-except ValueError:
-return "TEXT"
with open(filename, 'r') as toml_file, open(db_name+".sql", "w") as sql_file:
data = toml.load(toml_file)

for tableName, tableData in data.items():
data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}

if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
createStmt = f"CREATE TABLE {tableName} ( "
createUnitStmt = f"CREATE TABLE {tableName}_units ( "
insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "

for key, val in tableData.items():
createUnitStmt+= f"{key} VARCHAR, "
if type(val) == list and type(val[0]) == str and self.check_type(val[0]) in [" INT", " FLOAT"]:
createStmt += f"{key}{self.check_type(val[0])}, "
insertUnitStmt+= f"'{val[1]}', "
else:
createStmt += f"{key} {data_types[type(val)]}, "
insertUnitStmt+= "NULL, "

sql_file.write(createStmt[:-2] + ");\n\n")
sql_file.write(createUnitStmt[:-2] + ");\n\n")
sql_file.write(insertUnitStmt[:-2] + ");\n\n")

insertStmt = f"INSERT INTO {tableName} VALUES( "
for val in tableData.values():
if type(val) == list and type(val[0]) == str and self.check_type(val[0]) in [" INT", " FLOAT"]:
insertStmt+= f"{val[0]}, "
elif type(val) == str:
insertStmt+= f"'{val}', "
else:
insertStmt+= f"{val}, "

sql_file.write(insertStmt[:-2] + ");\n\n")

subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))

if deleteSql == True:
os.remove(db_name+".sql")
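
Both readers follow the same pattern: write a temporary <db_name>.sql script, pipe it through the sqlite3 command-line tool, and optionally delete the script. A minimal usage sketch mirroring the tests below (file names illustrative; assumes the sqlite3 CLI is on PATH):

from dsi.backends.sqlite import Sqlite

reader = Sqlite("schema-test.db")
# deleteSql=False keeps the generated .sql script around for inspection or diffing
reader.yamlToSqlite("schema.yml", "schema-test", deleteSql=False)
reader.tomlToSqlite("schema.toml", "schema-test")   # deleteSql defaults to True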

40 changes: 31 additions & 9 deletions dsi/backends/tests/test_sqlite.py
@@ -1,6 +1,6 @@
import git
from collections import OrderedDict
-from dsi.backends.sqlite import Sqlite, DataType, YamlReader
+from dsi.backends.sqlite import Sqlite, DataType
import os
import subprocess

@@ -38,6 +38,25 @@ def test_wildfiredata_artifact_put():
# No error implies success
assert True

def test_wildfiredata_artifact_put_t():
valid_middleware_datastructure = OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})
dbpath = 'test_wildfiredata_artifact.sqlite_data'
store = Sqlite(dbpath)
store.put_artifacts_t(valid_middleware_datastructure, tableName="Wildfire")
store.close()
# No error implies success
assert True

#Data from: https://microsoftedge.github.io/Demos/json-dummy-data/64KB.json
def test_jsondata_artifact_put():
jsonpath = '/'.join([get_git_root('.'), 'dsi/data/64KB.json'])
dbpath = "jsondata.db"
store = Sqlite(dbpath)
store.put_artifacts_json(jsonpath, tname="JSONData")
store.close()
# No error implies success
assert True

def test_yosemite_data_csv_artifact():
csvpath = '/'.join([get_git_root('.'), 'dsi/data/yosemite5.csv'])
dbpath = "yosemite.db"
@@ -56,18 +75,21 @@ def test_artifact_query():
data_type.name = "simulation"
result = store.sqlquery("SELECT *, MAX(wind_speed) AS max_windspeed FROM " +
str(data_type.name) + " GROUP BY safe_unsafe_fire_behavior")
-store.export_csv(result, "query.csv")
+store.export_csv(result, "TABLENAME", "query.csv")
store.close()
# No error implies success
assert True

def test_yaml_reader():
-reader = YamlReader()
-reader.yaml_to_db("../../../examples/data/schema.yml", "vedant-test")
-subprocess.run(["diff", "../../../examples/data/compare-yml.sql", "vedant-test.sql"], stdout=open("output.txt", "w"))
-file_size = os.path.getsize("output.txt")
-os.remove("output.txt")
-os.remove("vedant-test.sql")
-os.remove("vedant-test.db")
+reader = Sqlite("yaml-test.db")
+reader.yamlToSqlite("../../../examples/data/schema.yml", "yaml-test", deleteSql=False)
+subprocess.run(["diff", "../../../examples/data/compare-schema.sql", "yaml-test.sql"], stdout=open("compare_sql.txt", "w"))
+file_size = os.path.getsize("compare_sql.txt")
assert file_size == 0 #difference between sql files should be 0 characters

def test_toml_reader():
reader = Sqlite("toml-test.db")
reader.tomlToSqlite("../../../examples/data/schema.toml", "toml-test", deleteSql=False)
subprocess.run(["diff", "../../../examples/data/compare-schema.sql", "toml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")
assert file_size == 0 #difference between sql files should be 0 characters
7 changes: 1 addition & 6 deletions dsi/plugins/file_writer.py
@@ -40,14 +40,11 @@ def export_erd(self, dbname, fname):
`dbname`: database to create an ER diagram for
-`fname`: name (including path) of the png file that contains the generated ER diagram
+`fname`: name (including path) of the image file that contains the generated ER diagram - default png if not specified
`return`: none
"""
db = sqlite3.connect(dbname)

-# if fname[-4:] == ".dot":
-# fname = fname[:-4]

file_type = ".png"
if fname[-4:] == ".png" or fname[-4:] == ".pdf" or fname[-4:] == ".jpg":
@@ -57,8 +54,6 @@
file_type = fname[-5:]
fname = fname[:-5]

-# if fname[-4:] == ".dot":
-# fname = fname[:-4]
dot_file = open(fname + ".dot", "w")

numColsERD = 1
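
For reference, the call pattern exercised by the updated test below is roughly as follows (database and output names illustrative; assumes file_writer is imported as fw, as in the tests):

from dsi.plugins import file_writer as fw

erd = fw.ER_Diagram("test.db")
erd.export_erd("test.db", "test1")   # no recognized extension given, so test1.png is produced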
8 changes: 4 additions & 4 deletions dsi/tests/test_plugin.py
@@ -2,6 +2,7 @@
import cv2
import sqlite3
import numpy as np
import os

def test_export_db_erd():

@@ -14,10 +15,9 @@ def test_export_db_erd():
connection.commit()
connection.close()

erd = fw.ER_Diagram("test.db")
erd.export_erd("test.db", "test1")

er_image = cv2.imread("test1.png")
+pixel_mean = np.mean(er_image)

assert er_image is not None #check if image generated at all
-assert np.mean(er_image) != 255 #check if image is all white pixels (no diagram generated)
+os.remove("test1.png")
+assert pixel_mean != 255 #check if image is all white pixels (no diagram generated)
24 changes: 24 additions & 0 deletions examples/data/compare-schema.sql
@@ -0,0 +1,24 @@
CREATE TABLE math ( specification VARCHAR, a INT, b VARCHAR, c FLOAT, d INT, e FLOAT, f FLOAT);

CREATE TABLE math_units ( specification VARCHAR, a VARCHAR, b VARCHAR, c VARCHAR, d VARCHAR, e VARCHAR, f VARCHAR);

INSERT INTO math_units VALUES( NULL, NULL, NULL, 'cm', NULL, NULL, NULL);

INSERT INTO math VALUES( '!jack', 1, 'there is CM', 45.98, 2, 34.8, 0.0089);

CREATE TABLE address ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i INT, j INT, k INT, l FLOAT, m INT);

CREATE TABLE address_units ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i VARCHAR, j VARCHAR, k VARCHAR, l VARCHAR, m VARCHAR);

INSERT INTO address_units VALUES( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);

INSERT INTO address VALUES( '!sam', '/home/sam/lib/data', 'good memories', '556place street', 2, 3, 4, 1.0, 99);

CREATE TABLE physics ( specification VARCHAR, n FLOAT, o VARCHAR, p INT, q VARCHAR, r INT, s FLOAT);

CREATE TABLE physics_units ( specification VARCHAR, n VARCHAR, o VARCHAR, p VARCHAR, q VARCHAR, r VARCHAR, s VARCHAR);

INSERT INTO physics_units VALUES( NULL, 'm / s / s', NULL, 's', NULL, 'million grams', NULL);

INSERT INTO physics VALUES( '!amy', 9.8, 'gravity', 23, 'home 23', 1, -0.0012);

28 changes: 28 additions & 0 deletions examples/data/schema.toml
@@ -0,0 +1,28 @@
[math]
specification = "!jack"
a = 1
b = "there is CM"
c = ["45.98", "cm"]
d = 2
e = 34.8
f = 89.0e-4

[address]
specification = "!sam"
fileLoc = '/home/sam/lib/data'
g = "good memories"
h = "556place street"
i = 2
j = 3
k = 4
l = 10000.0e-4
m = 99

[physics]
specification = "!amy"
n = ["9.8", "m / s / s"]
o = "gravity"
p = ["23", "s"]
q = "home 23"
r = ['1', 'million grams']
s = -12.0e-4
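
Note the convention above: a two-element list such as c = ["45.98", "cm"] carries a value and its unit, which tomlToSqlite splits into a typed column and a matching row in the <table>_units table (compare the FLOAT column c and the 'cm' unit row in compare-schema.sql). A small sketch of that pairing, assuming the toml package:

import toml

data = toml.loads('c = ["45.98", "cm"]')
value, unit = data["c"]
# the reader types column c as FLOAT and records 'cm' in math_units
print(float(value), unit)   # 45.98 cm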