-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
df46a09
commit 01c6583
Showing
5 changed files
with
1,330 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import sys | ||
sys.path.append('../src') | ||
from util.SQLiteDBBulkLoader import SQLiteDBBulkLoader | ||
|
||
|
||
|
||
class mainSQLiteDBLoad(object):
    """Driver that runs a full SQLite database load through SQLiteDBBulkLoader."""

    def __init__(self):
        """Record the locations of the configuration files used by the load."""
        # db login credentials and host info
        # this is a private text file for login credentials
        # Format:
        #   host=<host_uri>
        #   dbname=<db_name_usually_ramp>
        #   username=<db_user_name_often_root>
        #   conpass=<db_connection_password>
        self.dbPropsFile = "../config/ramp_db_props.txt"

        # config for tables to load
        # a tab delimited file indicating which tables to load.
        self.dbConfigFilePath = "../config/db_load_resource_config.txt"

    def loadDBAfterTruncatingTables(self, sqliteFile,
                                    incrementLevel='increment_patch_release',
                                    optionalVersionOveride=None,
                                    optionalVersionNote=None,
                                    truncateTables=False,
                                    tablesToKeep=None):
        """Load the database, optionally truncating existing tables first.

        Despite the method name, tables are only truncated when
        ``truncateTables`` is true.

        Args:
            sqliteFile: path of the SQLite file handed to SQLiteDBBulkLoader.
            incrementLevel: forwarded to ``updateDBVersion``; one of
                'increment_patch_release', 'increment_minor_release' or
                'specified' (new version, perhaps a major release).
            optionalVersionOveride: forwarded to ``updateDBVersion`` as
                ``optionalVersion``.
            optionalVersionNote: forwarded to ``updateDBVersion`` as
                ``optionalNote``.
            truncateTables: when true, ``loader.truncateTables`` is called
                before loading.
            tablesToKeep: table names passed to ``truncateTables`` as
                ``tablesToSkip``. Defaults to
                ``['db_version', 'version_info']``.
        """
        # Build the default list per call so that it is never shared (and
        # possibly mutated) across invocations.
        if tablesToKeep is None:
            tablesToKeep = ['db_version', 'version_info']

        ################# DB Loading Instructions

        # pass the credentials file and target SQLite file to the bulk loader
        loader = SQLiteDBBulkLoader(self.dbPropsFile, sqliteFile)

        # truncate tables
        if truncateTables:
            loader.truncateTables(tablesToSkip=tablesToKeep)

        # update methods
        # the sql_resource_config.txt is a tab delimited file indicating which resources to load
        # those marked as 'ready' will be updated. Usually all database tables are updated in one run.
        # this method loads the intermediate parsing results from the ../../misc/sql/ directory.
        loader.load(self.dbConfigFilePath)

        # update Ontology Metabolite counts
        loader.updateOntologyMetaboliteCounts()

        # update Source pathwayCount
        loader.updateSourcePathwayCount()

        # sets the new updated version
        loader.updateDBVersion(incrementLevel=incrementLevel,
                               optionalVersion=optionalVersionOveride,
                               optionalNote=optionalVersionNote)

        # sets the analyte intersect json in the version table.
        # precondition: the updateDBVersion must have been set so that the
        # intersections can be attached to the current version
        loader.updateEntityIntersects()

        # this optional method tracks database version information supplied in this file.
        loader.updateVersionInfo("../config/ramp_resource_version_update.txt")

        # this method populates a table that reflects the current status of the database.
        # metrics such as gene and metabolite counts for each data set are tallied.
        loader.updateDataStatusSummary()

        # generate pathway similarity matrices, analyte lists and whatnot
        # this process replaced the old system of having Rdata in the package
        loader.generateAndLoadRampSupplementalData()
|
||
# Script entry: build the driver and run a load with table truncation enabled.
loader = mainSQLiteDBLoad()

# incrementLevel is one of 'increment_patch_release', 'increment_minor_release',
# or 'specified' (an explicitly supplied new version, perhaps a major release).
loader.loadDBAfterTruncatingTables(
    sqliteFile='../RaMP_SQLite_v2.3.0_Structure.sqlite',
    incrementLevel='specified',
    optionalVersionOveride="2.3.0",
    optionalVersionNote="20230727 data update/refresh release",
    truncateTables=True,
)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
''' | ||
Created on Aug 2, 2023 | ||
@author: braistedjc | ||
''' | ||
import pandas as pd | ||
from sqlalchemy import create_engine | ||
from sqlalchemy import MetaData | ||
from sklearn.metrics.pairwise import pairwise_distances | ||
|
||
class RampSupplementalDataBuilder(object):
    '''
    Queries the analytehaspathway and pathway tables through SQLAlchemy and
    builds pandas data frames from them: pathway-by-pathway Jaccard
    similarity matrices and per-pathway analyte counts.
    '''

    def __init__(self, dbType, credInfo):
        '''
        Constructor

        dbType   -- the type of DB, 'mysql' or 'sqlite'. An engine is only
                    created for 'sqlite'; for any other value self.engine
                    stays None.
        credInfo -- a MySQL RaMP db_properties file, or an SQLite DB file
        '''
        # the type of DB, MySQL or SQLite
        self.dbType = dbType

        # a MySQL RaMP db_properties file, or an SQLite DB file
        self.credInfo = credInfo

        # sqlalchemy engine to provide connections to DB
        self.engine = None

        if self.dbType == 'sqlite':
            self.engine = self.createSQLiteEngine(self.credInfo)

        # The three result slots below are initialised here but are not
        # assigned anywhere else in this class.

        # all analyte pathway similarity matrix
        self.analyteResult = None

        # presumably the metabolite-only pathway similarity matrix -- TODO confirm
        self.metsResult = None

        # presumably the gene-only pathway similarity matrix -- TODO confirm
        self.genesResult = None

    def createSQLiteEngine(self, sqliteFile=None):
        '''
        Return a SQLAlchemy engine for the given SQLite file path.
        '''
        engine = create_engine('sqlite:///' + sqliteFile, echo=False)
        return engine

    def _fetchDataFrame(self, sql, params=None, columns=None):
        '''
        Run one SQL statement and return all rows as a pandas DataFrame.

        sql     -- SQL text; ':name' placeholders are bound from params
        params  -- optional dict of bind parameter values
        columns -- optional column labels; defaults to the names reported
                   by the result set

        Raises RuntimeError when no engine has been configured.
        '''
        if self.engine is None:
            raise RuntimeError("No database engine is available for DB type: " + str(self.dbType))

        # Imported locally so the helper is self-contained. Plain SQL strings
        # must be wrapped in text() for Connection.execute on SQLAlchemy 2.x.
        from sqlalchemy import text

        # the context manager closes the connection; no explicit close() needed
        with self.engine.connect() as conn:
            statement = text(sql)
            if params:
                result = conn.execute(statement, params)
            else:
                result = conn.execute(statement)
            if columns is None:
                columns = list(result.keys())
            rows = [tuple(row) for row in result.all()]

        # Naming the columns at construction keeps the frame well-formed
        # even when the query returns no rows.
        return pd.DataFrame(rows, columns=columns)

    def listTables(self):
        '''
        Print the shape and contents of the list of tables in the database.

        Prints a message and returns without querying when the DB type is
        neither 'mysql' nor 'sqlite'.
        '''
        if self.dbType == 'mysql':
            sql = 'show tables'
        elif self.dbType == 'sqlite':
            sql = "SELECT name FROM sqlite_master WHERE type ='table' AND name NOT LIKE 'sqlite_%'"
        else:
            print("Unsupported DB Type: " + str(self.dbType))
            return

        tables = self._fetchDataFrame(sql)
        print("tables shape:" + str(tables.shape))
        print(tables)

    def buildPathwaySimilarityMatrices(self):
        '''
        Placeholder; not implemented. Returns None.
        '''
        return None

    def buildAnalyteSetStats(self):
        '''
        Placeholder; not implemented. Returns None.
        '''
        return None

    def buildSimilarityMatrix(self, matrixType):
        '''
        Build a pathway-by-pathway Jaccard similarity matrix.

        matrixType -- 'mets' restricts to rampIds like 'RAMP_C%' and 'genes'
                      to 'RAMP_G%', both with a minimum pathway size of 5.
                      Any other value uses every 'RAMP_%' id with a minimum
                      pathway size of 10.

        Pathways whose type is 'hmdb' are excluded by the query. Returns a
        DataFrame indexed and labelled by pathwayRampId whose values are
        1 - Jaccard distance between the pathways' analyte memberships.
        '''
        analyteKey = 'RAMP_%'
        minPathwaySize = 10

        if matrixType == 'mets':
            analyteKey = 'RAMP_C%'
            minPathwaySize = 5
        elif matrixType == 'genes':
            analyteKey = 'RAMP_G%'
            minPathwaySize = 5

        # the LIKE pattern is bound as a parameter, not spliced into the SQL
        sql = "select ap.pathwayRampId, ap.rampID from analytehaspathway ap, pathway p "\
              "where p.type != 'hmdb' and ap.pathwayRampId = p.pathwayRampId and ap.rampId like :analyteKey"

        df = self._fetchDataFrame(sql, params={'analyteKey': analyteKey},
                                  columns=['pathwayRampId', 'rampId'])
        print(df.shape)
        print(list(df.columns))

        # analyte (rows) x pathway (columns) membership counts
        crossTab = pd.crosstab(df['rampId'], df['pathwayRampId'])

        # keep only pathways that have at least minPathwaySize analytes
        ctSums = crossTab.sum(axis=0)
        pwSubset = ctSums[ctSums >= minPathwaySize]
        pwNames = pwSubset.index.values.tolist()
        crossTab = crossTab.loc[:, pwNames]

        # Jaccard is a boolean metric; scikit-learn would convert the counts
        # to booleans itself (with a warning), so do it explicitly here.
        membership = crossTab.T.to_numpy().astype(bool)
        dm = 1.0 - pairwise_distances(membership, metric='jaccard')

        dm = pd.DataFrame(dm, index=crossTab.columns, columns=crossTab.columns)

        return dm

    def buildAnalyteSet(self, dataSource, geneOrMet):
        '''
        Count the distinct analytes per pathway for one pathway source.

        dataSource -- value matched against pathway.type
        geneOrMet  -- 'genes' (or 'gene') counts rampIds like 'RAMP_G%';
                      any other value counts rampIds like 'RAMP_C%'

        Returns a DataFrame with one row per pathwayRampId holding the
        distinct analyte count (Freq) and the pathway source.
        '''
        print("building analyte stat set")

        rampIdPrefix = "RAMP_C%"
        # accept the singular form too, so 'gene' is not silently treated as metabolites
        if geneOrMet in ('genes', 'gene'):
            rampIdPrefix = "RAMP_G%"

        # both caller-supplied values are bound as parameters
        sql = "select ap.pathwayRampId, count(distinct(ap.rampId)) as Freq, p.type as pathwaySource "\
              "from analytehaspathway ap, pathway p "\
              "where p.type = :dataSource and ap.pathwayRampId = p.pathwayRampId "\
              "and ap.rampId like :rampIdPrefix group by ap.pathwayRampId"

        df = self._fetchDataFrame(sql, params={'dataSource': dataSource,
                                               'rampIdPrefix': rampIdPrefix})

        print("Stats shape")
        print(df.shape)
        print("Stats header")
        print(df.columns)

        return df
|
||
|
||
#pwob = PathwayOverlapBuilder(dbType = "sqlite", credInfo = "X:\\braistedjc\\tmp_work\\RaMP_SQLite_v2.3.0_Structure.sqlite") | ||
#pwob.listTables() | ||
#pwob.buildBaseMatrix(matrixType = "analytes") | ||
# pwob.buildSimilarityMatrix(matrixType = "genes") | ||
|
||
#pwob.buildAnalyteSet("wiki", "met") | ||
#pwob.buildAnalyteSet("wiki", "gene") | ||
|
||
#pwob.buildAnalyteSet("reactome", "met") | ||
#pwob.buildAnalyteSet("reactome", "gene") | ||
|
||
#pwob.buildAnalyteSet("hmdb", "met") | ||
# pwob.buildAnalyteSet("hmdb", "gene") | ||
#pwob.buildBaseMatrix(matrixType = "genes") |
Oops, something went wrong.