-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #257 from Steinbeck-Lab/main
Merge main into development
- Loading branch information
Showing
139 changed files
with
1,650 additions
and
428 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,397 @@ | ||
<?php | ||
|
||
namespace App\Actions\Coconut; | ||
|
||
use App\Models\Citation; | ||
use App\Models\Collection; | ||
use App\Models\Molecule; | ||
use App\Models\Organism; | ||
use Illuminate\Pagination\LengthAwarePaginator; | ||
use Illuminate\Support\Facades\DB; | ||
|
||
class SearchMolecule | ||
{ | ||
public $query = ''; | ||
|
||
public $size = 20; | ||
|
||
public $sort = null; | ||
|
||
public $page = null; | ||
|
||
public $tagType = null; | ||
|
||
public $type = null; | ||
|
||
public $collection = null; | ||
|
||
public $organisms = null; | ||
|
||
public $citations = null; | ||
|
||
/** | ||
* Search based on given query. | ||
*/ | ||
public function query($query, $size, $type, $sort, $tagType, $page) | ||
{ | ||
$this->query = $query; | ||
$this->size = $size; | ||
$this->type = $type; | ||
$this->sort = $sort; | ||
$this->tagType = $tagType; | ||
$this->page = $page; | ||
|
||
try { | ||
set_time_limit(300); | ||
|
||
$queryType = 'text'; | ||
$results = []; | ||
|
||
if ($this->query == '') { | ||
$this->type = ''; | ||
$this->tagType = ''; | ||
} | ||
|
||
$offset = (($this->page ?? 1) - 1) * $this->size; | ||
|
||
if ($this->type) { | ||
$queryType = $this->type; | ||
} else { | ||
$queryType = $this->determineQueryType($this->query); | ||
} | ||
$queryType = strtolower($queryType); | ||
|
||
$filterMap = $this->getFilterMap(); | ||
|
||
if ($queryType == 'tags') { | ||
$results = $this->buildTagsStatement($offset); | ||
} else { | ||
$statement = $this->buildStatement($queryType, $offset, $filterMap); | ||
if ($statement) { | ||
$results = $this->executeQuery($statement); | ||
} | ||
} | ||
|
||
return [$results, $this->collection, $this->organisms]; | ||
|
||
} catch (QueryException $exception) { | ||
|
||
return $this->handleException($exception); | ||
|
||
} | ||
} | ||
|
||
/** | ||
* Determine the query type based on the query pattern. | ||
*/ | ||
private function determineQueryType($query) | ||
{ | ||
$patterns = [ | ||
'inchi' => '/^((InChI=)?[^J][0-9BCOHNSOPrIFla+\-\(\)\\\\\/,pqbtmsih]{6,})$/i', | ||
'inchikey' => '/^([0-9A-Z\-]{27})$/i', // Modified to ensure exact length | ||
'smiles' => '/^([^J][0-9BCOHNSOPrIFla@+\-\[\]\(\)\\\\\/%=#$]{6,})$/i', | ||
]; | ||
|
||
if (strpos($query, 'CNP') === 0) { | ||
return 'identifier'; | ||
} | ||
|
||
foreach ($patterns as $type => $pattern) { | ||
if (preg_match($pattern, $query)) { | ||
if ($type == 'inchi' && substr($query, 0, 6) == 'InChI=') { | ||
return 'inchi'; | ||
} elseif ($type == 'inchikey' && substr($query, 14, 1) == '-' && strlen($query) == 27) { | ||
return 'inchikey'; | ||
} elseif ($type == 'smiles') { | ||
return 'smiles'; | ||
} | ||
} | ||
} | ||
|
||
return 'text'; | ||
} | ||
|
||
/** | ||
* Return a mapping of filter codes to database columns. | ||
*/ | ||
private function getFilterMap() | ||
{ | ||
return [ | ||
'mf' => 'molecular_formula', | ||
'mw' => 'molecular_weight', | ||
'hac' => 'heavy_atom_count', | ||
'tac' => 'total_atom_count', | ||
'arc' => 'aromatic_ring_count', | ||
'rbc' => 'rotatable_bond_count', | ||
'mrc' => 'minimal_number_of_rings', | ||
'fc' => 'formal_charge', | ||
'cs' => 'contains_sugar', | ||
'crs' => 'contains_ring_sugars', | ||
'cls' => 'contains_linear_sugars', | ||
'npl' => 'np_likeness_score', | ||
'alogp' => 'alogp', | ||
'topopsa' => 'topo_psa', | ||
'fsp3' => 'fsp3', | ||
'hba' => 'h_bond_acceptor_count', | ||
'hbd' => 'h_bond_donor_count', | ||
'ro5v' => 'rule_of_5_violations', | ||
'lhba' => 'lipinski_h_bond_acceptor_count', | ||
'lhbd' => 'lipinski_h_bond_donor_count', | ||
'lro5v' => 'lipinski_rule_of_5_violations', | ||
'ds' => 'found_in_databases', | ||
'class' => 'chemical_class', | ||
'subclass' => 'chemical_sub_class', | ||
'superclass' => 'chemical_super_class', | ||
'parent' => 'direct_parent_classification', | ||
'org' => 'organism', | ||
'cite' => 'ciatation', | ||
]; | ||
} | ||
|
||
/** | ||
* Build the SQL statement based on the query type. | ||
*/ | ||
private function buildStatement($queryType, $offset, $filterMap) | ||
{ | ||
$statement = null; | ||
|
||
switch ($queryType) { | ||
case 'smiles': | ||
case 'substructure': | ||
$statement = "SELECT id, m, | ||
tanimoto_sml(morganbv_fp(mol_from_smiles('{$this->query}')::mol), morganbv_fp(m::mol)) AS similarity, | ||
COUNT(*) OVER () AS count | ||
FROM mols | ||
WHERE m@> mol_from_smiles('{$this->query}')::mol | ||
ORDER BY similarity DESC | ||
LIMIT {$this->size} OFFSET {$offset}"; | ||
break; | ||
|
||
case 'inchi': | ||
$statement = "SELECT id, COUNT(*) OVER () | ||
FROM molecules | ||
WHERE standard_inchi LIKE '%{$this->query}%' | ||
LIMIT {$this->size} OFFSET {$offset}"; | ||
break; | ||
|
||
case 'inchikey': | ||
$statement = "SELECT id, COUNT(*) OVER () | ||
FROM molecules | ||
WHERE standard_inchi_key LIKE '%{$this->query}%' | ||
LIMIT {$this->size} OFFSET {$offset}"; | ||
break; | ||
|
||
case 'exact': | ||
$statement = "SELECT id, COUNT(*) OVER () | ||
FROM mols | ||
WHERE m@='{$this->query}' | ||
LIMIT {$this->size} OFFSET {$offset}"; | ||
break; | ||
|
||
case 'similarity': | ||
$statement = "SELECT id, COUNT(*) OVER () | ||
FROM fps | ||
WHERE mfp2%morganbv_fp('{$this->query}') | ||
ORDER BY morganbv_fp(mol_from_smiles('{$this->query}'))<%>mfp2 | ||
LIMIT {$this->size} OFFSET {$offset}"; | ||
break; | ||
|
||
case 'identifier': | ||
$statement = "SELECT id, COUNT(*) OVER () | ||
FROM molecules | ||
WHERE (\"identifier\"::TEXT ILIKE '%{$this->query}%') | ||
LIMIT {$this->size} OFFSET {$offset}"; | ||
break; | ||
|
||
case 'filters': | ||
$statement = $this->buildFiltersStatement($filterMap); | ||
break; | ||
|
||
default: | ||
$statement = $this->buildDefaultStatement($offset); | ||
break; | ||
} | ||
|
||
return $statement; | ||
} | ||
|
||
/** | ||
* Build the SQL statement for 'tags' query type. | ||
*/ | ||
private function buildTagsStatement($offset) | ||
{ | ||
if ($this->tagType == 'dataSource') { | ||
$this->collection = Collection::where('title', $this->query)->first(); | ||
if ($this->collection) { | ||
return $this->collection->molecules()->where('active', true)->where('is_parent', false)->orderBy('annotation_level', 'desc')->paginate($this->size); | ||
} else { | ||
return []; | ||
} | ||
} elseif ($this->tagType == 'organisms') { | ||
$query_organisms = array_map('strtolower', array_map('trim', explode(',', $this->query))); | ||
$this->organisms = Organism::where(function ($query) use ($query_organisms) { | ||
foreach ($query_organisms as $name) { | ||
$query->orWhereRaw('LOWER(name) LIKE ?', ['%'.strtolower($name).'%']); | ||
} | ||
})->get(); | ||
$organismIds = $this->organisms->pluck('id'); | ||
|
||
return Molecule::whereHas('organisms', function ($query) use ($organismIds) { | ||
$query->whereIn('organism_id', $organismIds); | ||
})->where('active', true)->where('is_parent', false)->orderBy('annotation_level', 'DESC')->paginate($this->size); | ||
} elseif ($this->tagType == 'citations') { | ||
$this->citations = array_map('strtolower', array_map('trim', explode(',', $this->query))); | ||
$citationIds = Citation::where(function ($query) { | ||
foreach ($this->citations as $name) { | ||
$query->orWhereRaw('LOWER(doi) LIKE ?', ['%'.strtolower($name).'%']) | ||
->orWhereRaw('LOWER(title) LIKE ?', ['%'.strtolower($name).'%']); | ||
} | ||
})->pluck('id'); | ||
|
||
return Molecule::whereHas('citations', function ($query) use ($citationIds) { | ||
$query->whereIn('citation_id', $citationIds); | ||
})->where('active', true)->where('is_parent', false)->orderBy('annotation_level', 'DESC')->paginate($this->size); | ||
} else { | ||
return Molecule::withAnyTags([$this->query], $this->tagType)->where('active', true)->where('is_parent', false)->paginate($this->size); | ||
} | ||
} | ||
|
||
/** | ||
* Build the SQL statement for 'filters' query type. | ||
*/ | ||
private function buildFiltersStatement($filterMap) | ||
{ | ||
$orConditions = explode('OR', $this->query); | ||
$statement = 'SELECT molecule_id as id, COUNT(*) OVER () | ||
FROM properties WHERE '; | ||
|
||
foreach ($orConditions as $index => $orCondition) { | ||
if ($index > 0) { | ||
$statement .= ' OR '; | ||
} | ||
|
||
$andConditions = explode(' ', trim($orCondition)); | ||
$statement .= '('; | ||
|
||
foreach ($andConditions as $index => $andCondition) { | ||
if ($index > 0) { | ||
$statement .= ' AND '; | ||
} | ||
|
||
[$filterKey, $filterValue] = explode(':', $andCondition); | ||
|
||
if (str_contains($filterValue, '..')) { | ||
[$start, $end] = explode('..', $filterValue); | ||
$statement .= "({$filterMap[$filterKey]} BETWEEN {$start} AND {$end})"; | ||
} elseif (in_array($filterValue, ['true', 'false'])) { | ||
$statement .= "({$filterMap[$filterKey]} = {$filterValue})"; | ||
} elseif (str_contains($filterValue, '|')) { | ||
$dbFilters = explode('|', $filterValue); | ||
$dbs = explode('+', $dbFilters[0]); | ||
$statement .= "({$filterMap[$filterKey]} @> '[\"".implode('","', $dbs)."\"]')"; | ||
} else { | ||
$filterValue = str_replace('+', ' ', $filterValue); | ||
$statement .= "(LOWER(REGEXP_REPLACE({$filterMap[$filterKey]}, '\\s+', '-', 'g'))::TEXT ILIKE '%{$filterValue}%')"; | ||
} | ||
} | ||
|
||
$statement .= ')'; | ||
} | ||
|
||
return "{$statement} LIMIT {$this->size}"; | ||
} | ||
|
||
/** | ||
* Build the default SQL statement. | ||
*/ | ||
private function buildDefaultStatement($offset) | ||
{ | ||
if ($this->query) { | ||
$this->query = str_replace("'", "''", $this->query); | ||
|
||
return " | ||
SELECT id, COUNT(*) OVER () | ||
FROM molecules | ||
WHERE | ||
((\"name\"::TEXT ILIKE '%{$this->query}%') | ||
OR (\"synonyms\"::TEXT ILIKE '%{$this->query}%') | ||
OR (\"identifier\"::TEXT ILIKE '%{$this->query}%')) | ||
AND is_parent = FALSE | ||
AND active = TRUE | ||
ORDER BY | ||
CASE | ||
WHEN \"name\"::TEXT ILIKE '{$this->query}' THEN 1 | ||
WHEN \"synonyms\"::TEXT ILIKE '{$this->query}' THEN 2 | ||
WHEN \"identifier\"::TEXT ILIKE '{$this->query}' THEN 3 | ||
WHEN \"name\"::TEXT ILIKE '%{$this->query}%' THEN 4 | ||
WHEN \"synonyms\"::TEXT ILIKE '%{$this->query}%' THEN 5 | ||
WHEN \"identifier\"::TEXT ILIKE '%{$this->query}%' THEN 6 | ||
ELSE 7 | ||
END | ||
LIMIT {$this->size} OFFSET {$offset}"; | ||
} else { | ||
return "SELECT id, COUNT(*) OVER () | ||
FROM molecules | ||
WHERE active = TRUE AND NOT (is_parent = TRUE AND has_variants = TRUE) | ||
ORDER BY annotation_level DESC | ||
LIMIT {$this->size} OFFSET {$offset}"; | ||
} | ||
} | ||
|
||
/** | ||
* Execute the given SQL statement and return the results. | ||
*/ | ||
private function executeQuery($statement) | ||
{ | ||
$expression = DB::raw($statement); | ||
$string = $expression->getValue(DB::connection()->getQueryGrammar()); | ||
$hits = DB::select($string); | ||
$count = count($hits) > 0 ? $hits[0]->count : 0; | ||
|
||
$ids_array = collect($hits)->pluck('id')->toArray(); | ||
$ids = implode(',', $ids_array); | ||
// dd($ids); | ||
|
||
if ($ids != '') { | ||
|
||
$statement = " | ||
SELECT identifier, canonical_smiles, annotation_level, name, iupac_name, organism_count, citation_count, geo_count, collection_count | ||
FROM molecules | ||
WHERE id = ANY (array[{$ids}]) | ||
ORDER BY array_position(array[{$ids}], id); | ||
"; | ||
if ($this->sort == 'recent') { | ||
$statement .= ' ORDER BY created_at DESC'; | ||
} | ||
$expression = DB::raw($statement); | ||
$string = $expression->getValue(DB::connection()->getQueryGrammar()); | ||
|
||
return new LengthAwarePaginator(DB::select($string), $count, $this->size, $this->page); | ||
} else { | ||
return new LengthAwarePaginator([], 0, $this->size, $this->page); | ||
} | ||
} | ||
|
||
/** | ||
* Handle exceptions by returning a proper JSON response. | ||
*/ | ||
private function handleException(QueryException $exception) | ||
{ | ||
$message = $exception->getMessage(); | ||
if (str_contains(strtolower($message), 'sqlstate[42p01]')) { | ||
return response()->json( | ||
[ | ||
'message' => 'It appears that the molecules table is not indexed. To enable search, please index molecules table and generate corresponding fingerprints.', | ||
], | ||
500 | ||
); | ||
} | ||
|
||
return response()->json( | ||
[ | ||
'message' => $message, | ||
], | ||
500 | ||
); | ||
} | ||
} |
Oops, something went wrong.