Skip to content

Commit

Permalink
fix: various updates
Browse files Browse the repository at this point in the history
  • Loading branch information
CS76 committed Jul 30, 2024
1 parent f982a42 commit c49023e
Show file tree
Hide file tree
Showing 37 changed files with 2,102 additions and 1,106 deletions.
366 changes: 366 additions & 0 deletions app/Actions/Coconut/SearchMolecule.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,366 @@
<?php

namespace App\Actions\Coconut;

use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Support\Facades\DB;

class SearchMolecule
{
public $query = '';

public $size = 20;

public $sort = null;

public $page = null;

public $tagType = null;

public $type = null;

public $collection = null;

public $organisms = null;

/**
* Search based on given query.
*/
public function query($query, $size, $type, $sort, $tagType)
{
$this->query = $query;
$this->size = $size;
$this->type = $type;
$this->sort = $sort;
$this->tagType = $tagType;

try {
set_time_limit(300);

$queryType = 'text';
$results = [];

if ($this->query == '') {
$this->type = '';
$this->tagType = '';
}

$offset = (($this->page ?? 1) - 1) * $this->size;

if ($this->type) {
$queryType = $this->type;
} else {
$queryType = $this->determineQueryType($this->query);
}
$queryType = strtolower($queryType);

$filterMap = $this->getFilterMap();

$statement = $this->buildStatement($queryType, $offset, $filterMap);

if ($statement) {
$results = $this->executeQuery($statement);
}

return $results;

} catch (QueryException $exception) {

return $this->handleException($exception);

}
}

/**
* Determine the query type based on the query pattern.
*/
private function determineQueryType($query)
{
$patterns = [
'inchi' => '/^((InChI=)?[^J][0-9BCOHNSOPrIFla+\-\(\)\\\\\/,pqbtmsih]{6,})$/i',
'inchikey' => '/^([0-9A-Z\-]+)$/i',
'smiles' => '/^([^J][0-9BCOHNSOPrIFla@+\-\[\]\(\)\\\\\/%=#$]{6,})$/i',
];

if (strpos($query, 'CNP') === 0) {
return 'identifier';
}

foreach ($patterns as $type => $pattern) {
if (preg_match_all($pattern, $query, $matches, PREG_SET_ORDER, 0)) {
if ($type == 'inchi' && substr($query, 0, 6) == 'InChI=') {
return 'inchi';
} elseif ($type == 'inchikey' && substr($query, 14, 1) == '-' && strlen($query) == 27) {
return 'inchikey';
} elseif ($type == 'smiles' && substr($query, 14, 1) != '-') {
return 'smiles';
}
}
}

return 'text';
}

/**
* Return a mapping of filter codes to database columns.
*/
private function getFilterMap()
{
return [
'mf' => 'molecular_formula',
'mw' => 'molecular_weight',
'hac' => 'heavy_atom_count',
'tac' => 'total_atom_count',
'arc' => 'aromatic_ring_count',
'rbc' => 'rotatable_bond_count',
'mrc' => 'minimal_number_of_rings',
'fc' => 'formal_charge',
'cs' => 'contains_sugar',
'crs' => 'contains_ring_sugars',
'cls' => 'contains_linear_sugars',
'npl' => 'np_likeness_score',
'alogp' => 'alogp',
'topopsa' => 'topo_psa',
'fsp3' => 'fsp3',
'hba' => 'h_bond_acceptor_count',
'hbd' => 'h_bond_donor_count',
'ro5v' => 'rule_of_5_violations',
'lhba' => 'lipinski_h_bond_acceptor_count',
'lhbd' => 'lipinski_h_bond_donor_count',
'lro5v' => 'lipinski_rule_of_5_violations',
'ds' => 'found_in_databases',
'class' => 'chemical_class',
'subclass' => 'chemical_sub_class',
'superclass' => 'chemical_super_class',
'parent' => 'direct_parent_classification',
];
}

/**
* Build the SQL statement based on the query type.
*/
private function buildStatement($queryType, $offset, $filterMap)
{
$statement = null;

switch ($queryType) {
case 'smiles':
case 'substructure':
$statement = "SELECT id, COUNT(*) OVER ()
FROM mols
WHERE m@>'{$this->query}'
LIMIT {$this->size} OFFSET {$offset}";

break;

case 'inchi':
$statement = "SELECT id, COUNT(*) OVER ()
FROM molecules
WHERE standard_inchi LIKE '%{$this->query}%'
LIMIT {$this->size} OFFSET {$offset}";
break;

case 'inchikey':
$statement = "SELECT id, COUNT(*) OVER ()
FROM molecules
WHERE standard_inchi_key LIKE '%{$this->query}%'
LIMIT {$this->size} OFFSET {$offset}";
break;

case 'exact':
$statement = "SELECT id, COUNT(*) OVER ()
FROM mols
WHERE m@='{$this->query}'
LIMIT {$this->size} OFFSET {$offset}";
break;

case 'similarity':
$statement = "SELECT id, COUNT(*) OVER ()
FROM fps
WHERE mfp2%morganbv_fp('{$this->query}')
ORDER BY morganbv_fp(mol_from_smiles('{$this->query}'))<%>mfp2
LIMIT {$this->size} OFFSET {$offset}";
break;

case 'identifier':
$statement = "SELECT id, COUNT(*) OVER ()
FROM molecules
WHERE (\"identifier\"::TEXT ILIKE '%{$this->query}%')
LIMIT {$this->size} OFFSET {$offset}";
break;

case 'tags':
$statement = $this->buildTagsStatement($offset);
break;

case 'filters':
$statement = $this->buildFiltersStatement($filterMap);
break;

default:
$statement = $this->buildDefaultStatement($offset);
break;
}

return $statement;
}

/**
* Build the SQL statement for 'tags' query type.
*/
private function buildTagsStatement($offset)
{
if ($this->tagType == 'dataSource') {
$this->collection = Collection::where('title', $this->query)->first();
if ($this->collection) {
return $this->collection->molecules()->orderBy('annotation_level', 'desc')->paginate($this->size);
} else {
return [];
}
} elseif ($this->tagType == 'organisms') {
$this->organisms = array_map('strtolower', array_map('trim', explode(',', $this->query)));

$organismIds = Organism::where(function ($query) {
foreach ($this->organisms as $name) {
$query->orWhereRaw('LOWER(name) = ?', [$name]);
}
})->pluck('id');

return Molecule::whereHas('organisms', function ($query) use ($organismIds) {
$query->whereIn('organism_id', $organismIds);
})->orderBy('annotation_level', 'DESC')->paginate($this->size);
} else {
return Molecule::withAnyTags([$this->query], $this->tagType)->paginate($this->size);
}
}

/**
* Build the SQL statement for 'filters' query type.
*/
private function buildFiltersStatement($filterMap)
{
$orConditions = explode('OR', $this->query);
$statement = 'SELECT molecule_id as id, COUNT(*) OVER ()
FROM properties WHERE ';

foreach ($orConditions as $index => $orCondition) {
if ($index > 0) {
$statement .= ' OR ';
}

$andConditions = explode(' ', trim($orCondition));
$statement .= '(';

foreach ($andConditions as $index => $andCondition) {
if ($index > 0) {
$statement .= ' AND ';
}

[$filterKey, $filterValue] = explode(':', $andCondition);

if (str_contains($filterValue, '..')) {
[$start, $end] = explode('..', $filterValue);
$statement .= "({$filterMap[$filterKey]} BETWEEN {$start} AND {$end})";
} elseif (in_array($filterValue, ['true', 'false'])) {
$statement .= "({$filterMap[$filterKey]} = {$filterValue})";
} elseif (str_contains($filterValue, '|')) {
$dbFilters = explode('|', $filterValue);
$dbs = explode('+', $dbFilters[0]);
$statement .= "({$filterMap[$filterKey]} @> '[\"".implode('","', $dbs)."\"]')";
} else {
$filterValue = str_replace('+', ' ', $filterValue);
$statement .= "(LOWER(REGEXP_REPLACE({$filterMap[$filterKey]}, '\\s+', '-', 'g'))::TEXT ILIKE '%{$filterValue}%')";
}
}

$statement .= ')';
}

return "{$statement} LIMIT {$this->size}";
}

/**
* Build the default SQL statement.
*/
private function buildDefaultStatement($offset)
{
if ($this->query) {
$this->query = str_replace("'", "''", $this->query);

return "
SELECT id, COUNT(*) OVER ()
FROM molecules
WHERE
(\"name\"::TEXT ILIKE '%{$this->query}%')
OR (\"synonyms\"::TEXT ILIKE '%{$this->query}%')
OR (\"identifier\"::TEXT ILIKE '%{$this->query}%')
ORDER BY
CASE
WHEN \"name\"::TEXT ILIKE '{$this->query}' THEN 1
WHEN \"synonyms\"::TEXT ILIKE '{$this->query}' THEN 2
WHEN \"identifier\"::TEXT ILIKE '{$this->query}' THEN 3
WHEN \"name\"::TEXT ILIKE '%{$this->query}%' THEN 4
WHEN \"synonyms\"::TEXT ILIKE '%{$this->query}%' THEN 5
WHEN \"identifier\"::TEXT ILIKE '%{$this->query}%' THEN 6
ELSE 7
END
LIMIT {$this->size} OFFSET {$offset}";
} else {
return "SELECT id, COUNT(*) OVER ()
FROM molecules
ORDER BY annotation_level DESC
LIMIT {$this->size} OFFSET {$offset}";
}
}

/**
* Execute the given SQL statement and return the results.
*/
private function executeQuery($statement)
{
$expression = DB::raw($statement);
$qString = $expression->getValue(DB::connection()->getQueryGrammar());
$hits = DB::select($qString);
$count = count($hits) > 0 ? $hits[0]->count : 0;

$ids = implode(',', collect($hits)->pluck('id')->toArray());

// dd($ids);

if ($ids != '') {
$statement = "SELECT * FROM molecules WHERE ID IN ({$ids})";
if ($this->sort == 'recent') {
$statement .= ' ORDER BY created_at DESC';
}
$expression = DB::raw($statement);
$string = $expression->getValue(DB::connection()->getQueryGrammar());

return new LengthAwarePaginator(DB::select($string), $count, $this->size, $this->page);
} else {
return new LengthAwarePaginator([], 0, $this->size, $this->page);
}
}

/**
* Handle exceptions by returning a proper JSON response.
*/
private function handleException(QueryException $exception)
{
$message = $exception->getMessage();
if (str_contains(strtolower($message), 'sqlstate[42p01]')) {
return response()->json(
[
'message' => 'It appears that the molecules table is not indexed. To enable search, please index molecules table and generate corresponding fingerprints.',
],
500
);
}

return response()->json(
[
'message' => $message,
],
500
);
}
}
10 changes: 9 additions & 1 deletion app/Console/Commands/DashWidgetsRefresh.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,15 @@ public function handle()

// Create the cache for all DashboardStatsMid widgets
Cache::rememberForever('stats.molecules', function () {
return DB::table('molecules')->selectRaw('count(*)')->get()[0]->count;
return DB::table('molecules')
->where('is_parent', false)
->where('has_stereo', false)
->count()
+ DB::table('molecules')
->where('is_parent', false)
->where('has_stereo', true)
->whereNotNull('parent_id')
->count();
});
Cache::rememberForever('stats.molecules.non_stereo', function () {
return DB::table('molecules')->selectRaw('count(*)')->whereRaw('has_stereo=false and is_parent=false')->get()[0]->count;
Expand Down
5 changes: 1 addition & 4 deletions app/Console/Commands/GenerateProperties.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@ public function handle()
Molecule::doesntHave('properties')->select('id')->chunk(30000, function ($mols) use (&$i) {
$batchJobs = [];
array_push($batchJobs, new GeneratePropertiesBatch($mols->pluck('id')->toArray()));
$batch = Bus::batch($batchJobs)->then(function (Batch $batch) {
})->catch(function (Batch $batch, Throwable $e) {
})->finally(function (Batch $batch) {
})->name('Generate Properties Batch:'.$i)
$batch = Bus::batch($batchJobs)->then(function (Batch $batch) {})->catch(function (Batch $batch, Throwable $e) {})->finally(function (Batch $batch) {})->name('Generate Properties Batch:'.$i)
->allowFailures(false)
->onConnection('redis')
->onQueue('default')
Expand Down
Loading

0 comments on commit c49023e

Please sign in to comment.