Skip to content

Commit

Permalink
Merge issue_423_selected; fix #406 #423 #580 #587
Browse files Browse the repository at this point in the history
- Add a 'selected_samples' state to prevent genotype-based queries from failing because of too many SQLite FROM clause terms
- Pass it to querybuilder
- Log an error message instead of letting the unrecoverable error occur
- Fix issues related to the group-by widget
- Fix report-related issues
- Update tests
  • Loading branch information
SamuelNicaise authored and bioinfo committed Oct 31, 2023
1 parent e19d32a commit 7faa03a
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 160 deletions.
5 changes: 5 additions & 0 deletions cutevariant/core/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def select_cmd(
having={}, # {"op":">", "value": 3 }
limit=50,
offset=0,
selected_samples=[],
**kwargs,
):
"""Select query Command
Expand Down Expand Up @@ -76,9 +77,11 @@ def select_cmd(
offset=offset,
group_by=group_by,
having=having,
selected_samples=selected_samples,
**kwargs,
)
LOGGER.debug("command:select_cmd:: %s", query)
print("cmd:select_cmd:: %s", query)
for i in conn.execute(query):
# THIS IS INSANE... SQLITE DOESNT RETURN ALIAS NAME WITH SQUARE BRACKET....
# I HAVE TO replace [] by () and go back after...
Expand All @@ -95,6 +98,7 @@ def count_cmd(
filters={},
group_by=[],
having={},
selected_samples=[],
**kwargs,
):
"""Count command
Expand Down Expand Up @@ -140,6 +144,7 @@ def count_cmd(
order_by=None,
group_by=group_by,
having=having,
selected_samples=selected_samples,
**kwargs,
)

Expand Down
35 changes: 24 additions & 11 deletions cutevariant/core/querybuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from functools import lru_cache

# Custom imports
from cutevariant.config import Config
from cutevariant.core import sql

import cutevariant.constants as cst
Expand Down Expand Up @@ -129,7 +130,6 @@ def is_annotation_join_required(fields, filters, order_by=None) -> bool:
return True

for condition in filters_to_flat(filters):

condition = list(condition.keys())[0]
if condition.startswith("ann."):
return True
Expand Down Expand Up @@ -217,7 +217,6 @@ def samples_join_required(fields, filters, order_by=None) -> list:


def fields_to_vql(fields) -> list:

vql_fields = []
for field in fields:
if field.startswith("samples."):
Expand Down Expand Up @@ -272,7 +271,6 @@ def fields_to_sql(fields, use_as=False) -> list:
sql_fields = []

for field in fields:

if field.startswith("ann."):
sql_field = f"`annotations`.`{field[4:]}`"
if use_as:
Expand Down Expand Up @@ -388,15 +386,13 @@ def condition_to_sql(item: dict, samples=None) -> str:
condition = ""

if table == "samples":

if name == "$any":
operator = "OR"

if name == "$all":
operator = "AND"

if operator and samples:

condition = (
"("
+ f" {operator} ".join(
Expand Down Expand Up @@ -519,9 +515,9 @@ def remove_field_in_filter(filters: dict, field: str = None) -> dict:
Returns:
dict: New filters dict with field removed
"""

# ---------------------------------
def recursive(obj):

output = {}
for k, v in obj.items():
if k in ["$and", "$or"]:
Expand Down Expand Up @@ -568,9 +564,9 @@ def filters_to_sql(filters: dict, samples=None) -> str:
Returns:
str: A sql where expression
"""

# ---------------------------------
def recursive(obj):

conditions = ""
for k, v in obj.items():
if k in ["$and", "$or"]:
Expand Down Expand Up @@ -617,9 +613,9 @@ def filters_to_vql(filters: dict) -> str:
Returns:
str: A sql where expression
"""

# ---------------------------------
def recursive(obj):

conditions = ""
for k, v in obj.items():
if k in ["$and", "$or"]:
Expand Down Expand Up @@ -691,10 +687,17 @@ def build_sql_query(
offset (int): record count per page
group_by (list/None): list of field you want to group
"""

# get samples ids

samples_ids = {i["name"]: i["id"] for i in sql.get_samples(conn)}
if selected_samples: # value can be None or list
if len(selected_samples) > 0:
samples_ids = {
i["name"]: i["id"] for i in sql.get_samples(conn) if i["name"] in selected_samples
}
else:
samples_ids = {i["name"]: i["id"] for i in sql.get_samples(conn)}
else:
samples_ids = {i["name"]: i["id"] for i in sql.get_samples(conn)}

# Create fields
sql_fields = ["`variants`.`id`"] + fields_to_sql(fields, use_as=True)
Expand Down Expand Up @@ -756,6 +759,17 @@ def build_sql_query(
if limit:
sql_query += f" LIMIT {limit} OFFSET {offset}"

# prevent the "too many FROM clause term, max 200" error
MAX_SAMPLES_DEFAULT = 100
config = Config("app")
max_samples = config.get("max_samples_in_query", MAX_SAMPLES_DEFAULT)
if len(samples_ids) > max_samples:
LOGGER.debug(f"failed query: {sql_query}")
LOGGER.error(
f"QUERY FAILED because too many samples in query. Expected {max_samples} max, got instead: {len(samples_ids)}"
)
return "SELECT * FROM variants WHERE 0 = 1 LIMIT 1" # bogus query to return 0 rows

return sql_query


Expand All @@ -766,7 +780,6 @@ def build_vql_query(
order_by=[],
**kwargs,
):

select_clause = ",".join(fields_to_vql(fields))

where_clause = filters_to_vql(filters)
Expand Down
7 changes: 5 additions & 2 deletions cutevariant/core/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def __init__(self, conn: sqlite3.Connection, sample_id: int):
super().__init__(conn)
self._sample_id = sample_id
self._variant_classif_threshold = 1
self._sample = sql.get_sample(self._conn, self._sample_id)

def set_variant_classif_threshold(self, threshold: int):
self._variant_classif_threshold = threshold
Expand Down Expand Up @@ -107,11 +108,12 @@ def get_stats(self) -> dict:
"$and": [
{
"samples."
+ sql.get_sample(self._conn, self._sample_id)["name"]
+ self._sample["name"]
+ ".gt": {"$gt": 0}
}
]
},
[self._sample["name"]]
):
# if classif is not defined in config, keep the number by default
row = [variant_classifs.get(row["classification"], row["classification"]), row["count"]]
Expand Down Expand Up @@ -159,11 +161,12 @@ def get_variants(self) -> typing.List[dict]:
"$and": [
{
"samples."
+ sql.get_sample(self._conn, self._sample_id)["name"]
+ self._sample["name"]
+ ".classification": {"$gte": self._variant_classif_threshold}
}
]
},
selected_samples= [self._sample["name"]]
)
variants = []
for var_id in variants_ids:
Expand Down
Loading

0 comments on commit 7faa03a

Please sign in to comment.