Skip to content

Commit

Permalink
Merge pull request #148 from Ensembl/feat/expand_metadata
Browse files Browse the repository at this point in the history
Expand `genomes` results to include assembly and linked objects
  • Loading branch information
bilalebi authored Jul 22, 2024
2 parents 995716d + 1274e8c commit 31697f3
Show file tree
Hide file tree
Showing 13 changed files with 393 additions and 87 deletions.
11 changes: 8 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Workflow name
name: PyTest, Black and Pylint
name: PyTest, Black, Pylint and Mypy

# Controls when the workflow will run
on:
Expand All @@ -17,7 +17,7 @@ jobs:
# This workflow contains a single job called "tests"
tests:
# The type of runner that the job will run on and timeout in minutes
name: Run Python Tests, Black formatter and Pylint
name: Run Python Tests, Black formatter, Pylint and Mypy
runs-on: ubuntu-latest
timeout-minutes: 10

Expand All @@ -32,7 +32,7 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v3

# Set up Python version from the matrix
# Set up Python version from the matrix
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
Expand All @@ -59,3 +59,8 @@ jobs:
- name: Run Pylint
run: |
pylint $(git ls-files '*.py') --fail-under=9.5
# Run Mypy
- name: Run Mypy type checker
run: |
mypy graphql_service
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ venv/
ENV/
env.bak/
venv.bak/
node_modules/
appenv/

# editor cruft
Expand Down
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,16 @@ To install dependencies, run:
Rename example_connections.conf to connections.conf and update the config values accordingly.

This command will start the server:
```
uvicorn --workers 1 --host=0.0.0.0 graphql_service.server:APP
```

```uvicorn --workers 1 --host=0.0.0.0 graphql_service.server:APP```

To run the Uvicorn server with automatic reload during development, add the `--reload` flag. Uvicorn will then watch your code and restart the server automatically whenever it detects a change.
```
uvicorn --workers 1 --host 0.0.0.0 --reload graphql_service.server:APP
```

If you're developing in PyCharm, you will probably find it useful to create a run
Also, if you're developing in PyCharm, you will probably find it useful to create a run
configuration so that you can use the debugger. Create a run configuration that
looks like this:

Expand Down
28 changes: 17 additions & 11 deletions common/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import grpc
from ensembl.production.metadata.grpc import ensembl_metadata_pb2_grpc

from common.utils import process_release_version

logger = logging.getLogger(__name__)


Expand All @@ -33,26 +35,30 @@ def __init__(self, config):
self.config = config
self.mongo_client = MongoDbClient.connect_mongo(self.config)

def get_database_conn(self, grpc_model, uuid):
def get_database_conn(self, grpc_model, uuid, force_grpc=False):
grpc_response = None
chosen_db = self.config.get("mongo_default_db")
# Try to connect to gRPC
try:
grpc_response = grpc_model.get_release_by_genome_uuid(uuid)
except Exception as grpc_exp:
# chosen_db value will fall back to the default value, which is 'mongo_default_db' that is in the config
# TODO: check why "except graphql.error.graphql_error.GraphQLError as grpc_exp:" didn't catch the error
logger.debug(
"[get_database_conn] Couldn't connect to gRPC Host: %s", grpc_exp
)

if grpc_response:
logger.debug("[get_database_conn] grpc_response: %s", grpc_response)
# replacing '.' with '_' to avoid
# "pymongo.errors.InvalidName: database names cannot contain the character '.'" error ¯\_(ツ)_/¯
release_version = str(grpc_response.release_version).replace(".", "_")
logger.debug("[get_database_conn] release_version: %s", release_version)
chosen_db = "release_" + release_version
if force_grpc:
chosen_db = process_release_version(grpc_response)
else:
if grpc_response and grpc_response.release_version:
chosen_db = process_release_version(grpc_response)
else:
# chosen_db value will fall back to the default value, which is 'mongo_default_db' that is in the config
# if force_grpc is not True
logger.warning(
"[get_database_conn] Falling back to the default Mongo DB: '%s'",
chosen_db,
)

logger.debug("[get_database_conn] Connected to '%s' MongoDB", chosen_db)
data_database_connection = self.mongo_client[chosen_db]
Expand All @@ -68,8 +74,8 @@ def connect_mongo(config):
password = config.get("mongo_password")

client = pymongo.MongoClient(
host,
port,
host=host,
port=port,
username=user,
password=password,
read_preference=pymongo.ReadPreference.SECONDARY_PREFERRED,
Expand Down
11 changes: 11 additions & 0 deletions common/schemas/dataset.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# A dataset record; `Genome` exposes a list of these through its `dataset` field.
# Every field is required (non-null) except `version`.
type Dataset {
dataset_id: String!
name: String!
# NOTE(review): presumably the same release numbering as `Genome.release_number` - confirm.
release: Float!
# NOTE(review): `type` and `dataset_type` are both required; how they differ is not visible here - confirm.
type: String!
source: String!
dataset_type: String!
# The only optional field of this type.
version: String
# Release date and release type are exposed as plain strings.
release_date: String!
release_type: String!
}
3 changes: 3 additions & 0 deletions common/schemas/genome.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,12 @@ type Genome {
assembly_accession: String!
scientific_name: String!
release_number: Float!
release_date: String!
taxon_id: Int!
tol_id: String
parlance_name: String
genome_tag: String
is_reference: Boolean!
assembly: Assembly
dataset: [Dataset]
}
19 changes: 13 additions & 6 deletions common/schemas/query.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ type Query {
by_slice: SliceInput): Locus
region(by_name: RegionNameInput!): Region

genomes(by_keyword: GenomeByKeywordInput,
by_assembly_accession_id: AssemblyAccessionIDInput): [Genome]

genomes(by_keyword: GenomeBySpecificKeywordInput): [Genome]

genome(by_genome_uuid: GenomeUUIDInput!): Genome

}

input SymbolInput {
symbol: String!
symbol: String
genome_id: String!
}

Expand Down Expand Up @@ -58,9 +58,16 @@ input GenomeUUIDInput {
release_version: Float
}

input GenomeByKeywordInput {
keyword: String!
release_version: Float
input GenomeBySpecificKeywordInput{
tolid: String
assembly_accession_id: String
assembly_name: String
ensembl_name: String
common_name: String
scientific_name: String
scientific_parlance_name: String
species_taxonomy_id: String
release_version: Float
}

input AssemblyAccessionIDInput {
Expand Down
55 changes: 55 additions & 0 deletions common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
limitations under the License.
"""

import logging
from typing import List

from graphql import GraphQLResolveInfo, FieldNode

logger = logging.getLogger(__name__)


def check_config_validity(config):
mandatory_fields = [
Expand All @@ -30,6 +37,28 @@ def check_config_validity(config):
)


def process_release_version(grpc_response):
    """
    Build the MongoDB database name for the release carried by a gRPC response.

    Database names may not contain '.', otherwise pymongo raises
    "pymongo.errors.InvalidName: database names cannot contain the character '.'",
    so every dot in the release version is swapped for an underscore.

    Args:
        grpc_response: The gRPC response object exposing a ``release_version`` attribute.

    Returns:
        str: The database name, i.e. 'release_' followed by the sanitised release version.
    """
    logger.debug("[get_database_conn] grpc_response: %s", grpc_response)
    # Splitting on '.' and re-joining with '_' swaps every dot for an underscore.
    version_label = "_".join(str(grpc_response.release_version).split("."))
    logger.debug("[get_database_conn] release_version: %s", version_label)
    return f"release_{version_label}"


def get_ensembl_metadata_api_version():
"""
Get the Metadata API tag from requirement.txt file
Expand All @@ -43,3 +72,29 @@ def get_ensembl_metadata_api_version():
version = line.strip().split("@")[-1]
break
return version


def check_requested_fields(info: GraphQLResolveInfo, fields: List[str]) -> List[bool]:
    """
    Report which of the given field names are selected directly under the resolved field.

    Only the selection set of the first entry in ``info.field_nodes`` is inspected, and only
    selections that are ``FieldNode`` instances with a non-empty name are taken into account.

    Args:
        info (GraphQLResolveInfo): The GraphQL resolve information containing query details.
        fields (List[str]): The field names to look for in the query.

    Returns:
        List[bool]: One flag per entry of ``fields``, in the same order, telling whether
        that field name was found among the selections.

    Usage example:
        fields_to_check = ["assembly", "dataset"]
        is_assembly_present, is_dataset_present = check_requested_fields(info, fields_to_check)
    """
    # Narrow step by step; any missing level leaves nothing to scan.
    field_nodes = info.field_nodes
    selection_set = field_nodes[0].selection_set if field_nodes else None
    selections = selection_set.selections if selection_set else None

    selected_names = [
        node.name.value
        for node in selections or ()
        if isinstance(node, FieldNode) and node.name and node.name.value
    ]

    return [wanted in selected_names for wanted in fields]
37 changes: 36 additions & 1 deletion graphql_service/resolver/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"""
from typing import Optional, Dict

import grpc
from graphql import GraphQLError


Expand All @@ -22,11 +23,22 @@ class DatabaseNotFoundError(GraphQLError):
"""

def __init__(self, db_name: str):
self.extensions = {"code": f"DATABASE_NOT_FOUND"}
self.extensions = {"code": "DATABASE_NOT_FOUND"}
message = f"Failed to find database: {db_name}"
super().__init__(message, extensions=self.extensions)


class CollectionNotFoundError(GraphQLError):
    """
    Custom error to be raised when a collection cannot be found.

    The error is created with a ``COLLECTION_NOT_FOUND`` code as its extensions.
    """

    def __init__(self, collection_name: str):
        error_extensions = {"code": "COLLECTION_NOT_FOUND"}
        self.extensions = error_extensions
        super().__init__(
            f"Failed to find collection: {collection_name}",
            extensions=error_extensions,
        )


class FieldNotFoundError(GraphQLError):
"""
Custom error to be raised if a field cannot be found by id
Expand Down Expand Up @@ -141,6 +153,15 @@ def __init__(self, organism_id):
super().__init__("organism", {"organism_id": organism_id})


class AssembliesFromGenomeNotFound(FieldNotFoundError):
    """
    Custom error to be raised when the assemblies of a genome cannot be found.
    """

    def __init__(self, assembly_id):
        # Reported as the "assemblies" field, keyed by the assembly id that was looked up.
        lookup_key = {"assembly_id": assembly_id}
        super().__init__("assemblies", lookup_key)


class AssembliesFromOrganismNotFound(FieldNotFoundError):
"""
Custom error to be raised if we can't find the assemblies for an organism
Expand Down Expand Up @@ -211,3 +232,17 @@ def __init__(self, message: str):
message: The error message describing the missing argument.
"""
super().__init__(message)


class FailedToConnectToGrpc(grpc.RpcError):
    """
    Exception raised when there is a gRPC connection issue.
    """

    def __init__(self, message: str):
        """Create the error with a description of the connection problem.

        Args:
            message: The error message describing the issue.
        """
        super().__init__(message)
Loading

0 comments on commit 31697f3

Please sign in to comment.