diff --git a/docs/conf.py b/docs/conf.py index 89ee37a0..1cafb347 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,8 +5,9 @@ from myst_parser import __version__ from sphinx.application import Sphinx -# Add parent directory to sys.path for autodoc +# Add current and parent directory to sys.path for autodoc sys.path.insert(0, str(Path(__file__).parent.parent)) +sys.path.insert(0, str(Path(__file__).parent)) # Project information project = "medmodels" @@ -23,6 +24,7 @@ "sphinx.ext.autosummary", "sphinx_autodoc_typehints", "sphinx_design", + "sphinx_extensions", "sphinx_copybutton", "sphinx_pyscript", "sphinx_tippy", diff --git a/docs/developer_guide/docstrings.md b/docs/developer_guide/docstrings.md index 5668959c..59d31a85 100644 --- a/docs/developer_guide/docstrings.md +++ b/docs/developer_guide/docstrings.md @@ -274,7 +274,7 @@ The first code block needs to start as shown below. It sets the code highlightin ``` -2. Result block +1. Result block The second block shows the return value when executing the code. The output value(s) should be entered after `>>>`. @@ -286,8 +286,6 @@ The second block shows the return value when executing the code. The output valu ``` - - Full Example: ```python @@ -337,7 +335,7 @@ Will be shown in the API Docs as: ``` ```` -Check out the docs of [find_node_in_time_window()](medmodels.treatment_effect.temporal_analysis.find_node_in_time_window) for a real example. +Check out the docs of [find_reference_edge()](medmodels.treatment_effect.temporal_analysis.find_reference_edge) for a real example. ### Raises diff --git a/docs/sphinx_extensions.py b/docs/sphinx_extensions.py new file mode 100644 index 00000000..6d24e163 --- /dev/null +++ b/docs/sphinx_extensions.py @@ -0,0 +1,167 @@ +"""Sphinx extension to execute included code snippets and display their output. 
+ +This module defines a custom Sphinx directive `ExecLiteralInclude` that allows +including code from external files, executing it, and displaying both the code +and its output in the documentation. + +Example: + ```{exec-literalinclude} path/to/your_script.py + --- + language: python + setup-lines: 1-4 + lines: 6-10 + --- + ``` +""" + +import contextlib +import io +from typing import List + +from docutils import nodes +from sphinx.application import Sphinx +from sphinx.util import parselinenos +from sphinx.util.docutils import SphinxDirective + + +class ExecLiteralInclude(SphinxDirective): + """Directive to include, execute, and display code from external files.""" + + required_arguments = 1 # The file path is the only required argument + optional_arguments = 0 + option_spec = { + "lines": lambda x: x, + "setup-lines": lambda x: x, + "language": lambda x: x, + } + has_content = False + + def run(self) -> List[nodes.Node]: + """Process the directive and return nodes to be inserted into the document. + + Returns: + List[nodes.Node]: A list of docutils nodes representing the code block + and its output. + + Raises: + FileNotFoundError: If the specified file does not exist. + Exception: If an error occurs while executing the code. 
+ """ + environment = self.state.document.settings.env + _, filename = environment.relfn2path(self.arguments[0]) + + try: + with open(filename, "r") as file: + code_lines = file.readlines() + except FileNotFoundError: + error = self.state_machine.reporter.error( + f"File not found: {filename}", line=self.lineno + ) + + return [error] + + total_lines = len(code_lines) + + # Extract setup code + setup_code = "" + if "setup-lines" in self.options: + setup_line_numbers = parselinenos(self.options["setup-lines"], total_lines) + setup_code = "".join([code_lines[i] for i in setup_line_numbers]) + + # Extract main code + main_code = "" + if "lines" in self.options: + main_line_numbers = parselinenos(self.options["lines"], total_lines) + main_code = "".join([code_lines[i] for i in main_line_numbers]) + else: + main_code = "".join(code_lines) + + # Create a literal block node for the main code + code_node = nodes.literal_block(main_code, main_code) + code_node["language"] = self.options.get("language", "python") + + # Prepare code for execution + main_code_lines = main_code.rstrip().split("\n") + + # Remove trailing empty lines + while main_code_lines and not main_code_lines[-1].strip(): + main_code_lines.pop() + + if main_code_lines: + last_line = main_code_lines.pop() + code_before_last_line = "\n".join(main_code_lines) + else: + last_line = "" + code_before_last_line = "" + + # Execute code and capture output + output_io = io.StringIO() + exec_globals = {} + + try: + with ( + contextlib.redirect_stdout(output_io), + contextlib.redirect_stderr(output_io), + ): + if setup_code: + exec(setup_code, exec_globals) + + if code_before_last_line: + exec(code_before_last_line, exec_globals) + + if last_line: + if self._is_expression(last_line): + result = eval(last_line, exec_globals) + if result is not None: + print(repr(result)) + else: + exec(last_line, exec_globals) + + except Exception as e: + error_msg = f"Error executing code: {e}" + error_node = nodes.error("", 
nodes.paragraph(text=error_msg)) + return [code_node, error_node] + + output_text = output_io.getvalue() + + # Create a literal block for the output + output_node = nodes.literal_block(output_text, output_text) + output_node["language"] = "none" + + return [code_node, output_node] + + def _is_expression(self, code_line: str) -> bool: + """Determine if a line of code is an expression. + + Args: + code_line (str): The line of code to check. + + Returns: + bool: True if the line is an expression, False otherwise. + """ + try: + compile(code_line, "", "eval") + return True + except SyntaxError: + return False + + def _parse_line_range(self, line_range_str: str, total_lines: int) -> List[int]: + """Parse a line range string into a list of line indices. + + Args: + line_range_str (str): The line range string (e.g., "1-3,5"). + total_lines (int): The total number of lines in the source file. + + Returns: + List[int]: A list of line indices (0-based). + """ + return parselinenos(line_range_str, total_lines) + + +def setup(app: Sphinx) -> None: + """Set up the Sphinx extension. + + Args: + app (Sphinx): The Sphinx application instance. + """ + app.add_directive("exec-literalinclude", ExecLiteralInclude) diff --git a/docs/user_guide/02b_query_engine.md b/docs/user_guide/02b_query_engine.md index 4c84b5fd..d8916a54 100644 --- a/docs/user_guide/02b_query_engine.md +++ b/docs/user_guide/02b_query_engine.md @@ -1 +1,406 @@ # Query Engine + +## What is the Query Engine? + +The **MedRecord Query Engine** enables users to find node and edges' indices stored in the graph structure efficiently. Thanks to an intuitive interface, complex queries can be performed, allowing you to filter nodes and edges by their properties and relationships. This section introduces the basic concepts of querying MedRecords and explores advanced techniques for working with complex datasets. 
+ +## Example dataset + +An example dataset for the following demonstrations was generated with the method [`from_example_dataset`](medmodels.medrecord.medrecord.MedRecord.from_example_dataset){target="_blank"} from the [`MedRecord`](medmodels.medrecord.medrecord.MedRecord){target="_blank"} class. + +```{literalinclude} scripts/02b_show_dataset.py +--- +language: python +lines: 7 +--- +``` + +This example dataset includes a set of patients, drugs, diagnoses and procedures. For this section, we will only use the patients, drugs and the edges that connect these two groups. + +```{exec-literalinclude} scripts/02b_show_dataset.py +--- +language: python +setup-lines: 1-36 +lines: 38 +--- +``` + +```{exec-literalinclude} scripts/02b_show_dataset.py +--- +language: python +setup-lines: 1-36 +lines: 39 +--- +``` + +```{exec-literalinclude} scripts/02b_show_dataset.py +--- +language: python +setup-lines: 1-36 +lines: 40 +--- +``` + +## Node Queries + +The [`NodeOperand`](medmodels.medrecord.querying.NodeOperand){target="_blank"} querying class allows you to define specific criteria for selecting nodes within a [`MedRecord`](medmodels.medrecord.medrecord.MedRecord){target="_blank"}. These operands enable flexible and complex queries by combining multiple conditions, such as group membership, attributes' selection and querying, attribute values, and relationships to other nodes or edges. This section introduces the basic usage of node operands to create a powerful foundation for your data queries. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4 +lines: 7-12 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`in_group()`](medmodels.medrecord.querying.NodeOperand.in_group){target="_blank"} : Query nodes that belong to that group. +- [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} : Select nodes that match that query. 
+ +::: + +You can get to the same result via different approaches. That makes the query engine very versatile and adaptive to your specific needs. Let's make it a bit more complex by involving more than one operand. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4 +lines: 15-24 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`in_group()`](medmodels.medrecord.querying.NodeOperand.in_group){target="_blank"} : Query nodes that belong to that group. +- [`index()`](medmodels.medrecord.querying.NodeOperand.index){target="_blank"}: Returns a [`NodeIndexOperand`](medmodels.medrecord.querying.NodeIndexOperand){target="_blank"} to query on the indices. +- [`contains()`](medmodels.medrecord.querying.NodeIndexOperand.contains){target="_blank"} : Query node indices containing that argument. +- [`has_attribute()`](medmodels.medrecord.querying.NodeOperand.has_attribute){target="_blank"} : Query nodes that have that attribute. +- [`attribute()`](medmodels.medrecord.querying.NodeOperand.attribute){target="_blank"} : Returns a [`MultipleValuesOperand`](medmodels.medrecord.querying.MultipleValuesOperand){target="_blank"} to query on the values of the nodes for that attribute. +- [`greater_than()`](medmodels.medrecord.querying.MultipleValuesOperand.greater_than){target="_blank"} : Query values that are greater than that value. +- [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} : Select nodes that match that query. + +::: + +:::{note} +The [`has_attribute()`](medmodels.medrecord.querying.NodeOperand.has_attribute){target="_blank"} method is not needed in this example, since the [`attribute()`](medmodels.medrecord.querying.NodeOperand.attribute){target="_blank"} one already checks whether the nodes have the attribute. It is placed there merely for educational purposes. This will happen in different examples in this user guide to ensure that as many methods as possible are showcased. 
+::: + +### Reusing Node Queries + +As you can see, the query engine can prove to be highly useful for finding nodes that fulfill different criteria, these criteria being as specific and narrow as we like. A key feature of the query engine is that it allows for re-using previous queries in new ones. For instance, the previous query can be written as follows: + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-12 +lines: 27-36 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`index()`](medmodels.medrecord.querying.NodeOperand.index){target="_blank"}: Returns a [`NodeIndexOperand`](medmodels.medrecord.querying.NodeIndexOperand){target="_blank"} to query on the indices. +- [`contains()`](medmodels.medrecord.querying.NodeIndexOperand.contains){target="_blank"} : Query node indices containing that argument. +- [`has_attribute()`](medmodels.medrecord.querying.NodeOperand.has_attribute){target="_blank"} : Query nodes that have that attribute. +- [`attribute()`](medmodels.medrecord.querying.NodeOperand.attribute){target="_blank"} : Returns a [`MultipleValuesOperand`](medmodels.medrecord.querying.MultipleValuesOperand){target="_blank"} to query on the values of the nodes for that attribute. +- [`greater_than()`](medmodels.medrecord.querying.MultipleValuesOperand.greater_than){target="_blank"} : Query values that are greater than that value. +- [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} : Select nodes that match that query. + +::: + +### Advanced Query Operations + +For instance, if you do not know whether there are different ways to assign the `gender` attribute across the [`MedRecord`](medmodels.medrecord.medrecord.MedRecord){target="_blank"} (with leading/trailing whitespace or formatted in lower/uppercase), you can modify the value of the attributes of a node/edge inside the query. 
+ +:::{note} + +It is important to note that modifying these values **does not** change the actual value of the attributes within the [`MedRecord`](medmodels.medrecord.medrecord.MedRecord){target="_blank"}: it just changes the value of those variables in the query. + +::: + +You can also perform mathematical calculations like [`mean()`](medmodels.medrecord.querying.MultipleValuesOperand.mean){target="_blank"}, [`median()`](medmodels.medrecord.querying.MultipleValuesOperand.median){target="_blank"} or [`min()`](medmodels.medrecord.querying.MultipleValuesOperand.min){target="_blank"} and assign them to a variable. Also, you can keep manipulating the operand, like in the following example, where we are subtracting _5_ years from the `mean_age` to query on that value. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4 +lines: 39-55 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`in_group()`](medmodels.medrecord.querying.NodeOperand.in_group){target="_blank"} : Query nodes that belong to that group. +- [`index()`](medmodels.medrecord.querying.NodeOperand.index){target="_blank"}: Returns a [`NodeIndexOperand`](medmodels.medrecord.querying.NodeIndexOperand){target="_blank"} to query on the indices. +- [`contains()`](medmodels.medrecord.querying.NodeIndexOperand.contains){target="_blank"} : Query node indices containing that argument. +- [`attribute()`](medmodels.medrecord.querying.NodeOperand.attribute){target="_blank"} : Returns a [`MultipleValuesOperand`](medmodels.medrecord.querying.MultipleValuesOperand){target="_blank"} to query on the values of the nodes for that attribute. +- [`lowercase()`](medmodels.medrecord.querying.MultipleValuesOperand.lowercase){target="_blank"} : Converts the values that are strings to lowercase. +- [`trim()`](medmodels.medrecord.querying.MultipleValuesOperand.trim){target="_blank"} : Removes leading and trailing whitespace from the values. 
+- [`equal_to()`](medmodels.medrecord.querying.MultipleValuesOperand.equal_to){target="_blank"} : Query values equal to that value. + +- [`has_attribute()`](medmodels.medrecord.querying.NodeOperand.has_attribute){target="_blank"} : Query nodes that have that attribute. +- [`mean()`](medmodels.medrecord.querying.MultipleValuesOperand.mean){target="_blank"}: Returns a [`SingleValueOperand`](medmodels.medrecord.querying.SingleValueOperand){target="_blank"} containing the mean of those values. +- [`subtract()`](medmodels.medrecord.querying.SingleValueOperand.subtract){target="_blank"} : Subtract the argument from the single value operand. + +- [`less_than()`](medmodels.medrecord.querying.MultipleValuesOperand.less_than){target="_blank"} : Query values that are less than that value. +- [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} : Select nodes that match that query. + +::: + +:::{note} +Query methods used for changing the operands cannot be concatenated or assigned to variables, since their Return is None. That is, the following code snippet will set `gender_lowercase` as None, and as a result, an AttributeError will be thrown: + +```python +# Wrong implementation +gender_lowercase = node.attribute("gender").lowercase() +gender_lowercase.equal_to("m") + +AttributeError("'NoneType' object has no attribute 'equal_to'") + +# Wrong implementation +gender = node.attribute("gender") +gender.lowercase().trim() +gender.equal_to("m") + +AttributeError("'NoneType' object has no attribute 'trim'") + +# Correct implementation +gender = node.attribute("gender") +gender.lowercase() +gender.trim() +gender.equal_to("m") +``` + +Nor do the ones that compare operands to other operands, since their Return value is also None. 
+ +```python +# Wrong implementation +gender = node.attribute("gender") +gender.equal_to("M").not_equal_to("F") + +AttributeError("'NoneType' object has no attribute 'not_equal_to'") + +# Correct implementation +gender = node.attribute("gender") +gender.equal_to("M") +gender.not_equal_to("F") +``` + +::: + +Another very useful method is [`neighbors()`](medmodels.medrecord.querying.NodeOperand.neighbors){target="_blank"}, which can be used to query through the nodes that are neighbors to those nodes (they have edges connecting them). + +In this following example we are selecting the nodes that fulfill the following criteria: + +- Are in group `patient`. +- Their node index contains the string _"pat"_ +- Their attribute `age` is greater than 30, and their attribute `gender` is equal to _"M"_. +- They are connected to nodes which attribute `description` contains the word _"fentanyl"_ in either upper or lowercase. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4, 15-24 +lines: 58-67 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`neighbors()`](medmodels.medrecord.querying.NodeOperand.neighbors){target="_blank"} : Returns a [`NodeOperand()`](medmodels.medrecord.querying.NodeOperand){target="_blank"} to query the neighbors of those nodes. +- [`attribute()`](medmodels.medrecord.querying.NodeOperand.attribute){target="_blank"} : Returns a [`MultipleValuesOperand()`](medmodels.medrecord.querying.MultipleValuesOperand){target="_blank"} to query on the values of the nodes for that attribute. +- [`lowercase()`](medmodels.medrecord.querying.MultipleValuesOperand.lowercase){target="_blank"} : Converts the values that are strings to lowercase. +- [`contains()`](medmodels.medrecord.querying.NodeIndexOperand.contains){target="_blank"} : Query node indices containing that argument. +- [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} : Select nodes that match that query. 
+ +::: + +## Edge Queries + +The querying class [`EdgeOperand`](medmodels.medrecord.querying.EdgeOperand){target="_blank"} provides a way to query through the edges contained in a [`MedRecord`](medmodels.medrecord.medrecord.MedRecord){target="_blank"}. Edge operands show the same functionalities as Node operands, creating a very powerful tandem to query throughout your data. In this section, we will portray different ways the edge operands can be employed. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4 +lines: 70-76 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`in_group()`](medmodels.medrecord.querying.EdgeOperand.in_group){target="_blank"} : Query edges that belong to that group. +- [`select_edges()`](medmodels.medrecord.medrecord.MedRecord.select_edges){target="_blank"} : Select edges that match that query. + +::: + +The edge operand follows the same principles as the node operand, with some extra queries applicable only to edges like [`source_node()`](medmodels.medrecord.querying.EdgeOperand.source_node){target="_blank"} or [`target_node()`](medmodels.medrecord.querying.EdgeOperand.target_node){target="_blank"} (instead of [`neighbors()`](medmodels.medrecord.querying.NodeOperand.neighbors){target="_blank"}). + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4 +lines: 79-88 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`in_group()`](medmodels.medrecord.querying.EdgeOperand.in_group){target="_blank"} : Query edges that belong to that group. +- [`attribute()`](medmodels.medrecord.querying.EdgeOperand.attribute){target="_blank"} : Returns a [`MultipleValuesOperand()`](medmodels.medrecord.querying.MultipleValuesOperand){target="_blank"} to query on the values of the edges for that attribute. +- [`less_than()`](medmodels.medrecord.querying.MultipleValuesOperand.less_than){target="_blank"} : Query values that are less than that value. 
+- [`source_node()`](medmodels.medrecord.querying.EdgeOperand.source_node){target="_blank"} : Returns a [`NodeOperand()`](medmodels.medrecord.querying.NodeOperand) to query on the source nodes for those edges. +- [`is_max()`](medmodels.medrecord.querying.MultipleValuesOperand.is_max){target="_blank"} : Query on the values that hold the maximum value among all of them. +- [`target_node()`](medmodels.medrecord.querying.EdgeOperand.target_node){target="_blank"} : Returns a [`NodeOperand()`](medmodels.medrecord.querying.NodeOperand){target="_blank"} to query on the target nodes for those edges. +- [`contains()`](medmodels.medrecord.querying.NodeIndexOperand.contains){target="_blank"} : Query node indices containing that argument. +- [`select_edges()`](medmodels.medrecord.medrecord.MedRecord.select_edges){target="_blank"} : Select edges that match that query. + +::: + +## Combining Node & Edge Queries + +The full power of the query engine appears once you combine both operands inside the queries. In the following query, we are able to query for nodes that: + +- Are in group `patient` +- Their attribute `age` is greater than 30, and their attribute `gender` is equal to _"M"_. +- They have at least one edge that is in the `patient_drug` group, whose attribute `cost` is less than 200 and whose attribute `quantity` is equal to 1. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4 +lines: 91-107 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`in_group()`](medmodels.medrecord.querying.EdgeOperand.in_group){target="_blank"} : Query nodes that belong to that group. +- [`attribute()`](medmodels.medrecord.querying.EdgeOperand.attribute){target="_blank"} : Returns a [`MultipleValuesOperand()`](medmodels.medrecord.querying.MultipleValuesOperand){target="_blank"} to query on the values of the nodes for that attribute. 
+- [`less_than()`](medmodels.medrecord.querying.MultipleValuesOperand.less_than){target="_blank"} : Query values that are less than that value. +- [`equal_to()`](medmodels.medrecord.querying.MultipleValuesOperand.equal_to){target="_blank"} : Query values that are equal to that value. +- [`is_int()`](medmodels.medrecord.querying.MultipleValuesOperand.is_int){target="_blank"} : Query on the values whose format is `int`. +- [`greater_than()`](medmodels.medrecord.querying.MultipleValuesOperand.greater_than){target="_blank"} : Query values that are greater than that value. +- [`edges()`](medmodels.medrecord.querying.NodeOperand.edges){target="_blank"} : Returns a [`EdgeOperand()`](medmodels.medrecord.querying.EdgeOperand){target="_blank"} to query on the edges of those nodes. +- [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} : Select nodes that match that query. + +::: + +## OR & NOT operations + +The inherent structure of the query engine works with logical **AND** operations. However, a complete query engine should also include **OR** and **NOT** operations to be able to address all scenarios. For that, the methods [`exclude()`](medmodels.medrecord.querying.NodeOperand.exclude){target="_blank"} and [`either_or()`](medmodels.medrecord.querying.NodeOperand.either_or){target="_blank"} are provided. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4 +lines: 110-130 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`in_group()`](medmodels.medrecord.querying.EdgeOperand.in_group){target="_blank"} : Query nodes that belong to that group. +- [`attribute()`](medmodels.medrecord.querying.EdgeOperand.attribute){target="_blank"} : Returns a [`MultipleValuesOperand()`](medmodels.medrecord.querying.MultipleValuesOperand){target="_blank"} to query on the values of the nodes for that attribute. 
+- [`less_than()`](medmodels.medrecord.querying.MultipleValuesOperand.less_than){target="_blank"} : Query values that are less than that value. +- [`equal_to()`](medmodels.medrecord.querying.MultipleValuesOperand.equal_to){target="_blank"} : Query values that are equal to that value. +- [`greater_than()`](medmodels.medrecord.querying.MultipleValuesOperand.greater_than){target="_blank"} : Query values that are greater than that value. +- [`edges()`](medmodels.medrecord.querying.NodeOperand.edges){target="_blank"} : Returns a [`EdgeOperand()`](medmodels.medrecord.querying.EdgeOperand){target="_blank"} to query on the edges of those nodes. +- [`either_or()`](medmodels.medrecord.querying.NodeOperand.either_or){target="_blank"} : Queries edges that match either one or the other given queries. +- [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} : Select nodes that match that query. + +::: + +This also includes _"pat_3"_, which was not included in the previous section because none of its edges was included in the `query_edge_either()`, but it can be found in the `query_edge_or()` now. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4, 109-129 +lines: 133-139 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`in_group()`](medmodels.medrecord.querying.EdgeOperand.in_group){target="_blank"} : Query nodes that belong to that group. +- [`exclude()`](medmodels.medrecord.querying.NodeOperand.exclude){target="_blank"} : Exclude the nodes that belong to the given query. +- [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} : Select nodes that match that query. + +::: + +So this gives us all the patient nodes that were not selected with the previous query (logical **NOT** applied). + +## Clones + +Since the statements in the query engine are additive, every operation modifies the state of the query. 
That means that it is not possible to revert to a previous state unless the entire query is rewritten from scratch for that intermediate step. This can become inefficient and redundant, particularly when multiple branches of a query or comparisons with intermediate results are required. + +To address this limitation, the [`clone()`](medmodels.medrecord.querying.SingleValueOperand.clone){target="_blank"} method was introduced. This method allows users to create independent copies - or **clones** - of operands or computed values at any point in the query chain. Clones are completely decoupled from the original object, meaning that modifications of the clone do not affect the original, and vice versa. This functionality applies to all types of operands. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-4 +lines: 142-157 +--- +``` + +:::{dropdown} Methods used in the snippet + +- [`in_group()`](medmodels.medrecord.querying.EdgeOperand.in_group){target="_blank"} : Query nodes that belong to that group. +- [`index()`](medmodels.medrecord.querying.NodeOperand.index){target="_blank"}: Returns a [`NodeIndexOperand`](medmodels.medrecord.querying.NodeIndexOperand){target="_blank"} to query on the indices. +- [`contains()`](medmodels.medrecord.querying.NodeIndexOperand.contains){target="_blank"} : Query node indices containing that argument. +- [`attribute()`](medmodels.medrecord.querying.EdgeOperand.attribute){target="_blank"} : Returns a [`MultipleValuesOperand()`](medmodels.medrecord.querying.MultipleValuesOperand){target="_blank"} to query on the values of the edges for that attribute. +- [`mean()`](medmodels.medrecord.querying.MultipleValuesOperand.mean){target="_blank"}: Returns a [`SingleValueOperand`](medmodels.medrecord.querying.SingleValueOperand){target="_blank"} containing the mean of those values. 
+- [`clone()`](medmodels.medrecord.querying.SingleValueOperand.clone){target="_blank"} : Returns a clone of the operand. +- [`subtract()`](medmodels.medrecord.querying.SingleValueOperand.subtract){target="_blank"} : Subtract the argument from the single value operand. +- [`greater_than()`](medmodels.medrecord.querying.MultipleValuesOperand.greater_than){target="_blank"} : Query values that are greater than that value. +- [`less_than()`](medmodels.medrecord.querying.MultipleValuesOperand.less_than){target="_blank"} : Query values that are less than that value. +- [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} : Select nodes that match that query. + +::: + +## Queries as Function Arguments + +In all previous snippets, we have used queries with the method [`select_nodes()`](medmodels.medrecord.medrecord.MedRecord.select_nodes){target="_blank"} for representation purposes of its capacities. However, queries can also be used as function arguments to other methods or indexers from the [`MedRecord`](medmodels.medrecord.medrecord.MedRecord){target="_blank"} that take edge/node indices or the queries that result on those indices as arguments. Here are some examples of those functions: + +- Using the [`node[]`](medmodels.medrecord.medrecord.MedRecord.node){target="_blank"}, an indexer that retrieves the attributes for the given node indices. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-153 +lines: 160 +--- +``` + +- Using [`groups_of_node()`](medmodels.medrecord.medrecord.MedRecord.groups_of_node){target="_blank"}, a method that retrieves the groups to which a specific node index belongs to. 
+ +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-153 +lines: 161 +--- +``` + +- Using [`edge_endpoints()`](medmodels.medrecord.medrecord.MedRecord.edge_endpoints){target="_blank"}, a method that retrieves the source and target nodes of the specified edge(s) in the MedRecord. + +```{exec-literalinclude} scripts/02b_query_engine.py +--- +language: python +setup-lines: 1-154 +lines: 162 +--- +``` + +## Full example Code + +The full code examples for this chapter can be found here: + +```{literalinclude} scripts/02b_query_engine.py +--- +language: python +--- +``` diff --git a/docs/user_guide/getstarted.md b/docs/user_guide/getstarted.md index bdad0686..dad1e612 100644 --- a/docs/user_guide/getstarted.md +++ b/docs/user_guide/getstarted.md @@ -387,11 +387,11 @@ Grouping can also be used to make sub populations that share the same properties ```python young_age = 20 # query and get node indices -young_id = medrecord.select_nodes(node().attribute("age") < young_age) +young_id = medrecord.select_nodes(node().attribute("age").less_than(young_age)) medrecord.add_group(group="Young", node=young_id) # node operation -medrecord.add_group(group="Woman", node=node().attribute("gender").equal("F")) +medrecord.add_group(group="Woman", node=node().attribute("gender").equal_to("F")) ``` The nodes of a group or a list of groups can be easily accessed with `group()`. The return is either a list of node indices for a single group or a dictionary with each group name, @@ -538,7 +538,7 @@ print(medrecord.edge[patient_drug_edges[0]]) Edges can also be selected using the query engine. The logic operators and functions are similar to the ones used for `select_nodes()`. 
```python -medrecord.select_edges(edge().attribute("cost").greater(500)) +medrecord.select_edges(edge().attribute("cost").greater_than(500)) ``` [114, 117, 124] diff --git a/docs/user_guide/scripts/02_medrecord_intro.py b/docs/user_guide/scripts/02_medrecord_intro.py index c1647905..437a6742 100644 --- a/docs/user_guide/scripts/02_medrecord_intro.py +++ b/docs/user_guide/scripts/02_medrecord_intro.py @@ -35,9 +35,9 @@ record.add_group("US-Patients", ["Patient 01", "Patient 02"]) -record.print_attribute_table_nodes() +record.overview_nodes() -record.print_attribute_table_edges() +record.overview_edges() # Getting all available nodes record.nodes diff --git a/docs/user_guide/scripts/02b_query_engine.py b/docs/user_guide/scripts/02b_query_engine.py new file mode 100644 index 00000000..66beb173 --- /dev/null +++ b/docs/user_guide/scripts/02b_query_engine.py @@ -0,0 +1,162 @@ +from medmodels import MedRecord +from medmodels.medrecord.querying import EdgeOperand, NodeOperand + +medrecord = MedRecord().from_example_dataset() + + +# Basic node query +def query_node_in_patient(node: NodeOperand): + node.in_group("patient") + + +medrecord.select_nodes(query_node_in_patient) + + +# Intermediate node query +def query_node_patient_older_than_30(node: NodeOperand): + node.in_group("patient") + node.index().contains("pat") + + node.has_attribute("age") + node.attribute("age").greater_than(30) + + +medrecord.select_nodes(query_node_patient_older_than_30) + + +# Reusing node query +def query_node_reused(node: NodeOperand): + query_node_in_patient(node) + node.index().contains("pat") + + node.has_attribute("age") + node.attribute("age").greater_than(30) + + +medrecord.select_nodes(query_node_reused) + + +# Advanced node query +def query_node_male_patient_under_mean(node: NodeOperand): + node.in_group("patient") + node.index().contains("pat") + + gender = node.attribute("gender") + gender.lowercase() # Converts the string to lowercase + gender.trim() # Removes leading and trailing 
whitespaces + gender.equal_to("m") + + node.has_attribute("age") + mean_age = node.attribute("age").mean() + mean_age.subtract(5) # Subtract 5 from the mean age + node.attribute("age").less_than(mean_age) + + +medrecord.select_nodes(query_node_male_patient_under_mean) + + +# Node query with neighbors function +def query_node_neighbors(node: NodeOperand): + query_node_patient_older_than_30(node) + + description_neighbors = node.neighbors().attribute("description") + description_neighbors.lowercase() + description_neighbors.contains("fentanyl") + + +medrecord.select_nodes(query_node_neighbors) + + +# Basic edge query +def query_edge_patient_drug(edge: EdgeOperand): + edge.in_group("patient_drug") + + +edges = medrecord.select_edges(query_edge_patient_drug) +edges[0:5] + + +# Advanced edge query +def query_edge_old_patient_cheap_insulin(edge: EdgeOperand): + edge.in_group("patient_drug") + edge.attribute("cost").less_than(200) + + edge.source_node().attribute("age").is_max() + edge.target_node().attribute("description").contains("insulin") + + +medrecord.select_edges(query_edge_old_patient_cheap_insulin) + + +# Combined node and edge query +def query_edge_combined(edge: EdgeOperand): + edge.in_group("patient_drug") + edge.attribute("cost").less_than(200) + edge.attribute("quantity").equal_to(1) + + +def query_node_combined(node: NodeOperand): + node.in_group("patient") + node.attribute("age").is_int() + node.attribute("age").greater_than(30) + node.attribute("gender").equal_to("M") + + query_edge_combined(node.edges()) + + +medrecord.select_nodes(query_node_combined) + + +# Either/or query +def query_edge_either(edge: EdgeOperand): + edge.in_group("patient_drug") + edge.attribute("cost").less_than(200) + edge.attribute("quantity").equal_to(1) + + +def query_edge_or(edge: EdgeOperand): + edge.in_group("patient_drug") + edge.attribute("cost").less_than(200) + edge.attribute("quantity").equal_to(12) + + +def query_node_either_or(node: NodeOperand): + 
node.in_group("patient") + node.attribute("age").greater_than(30) + + node.edges().either_or(query_edge_either, query_edge_or) + + +medrecord.select_nodes(query_node_either_or) + + +# Exclude query +def query_node_exclude(node: NodeOperand): + node.in_group("patient") + node.exclude(query_node_either_or) + + +medrecord.select_nodes(query_node_exclude) + + +# Clone query +def query_node_clone(node: NodeOperand): + node.in_group("patient") + node.index().contains("pat") + + mean_age_original = node.attribute("age").mean() + mean_age_clone = mean_age_original.clone() # Clone the mean age + + # Subtract 5 from the cloned mean age (original remains unchanged) + mean_age_clone.subtract(5) + + node.attribute("age").less_than(mean_age_original) # Mean age + node.attribute("age").greater_than(mean_age_clone) # Mean age minus 5 + + +medrecord.select_nodes(query_node_clone) + +# Node queries as function arguments +medrecord.node[query_node_either_or] +medrecord.groups_of_node(query_node_patient_older_than_30) +medrecord.edge_endpoints(query_edge_old_patient_cheap_insulin) diff --git a/docs/user_guide/scripts/02b_show_dataset.py b/docs/user_guide/scripts/02b_show_dataset.py new file mode 100644 index 00000000..ec3b9988 --- /dev/null +++ b/docs/user_guide/scripts/02b_show_dataset.py @@ -0,0 +1,40 @@ +from typing import Tuple + +import pandas as pd + +from medmodels import MedRecord + +medrecord = MedRecord().from_example_dataset() + + +# Showing example dataset +def retrieve_example_dataset( + medrecord: MedRecord, +) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + patients = pd.DataFrame( + medrecord.node[medrecord.nodes_in_group("patient")] + ).T.sort_index() + drugs = pd.DataFrame( + medrecord.node[medrecord.nodes_in_group("drug")] + ).T.sort_index() + + patients_drugs_edges = medrecord.edge[medrecord.edges_in_group("patient_drug")] + for edge in patients_drugs_edges: + patients_drugs_edges[edge]["source"], patients_drugs_edges[edge]["target"] = ( + 
medrecord.edge_endpoints(edge) + ) + + patients_drugs = pd.DataFrame(patients_drugs_edges).T.sort_index() + patients_drugs = patients_drugs[ + ["source", "target"] + + [col for col in patients_drugs.columns if col not in ["source", "target"]] + ] + + return patients, drugs, patients_drugs + + +patients, drugs, patients_drugs_edges = retrieve_example_dataset(medrecord) + +patients.head(10) +drugs.head(10) +patients_drugs_edges.head(10)