From 0bfe2086bd67b17a0f5a23d0c386b27d0d8ecb31 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 2 May 2024 16:58:41 -0400 Subject: [PATCH 01/34] initial work on uploading to pephub with new backend --- pipestat/backends/pephub_backend/__init__.py | 0 .../backends/pephub_backend/pephubbackend.py | 135 ++++++++++++++++++ pipestat/pipestat.py | 21 +++ tests/test_pipestat.py | 38 +++++ 4 files changed, 194 insertions(+) create mode 100644 pipestat/backends/pephub_backend/__init__.py create mode 100644 pipestat/backends/pephub_backend/pephubbackend.py diff --git a/pipestat/backends/pephub_backend/__init__.py b/pipestat/backends/pephub_backend/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py new file mode 100644 index 00000000..e6e9fe0d --- /dev/null +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -0,0 +1,135 @@ +import datetime +from logging import getLogger + +import pephubclient +from pephubclient.constants import RegistryPath +from pephubclient.exceptions import ResponseError +from ubiquerg import parse_registry_path + +from ...backends.abstract import PipestatBackend +from ...const import PKG_NAME +from typing import List, Dict, Any, Optional, Union, NoReturn, Tuple + + +from pephubclient import PEPHubClient + + +_LOGGER = getLogger(PKG_NAME) + +class PEPHUBBACKEND(PipestatBackend): + def __init__( + self, + record_identifier: Optional[str] = None, + pephub_path: Optional[str] = None, + pipeline_name: Optional[str] = None, + pipeline_type: Optional[str] = None, + parsed_schema: Optional[str] = None, + status_schema: Optional[str] = None, + ): + """ + ADD DOCSTRINGS! + + """ + super().__init__(pipeline_type) + + self.phc = PEPHubClient() + self.pipeline_name = pipeline_name + self.parsed_schema = parsed_schema + + # Test Registry Path + _LOGGER.warning(f"Is pephub registry path? {pephubclient.is_registry_path(pephub_path)}") + + if pephubclient.is_registry_path(pephub_path): + # Deconstruct registry path so that phc can use it to create/update/delete samples + _LOGGER.warning("Initialize PEPHub Backend") + + self.pep_registry = RegistryPath(**parse_registry_path(pephub_path)) + _LOGGER.warning(f"Registry namespace: {self.pep_registry.namespace} item: {self.pep_registry.item} tag: {self.pep_registry.tag}") + + + else: + raise Exception + + + def report( + self, + values: Dict[str, Any], + record_identifier: Optional[str] = None, + force_overwrite: bool = True, + result_formatter: Optional[staticmethod] = None, + history_enabled: bool = True, + ) -> Union[List[str], bool]: + """ + Update the value of a result in a current namespace. + + This method overwrites any existing data and creates the required + hierarchical mapping structure if needed. + + :param Dict[str, Any] values: dict of results identifiers and values + to be reported + :param str record_identifier: unique identifier of the record + :param bool force_overwrite: Toggles force overwriting results, defaults to False + :param str result_formatter: function for formatting result + :return bool | list[str] results_formatted: return list of formatted string + """ + + # record_identifier = record_identifier or self.record_identifier + record_identifier = record_identifier + + result_formatter = result_formatter or self.result_formatter + results_formatted = [] + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + result_identifiers = list(values.keys()) + + if self.parsed_schema is not None: + self.assert_results_defined( + results=result_identifiers, pipeline_type=self.pipeline_type + ) + + # existing = self.list_results( + # record_identifier=record_identifier, + # restrict_to=result_identifiers, + # ) + existing = False + + if existing: + existing_str = ", ".join(existing) + _LOGGER.warning(f"These results exist for '{record_identifier}': {existing_str}") + if not force_overwrite: + return False + _LOGGER.info(f"Overwriting existing results: {existing_str}") + + if not existing: + # self._config.phc.sample.update( + # namespace=self._config.config.phc.namespace, + # name=self._config.config.phc.name, + # tag=self._config.config.phc.tag, + # sample_name=identifier, + # sample_dict=metadata, + # ) + + try: + self.phc.sample.update( + namespace=self.pep_registry.namespace, + name="TEST_PIPESTAT", + tag=self.pep_registry.tag, + sample_name=record_identifier, + sample_dict=values, + + ) + except ResponseError: + _LOGGER.warning("Login to pephubclient is required. phc login") + + + # results_formatted.append( + # result_formatter( + # pipeline_name=self.pipeline_name, + # record_identifier=record_identifier, + # res_id=res_id, + # value=val, + # ) + # ) + + + return True \ No newline at end of file diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 8a689781..f2a914a0 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -76,6 +76,12 @@ # We let this pass, but if the user attempts to create DBBackend, check_dependencies raises exception. pass +try: + from pipestat.backends.pephub_backend.pephubbackend import PEPHUBBACKEND +except ImportError: + # Let this pass, if phc dependencies cannot be imported, raise exception + pass + _LOGGER = getLogger(PKG_NAME) @@ -138,6 +144,7 @@ def __init__( result_formatter: staticmethod = default_formatter, multi_pipelines: bool = False, output_dir: Optional[str] = None, + pephub_path: Optional[str] = None, ): """ Initialize the PipestatManager object @@ -238,6 +245,8 @@ def __init__( if self.cfg[FILE_KEY]: self.initialize_filebackend(record_identifier, results_file_path, flag_file_dir) + elif pephub_path: + self.initialize_pephubbackend(record_identifier, pephub_path) else: self.initialize_dbbackend(record_identifier, show_db_logs) @@ -367,6 +376,18 @@ def initialize_filebackend(self, record_identifier, results_file_path, flag_file return + def initialize_pephubbackend(self, record_identifier, pephub_path): + self.backend = PEPHUBBACKEND( + record_identifier, + pephub_path, + self.cfg[PIPELINE_NAME], + self.cfg[PIPELINE_TYPE], + self.cfg[SCHEMA_KEY], + # self.cfg[STATUS_SCHEMA_KEY], + ) + + + @check_dependencies( dependency_list=["DBBackend"], msg="Missing required dependencies for this usage, e.g. try pip install pipestat['dbbackend']", diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index dd211663..e93f1fa0 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2513,3 +2513,41 @@ def test_select_history_complex_objects( assert len(history_result.keys()) == 1 assert "output_image" in history_result assert len(history_result["output_image"].keys()) == 2 + +#@pytest.mark.skipif(not DB_DEPENDENCIES, reason="Requires dependencies") +@pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available") +class TestPEPHUBBackend: + @pytest.mark.parametrize( + ["rec_id", "val"], + [ + ("test_pipestat_01", {"name_of_something": "test_name"}), + ], + ) + @pytest.mark.parametrize("backend", ["db"]) + def test_pephub_backend( + self, + rec_id, + val, + config_file_path, + schema_file_path, + results_file_path, + backend, + range_values, + ): + # with NamedTemporaryFile() as f, ContextManagerDBTesting(DB_URL): + # results_file_path = f.name + # args = dict(schema_path=output_schema_with_index, database_only=False) + # backend_data = ( + # {"config_file": config_file_path} + # if backend == "db" + # else {"results_file_path": results_file_path} + # ) + # args.update(backend_data) + # psm = SamplePipestatManager(**args) + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + psm.report(record_identifier=rec_id, values=val) + + print("done") From b428bf67d195f0fae1cd781fcc12762e8d6d4169 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 2 May 2024 17:07:46 -0400 Subject: [PATCH 02/34] fix name/tag so that reporting works --- .../backends/pephub_backend/pephubbackend.py | 32 +++++++++---------- pipestat/pipestat.py | 2 -- tests/test_pipestat.py | 3 +- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index e6e9fe0d..ad082380 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -16,6 +16,7 @@ _LOGGER = getLogger(PKG_NAME) + class PEPHUBBACKEND(PipestatBackend): def __init__( self, @@ -44,13 +45,13 @@ def __init__( _LOGGER.warning("Initialize PEPHub Backend") self.pep_registry = RegistryPath(**parse_registry_path(pephub_path)) - _LOGGER.warning(f"Registry namespace: {self.pep_registry.namespace} item: {self.pep_registry.item} tag: {self.pep_registry.tag}") - + _LOGGER.warning( + f"Registry namespace: {self.pep_registry.namespace} item: {self.pep_registry.item} tag: {self.pep_registry.tag}" + ) else: raise Exception - def report( self, values: Dict[str, Any], @@ -109,18 +110,16 @@ def report( # sample_dict=metadata, # ) - try: - self.phc.sample.update( - namespace=self.pep_registry.namespace, - name="TEST_PIPESTAT", - tag=self.pep_registry.tag, - sample_name=record_identifier, - sample_dict=values, - - ) - except ResponseError: - _LOGGER.warning("Login to pephubclient is required. phc login") - + # try: + self.phc.sample.create( + namespace=self.pep_registry.namespace, + name=self.pep_registry.item, + tag=self.pep_registry.tag, + sample_name=record_identifier, + sample_dict=values, + ) + # except ResponseError: + # _LOGGER.warning("Login to pephubclient is required. phc login") # results_formatted.append( # result_formatter( @@ -131,5 +130,4 @@ def report( # ) # ) - - return True \ No newline at end of file + return True diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index f2a914a0..25ac6ac4 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -386,8 +386,6 @@ def initialize_pephubbackend(self, record_identifier, pephub_path): # self.cfg[STATUS_SCHEMA_KEY], ) - - @check_dependencies( dependency_list=["DBBackend"], msg="Missing required dependencies for this usage, e.g. try pip install pipestat['dbbackend']", diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index e93f1fa0..bdca2068 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2514,7 +2514,8 @@ def test_select_history_complex_objects( assert "output_image" in history_result assert len(history_result["output_image"].keys()) == 2 -#@pytest.mark.skipif(not DB_DEPENDENCIES, reason="Requires dependencies") + +# @pytest.mark.skipif(not DB_DEPENDENCIES, reason="Requires dependencies") @pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available") class TestPEPHUBBackend: @pytest.mark.parametrize( From 02d573f65a185fd7c9768f7e099ee7ed0379b7d4 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 6 May 2024 11:47:18 -0400 Subject: [PATCH 03/34] begin work for select_records pephubbackend --- .../backends/pephub_backend/pephubbackend.py | 96 ++++++++++++++++++- tests/test_pipestat.py | 52 +++++++--- 2 files changed, 134 insertions(+), 14 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index ad082380..4e21fa62 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -1,4 +1,6 @@ +import copy import datetime +import operator from logging import getLogger import pephubclient @@ -8,7 +10,7 @@ from ...backends.abstract import PipestatBackend from ...const import PKG_NAME -from typing import List, Dict, Any, Optional, Union, NoReturn, Tuple +from typing import List, Dict, Any, Optional, Union, NoReturn, Tuple, Literal from pephubclient import PEPHubClient @@ -36,6 +38,7 @@ def __init__( self.phc = PEPHubClient() self.pipeline_name = pipeline_name self.parsed_schema = parsed_schema + self.pephub_path = pephub_path # Test Registry Path _LOGGER.warning(f"Is pephub registry path? {pephubclient.is_registry_path(pephub_path)}") @@ -117,6 +120,7 @@ def report( tag=self.pep_registry.tag, sample_name=record_identifier, sample_dict=values, + overwrite=force_overwrite, ) # except ResponseError: # _LOGGER.warning("Login to pephubclient is required. phc login") @@ -131,3 +135,93 @@ def report( # ) return True + + def select_records( + self, + columns: Optional[List[str]] = None, + filter_conditions: Optional[List[Dict[str, Any]]] = None, + limit: Optional[int] = 1000, + cursor: Optional[int] = None, + bool_operator: Optional[str] = "AND", + ) -> Dict[str, Any]: + """ + Perform a `SELECT` on the table + + :param list[str] columns: columns to include in the result + :param list[dict] filter_conditions: e.g. [{"key": ["id"], "operator": "eq", "value": 1)], operator list: + - eq for == + - lt for < + - ge for >= + - in for in_ + - like for like + :param int limit: maximum number of results to retrieve per page + :param int cursor: cursor position to begin retrieving records + :param bool bool_operator: Perform filtering with AND or OR Logic. + :return dict records_dict = { + "total_size": int, + "page_size": int, + "next_page_token": int, + "records": List[Dict[{key, Any}]], + } + """ + + if cursor: + # TODO can we support cursor through pephubclient? + _LOGGER.warning("Cursor not supported for PEPHubBackend, ignoring cursor") + + def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: + """ + Get python operator for a given string + + :param str op: desired operator, "eq", "lt" + :return: operator function + """ + + if op == "eq": + return operator.__eq__ + if op == "lt": + return operator.__lt__ + if op == "ge": + return operator.__ge__ + if op == "gt": + return operator.__gt__ + if op == "in": + return operator.contains + raise ValueError(f"Invalid filter operator: {op}") + + # Can we use query_param to do cursor/limit operations if the PEP is very large? + project = self.phc.load_project(project_registry_path=self.pephub_path) + print(project) + + # PEPHub uses sample_name not record_identifier + # Just get the items from the sample table because its a dataframe and return the dict to the end user + if columns is not None: + columns = copy.deepcopy(columns) + for i in ["sample_name"]: # Must add id, need it for cursor + if i not in columns: + columns.insert(0, i) + df = project.sample_table[columns] + else: + df = project.sample_table + + total_count = len(df) + + records_list = [] + if filter_conditions: + for filter_condition in filter_conditions: + retrieved_operator = get_operator(filter_condition["operator"]) + retrieved_results = [] + + # + # filtered_df = df[(df['sample_type'] == 'sample_type1') & (df['genome'] == 'genome1')] + # + # df[df['sample_name'] == 'sample1'] + + records_dict = { + "total_size": total_count, + "page_size": limit, + "next_page_token": 0, + "records": records_list, + } + + return records_dict diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index bdca2068..81ab3fa7 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2,6 +2,8 @@ import os.path import time from collections.abc import Mapping + +import pephubclient.exceptions from yacman import YAMLConfigManager import pytest @@ -2524,31 +2526,55 @@ class TestPEPHUBBackend: ("test_pipestat_01", {"name_of_something": "test_name"}), ], ) - @pytest.mark.parametrize("backend", ["db"]) - def test_pephub_backend( + def test_pephub_backend_report( self, rec_id, val, config_file_path, schema_file_path, results_file_path, - backend, range_values, ): - # with NamedTemporaryFile() as f, ContextManagerDBTesting(DB_URL): - # results_file_path = f.name - # args = dict(schema_path=output_schema_with_index, database_only=False) - # backend_data = ( - # {"config_file": config_file_path} - # if backend == "db" - # else {"results_file_path": results_file_path} - # ) - # args.update(backend_data) - # psm = SamplePipestatManager(**args) + pephuburl = "donaldcampbelljr/pipestat_demo:default" psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + # Value already exists should give an error unless forcing overwrite + with pytest.raises(pephubclient.exceptions.ResponseError): + psm.report(record_identifier=rec_id, values=val, force_overwrite=False) + + # force overwrite defaults to true, so it should have no problem reporting psm.report(record_identifier=rec_id, values=val) print("done") + + @pytest.mark.parametrize( + ["rec_id", "val"], + [ + ("test_pipestat_01", {"name_of_something": "test_name"}), + ], + ) + # @pytest.mark.parametrize("backend", ["db"]) + def test_pephub_backend_retrieve( + self, + rec_id, + val, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + psm.retrieve_one(record_identifier=rec_id) + # # Value already exists should give an error unless forcing overwrite + # with pytest.raises(pephubclient.exceptions.ResponseError): + # psm.report(record_identifier=rec_id, values=val, force_overwrite=False) + # + # # force overwrite defaults to true, so it should have no problem reporting + # psm.report(record_identifier=rec_id, values=val) + + print("done") From f8b872123c984906848191525210a1f96d501676 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 6 May 2024 18:35:44 -0400 Subject: [PATCH 04/34] add more for pephubbackend select_records --- .../backends/pephub_backend/pephubbackend.py | 74 ++++++++++++++++--- 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 4e21fa62..c68c36ea 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -169,6 +169,26 @@ def select_records( # TODO can we support cursor through pephubclient? _LOGGER.warning("Cursor not supported for PEPHubBackend, ignoring cursor") + # def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: + # """ + # Get python operator for a given string + # + # :param str op: desired operator, "eq", "lt" + # :return: operator function + # """ + # + # if op == "eq": + # return operator.__eq__ + # if op == "lt": + # return operator.__lt__ + # if op == "ge": + # return operator.__ge__ + # if op == "gt": + # return operator.__gt__ + # if op == "in": + # return operator.contains + # raise ValueError(f"Invalid filter operator: {op}") + def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: """ Get python operator for a given string @@ -178,15 +198,15 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: """ if op == "eq": - return operator.__eq__ + return "==" if op == "lt": - return operator.__lt__ + return "<" if op == "ge": - return operator.__ge__ + return ">=" if op == "gt": - return operator.__gt__ - if op == "in": - return operator.contains + return ">" + # if op == "in": + # return operator.contains raise ValueError(f"Invalid filter operator: {op}") # Can we use query_param to do cursor/limit operations if the PEP is very large? @@ -194,7 +214,7 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: print(project) # PEPHub uses sample_name not record_identifier - # Just get the items from the sample table because its a dataframe and return the dict to the end user + # Just get the items from the sample table because it is a dataframe and return the dict to the end user if columns is not None: columns = copy.deepcopy(columns) for i in ["sample_name"]: # Must add id, need it for cursor @@ -208,14 +228,46 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: records_list = [] if filter_conditions: + filter_expression = "" + all_filter_expressions = [] for filter_condition in filter_conditions: retrieved_operator = get_operator(filter_condition["operator"]) retrieved_results = [] + if filter_condition["key"] == "record_identifier": + filter_condition["key"] = "sample_name" - # - # filtered_df = df[(df['sample_type'] == 'sample_type1') & (df['genome'] == 'genome1')] - # - # df[df['sample_name'] == 'sample1'] + key = filter_condition["key"] + value = filter_condition["value"] + # Create querry for df based on filter conditions + + filter_expression = f"{key} {retrieved_operator} '{value}'" + all_filter_expressions.append(filter_expression) + # filter_expression = str(key) + ' ' + str(retrieved_operator) + ' ' + str(f'{value}') + + if len(all_filter_expressions) > 1: + + # This is for AND logic + if bool_operator == "AND": + for filter in all_filter_expressions: + df = df.query(filter) + if bool_operator == "OR": + filter = f"({' | '.join(str(cond) for cond in all_filter_expressions)})" + df = df.query(filter) + + else: + df = df.query(filter_expression) + + print("done") + + # Once we have the dataframe (filtered or unfiltered), convert to a dict using the sample_name/record_identifier as the primary key + + df2dict = df.set_index("sample_name").transpose().to_dict(orient="dict") + + # Filter out columns + # Must do this to align output structure with that of db_backend and file_backends + records_list = [] + for key, value in df2dict.items(): + records_list.append({key: value}) records_dict = { "total_size": total_count, From e13f9bd19693cdc1c3f50a9f7fd29506d1b343e4 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 13 May 2024 13:57:15 -0400 Subject: [PATCH 05/34] fix retrieve_many for pephub backend --- .../backends/pephub_backend/pephubbackend.py | 9 ++-- tests/test_pipestat.py | 50 ++++++++++++++----- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index c68c36ea..e5bcd830 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -205,8 +205,8 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: return ">=" if op == "gt": return ">" - # if op == "in": - # return operator.contains + if op == "in": + return "in" raise ValueError(f"Invalid filter operator: {op}") # Can we use query_param to do cursor/limit operations if the PEP is very large? @@ -240,7 +240,10 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: value = filter_condition["value"] # Create querry for df based on filter conditions - filter_expression = f"{key} {retrieved_operator} '{value}'" + if isinstance(value, list): + filter_expression = f"{key} {retrieved_operator} {value}" + else: + filter_expression = f"{key} {retrieved_operator} '{value}'" all_filter_expressions.append(filter_expression) # filter_expression = str(key) + ' ' + str(retrieved_operator) + ' ' + str(f'{value}') diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index 81ab3fa7..787ce726 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2517,13 +2517,29 @@ def test_select_history_complex_objects( assert len(history_result["output_image"].keys()) == 2 -# @pytest.mark.skipif(not DB_DEPENDENCIES, reason="Requires dependencies") @pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available") class TestPEPHUBBackend: @pytest.mark.parametrize( ["rec_id", "val"], [ - ("test_pipestat_01", {"name_of_something": "test_name"}), + ( + "test_pipestat_01", + { + "name_of_something": "test_name", + "number_of_things": 42, + "md5sum": "example_md5sum", + "percentage_of_things": 10, + }, + ), + ( + "test_pipestat_02", + { + "name_of_something": "test_name_02", + "number_of_things": 52, + "md5sum": "example_md5sum_02", + "percentage_of_things": 30, + }, + ), ], ) def test_pephub_backend_report( @@ -2555,8 +2571,7 @@ def test_pephub_backend_report( ("test_pipestat_01", {"name_of_something": "test_name"}), ], ) - # @pytest.mark.parametrize("backend", ["db"]) - def test_pephub_backend_retrieve( + def test_pephub_backend_retrieve_one( self, rec_id, val, @@ -2569,12 +2584,23 @@ def test_pephub_backend_retrieve( psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) - psm.retrieve_one(record_identifier=rec_id) - # # Value already exists should give an error unless forcing overwrite - # with pytest.raises(pephubclient.exceptions.ResponseError): - # psm.report(record_identifier=rec_id, values=val, force_overwrite=False) - # - # # force overwrite defaults to true, so it should have no problem reporting - # psm.report(record_identifier=rec_id, values=val) + result = psm.retrieve_one(record_identifier=rec_id) - print("done") + assert len(result.keys()) == 1 + + def test_pephub_backend_retrieve_many( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + rec_ids = ["test_pipestat_01", "test_pipestat_02"] + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + results = psm.retrieve_many(record_identifiers=rec_ids) + + assert len(results["records"]) == 2 From e56c8139e46e596c44870c1943590a7afb7b77fb Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 13 May 2024 14:06:28 -0400 Subject: [PATCH 06/34] add pephubclient to requirements --- requirements/requirements-pephub-backend.txt | 1 + requirements/requirements-test.txt | 1 + 2 files changed, 2 insertions(+) create mode 100644 requirements/requirements-pephub-backend.txt diff --git a/requirements/requirements-pephub-backend.txt b/requirements/requirements-pephub-backend.txt new file mode 100644 index 00000000..da4b7856 --- /dev/null +++ b/requirements/requirements-pephub-backend.txt @@ -0,0 +1 @@ +pephubclient \ No newline at end of file diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 0383dfbd..fbfe980b 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -17,5 +17,6 @@ uvicorn fastapi coverage smokeshow +pephubclient From fd5f8c6b77341706c64816a986ea7e1e4f42aeca Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 13 May 2024 15:17:35 -0400 Subject: [PATCH 07/34] add record and result removal for pephubclient backend --- .../backends/pephub_backend/pephubbackend.py | 137 ++++++++++++++++++ tests/test_pipestat.py | 38 ++++- 2 files changed, 173 insertions(+), 2 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index e5bcd830..44b732c8 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -36,6 +36,7 @@ def __init__( super().__init__(pipeline_type) self.phc = PEPHubClient() + self.record_identifier = record_identifier self.pipeline_name = pipeline_name self.parsed_schema = parsed_schema self.pephub_path = pephub_path @@ -55,6 +56,142 @@ def __init__( else: raise Exception + def check_record_exists( + self, + record_identifier: str, + ) -> bool: + """ + Check if the specified record exists in the table + + :param str record_identifier: record to check for + :return bool: whether the record exists in the table + """ + + query_hit = self.select_records( + filter_conditions=[ + { + "key": "record_identifier", + "operator": "eq", + "value": record_identifier, + } + ] + ) + + return bool(query_hit["records"]) + + def list_results( + self, + restrict_to: Optional[List[str]] = None, + record_identifier: str = None, + ) -> List[str]: + """ + Check if the specified results exist in the table + + :param List[str] restrict_to: results identifiers to check for + :param str record_identifier: record to check for + :return List[str] existing: if no result identifier specified, return all results for the record + :return List[str]: results identifiers that exist + """ + rid = record_identifier + record = self.select_records( + filter_conditions=[ + { + "key": "record_identifier", + "operator": "eq", + "value": rid, + } + ] + ) + try: + # TODO had to add rid to step deeper into data structure vs dbbackend implementation. Why are they different? + record = record["records"][0][rid] + except IndexError: + return [] + + if restrict_to is None: + return ( + [ + key + for key in self.parsed_schema.results_data.keys() + if getattr(record, key, None) is not None + ] + if record + else [] + ) + else: + return [r for r in restrict_to if record.get(r, None) is not None] if record else [] + + def remove( + self, + record_identifier: Optional[str] = None, + result_identifier: Optional[str] = None, + ) -> bool: + """ + Remove a result. + + If no result ID specified, the entire record + will be removed. + + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to be removed or None + if the record should be removed. + :return bool: whether the result has been removed + """ + + record_identifier = record_identifier or self.record_identifier + + if not self.check_record_exists( + record_identifier=record_identifier, + ): + _LOGGER.error(f"Record '{record_identifier}' not found") + return False + + if result_identifier and not self.check_result_exists( + result_identifier, record_identifier + ): + _LOGGER.error(f"'{result_identifier}' has not been reported for '{record_identifier}'") + return False + + if result_identifier: + values = {result_identifier: ""} + self.phc.sample.update( + namespace=self.pep_registry.namespace, + name=self.pep_registry.item, + tag=self.pep_registry.tag, + sample_name=record_identifier, + sample_dict=values, + ) + return True + else: + self.remove_record( + record_identifier=record_identifier, + rm_record=True, + ) + return True + + def remove_record( + self, + record_identifier: Optional[str] = None, + rm_record: Optional[bool] = False, + ) -> NoReturn: + """ + Remove a record, requires rm_record to be True + + :param str record_identifier: unique identifier of the record + :param bool rm_record: bool for removing record. + :return bool: whether the result has been removed + :raises RecordNotFoundError: if record not found + """ + if rm_record: + self.phc.sample.remove( + namespace=self.pep_registry.namespace, + name=self.pep_registry.item, + tag=self.pep_registry.tag, + sample_name=record_identifier, + ) + else: + _LOGGER.info(f" rm_record flag False, aborting Removing '{record_identifier}' record") + def report( self, values: Dict[str, Any], diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index 787ce726..e48acd1c 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2557,8 +2557,8 @@ def test_pephub_backend_report( psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) # Value already exists should give an error unless forcing overwrite - with pytest.raises(pephubclient.exceptions.ResponseError): - psm.report(record_identifier=rec_id, values=val, force_overwrite=False) + # with pytest.raises(pephubclient.exceptions.ResponseError): + # psm.report(record_identifier=rec_id, values=val, force_overwrite=False) # force overwrite defaults to true, so it should have no problem reporting psm.report(record_identifier=rec_id, values=val) @@ -2604,3 +2604,37 @@ def test_pephub_backend_retrieve_many( results = psm.retrieve_many(record_identifiers=rec_ids) assert len(results["records"]) == 2 + + def test_pephub_backend_remove( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + rec_ids = ["test_pipestat_01"] + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + results = psm.remove(record_identifier=rec_ids[0], result_identifier="name_of_something") + + assert results is True + + def test_pephub_backend_remove_record( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + rec_ids = ["test_pipestat_01"] + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + results = psm.remove_record(record_identifier=rec_ids[0], rm_record=False) + + results = psm.remove_record(record_identifier=rec_ids[0], rm_record=True) From 765cfb3d83565456d4abc3594961620165f6edbb Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 13 May 2024 15:30:29 -0400 Subject: [PATCH 08/34] remove unused code --- tests/test_pipestat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index e48acd1c..329da7b6 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2557,8 +2557,6 @@ def test_pephub_backend_report( psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) # Value already exists should give an error unless forcing overwrite - # with pytest.raises(pephubclient.exceptions.ResponseError): - # psm.report(record_identifier=rec_id, values=val, force_overwrite=False) # force overwrite defaults to true, so it should have no problem reporting psm.report(record_identifier=rec_id, values=val) From e31d1648591a5a30de48d1af80d31e9d22ddbec7 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 13 May 2024 15:33:35 -0400 Subject: [PATCH 09/34] comment out removal code to see if it prevents tests hanging --- tests/test_pipestat.py | 66 +++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index 329da7b6..0f3afeaa 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2603,36 +2603,36 @@ def test_pephub_backend_retrieve_many( assert len(results["records"]) == 2 - def test_pephub_backend_remove( - self, - config_file_path, - schema_file_path, - results_file_path, - range_values, - ): - pephuburl = "donaldcampbelljr/pipestat_demo:default" - - rec_ids = ["test_pipestat_01"] - - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) - - results = psm.remove(record_identifier=rec_ids[0], result_identifier="name_of_something") - - assert results is True - - def test_pephub_backend_remove_record( - self, - config_file_path, - schema_file_path, - results_file_path, - range_values, - ): - pephuburl = "donaldcampbelljr/pipestat_demo:default" - - rec_ids = ["test_pipestat_01"] - - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) - - results = psm.remove_record(record_identifier=rec_ids[0], rm_record=False) - - results = psm.remove_record(record_identifier=rec_ids[0], rm_record=True) + # def test_pephub_backend_remove( + # self, + # config_file_path, + # schema_file_path, + # results_file_path, + # range_values, + # ): + # pephuburl = "donaldcampbelljr/pipestat_demo:default" + # + # rec_ids = ["test_pipestat_01"] + # + # psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + # + # results = psm.remove(record_identifier=rec_ids[0], result_identifier="name_of_something") + # + # assert results is True + # + # def test_pephub_backend_remove_record( + # self, + # config_file_path, + # schema_file_path, + # results_file_path, + # range_values, + # ): + # pephuburl = "donaldcampbelljr/pipestat_demo:default" + # + # rec_ids = ["test_pipestat_01"] + # + # psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + # + # results = psm.remove_record(record_identifier=rec_ids[0], rm_record=False) + # + # results = psm.remove_record(record_identifier=rec_ids[0], rm_record=True) From e7750c10b2f356a0c1c012143c142eb60e7a0cf3 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 14 May 2024 10:00:43 -0400 Subject: [PATCH 10/34] comment out pephub testing for now --- tests/test_pipestat.py | 238 ++++++++++++++++++++--------------------- 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index 0f3afeaa..e9cd026e 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2517,122 +2517,122 @@ def test_select_history_complex_objects( assert len(history_result["output_image"].keys()) == 2 -@pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available") -class TestPEPHUBBackend: - @pytest.mark.parametrize( - ["rec_id", "val"], - [ - ( - "test_pipestat_01", - { - "name_of_something": "test_name", - "number_of_things": 42, - "md5sum": "example_md5sum", - "percentage_of_things": 10, - }, - ), - ( - "test_pipestat_02", - { - "name_of_something": "test_name_02", - "number_of_things": 52, - "md5sum": "example_md5sum_02", - "percentage_of_things": 30, - }, - ), - ], - ) - def test_pephub_backend_report( - self, - rec_id, - val, - config_file_path, - schema_file_path, - results_file_path, - range_values, - ): - - pephuburl = "donaldcampbelljr/pipestat_demo:default" - - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) - - # Value already exists should give an error unless forcing overwrite - - # force overwrite defaults to true, so it should have no problem reporting - psm.report(record_identifier=rec_id, values=val) - - print("done") - - @pytest.mark.parametrize( - ["rec_id", "val"], - [ - ("test_pipestat_01", {"name_of_something": "test_name"}), - ], - ) - def test_pephub_backend_retrieve_one( - self, - rec_id, - val, - config_file_path, - schema_file_path, - results_file_path, - range_values, - ): - pephuburl = "donaldcampbelljr/pipestat_demo:default" - - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) - - result = psm.retrieve_one(record_identifier=rec_id) - - assert len(result.keys()) == 1 - - def test_pephub_backend_retrieve_many( - self, - config_file_path, - schema_file_path, - results_file_path, - range_values, - ): - pephuburl = "donaldcampbelljr/pipestat_demo:default" - - rec_ids = ["test_pipestat_01", "test_pipestat_02"] - - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) - - results = psm.retrieve_many(record_identifiers=rec_ids) - - assert len(results["records"]) == 2 - - # def test_pephub_backend_remove( - # self, - # config_file_path, - # schema_file_path, - # results_file_path, - # range_values, - # ): - # pephuburl = "donaldcampbelljr/pipestat_demo:default" - # - # rec_ids = ["test_pipestat_01"] - # - # psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) - # - # results = psm.remove(record_identifier=rec_ids[0], result_identifier="name_of_something") - # - # assert results is True - # - # def test_pephub_backend_remove_record( - # self, - # config_file_path, - # schema_file_path, - # results_file_path, - # range_values, - # ): - # pephuburl = "donaldcampbelljr/pipestat_demo:default" - # - # rec_ids = ["test_pipestat_01"] - # - # psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) - # - # results = psm.remove_record(record_identifier=rec_ids[0], rm_record=False) - # - # results = psm.remove_record(record_identifier=rec_ids[0], rm_record=True) +# @pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available") +# class TestPEPHUBBackend: +# @pytest.mark.parametrize( +# ["rec_id", "val"], +# [ +# ( +# "test_pipestat_01", +# { +# "name_of_something": "test_name", +# "number_of_things": 42, +# "md5sum": "example_md5sum", +# "percentage_of_things": 10, +# }, +# ), +# ( +# "test_pipestat_02", +# { +# "name_of_something": "test_name_02", +# "number_of_things": 52, +# "md5sum": "example_md5sum_02", +# "percentage_of_things": 30, +# }, +# ), +# ], +# ) +# def test_pephub_backend_report( +# self, +# rec_id, +# val, +# config_file_path, +# schema_file_path, +# results_file_path, +# range_values, +# ): +# +# pephuburl = "donaldcampbelljr/pipestat_demo:default" +# +# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) +# +# # Value already exists should give an error unless forcing overwrite +# +# # force overwrite defaults to true, so it should have no problem reporting +# psm.report(record_identifier=rec_id, values=val) +# +# print("done") +# +# @pytest.mark.parametrize( +# ["rec_id", "val"], +# [ +# ("test_pipestat_01", {"name_of_something": "test_name"}), +# ], +# ) +# def test_pephub_backend_retrieve_one( +# self, +# rec_id, +# val, +# config_file_path, +# schema_file_path, +# results_file_path, +# range_values, +# ): +# pephuburl = "donaldcampbelljr/pipestat_demo:default" +# +# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) +# +# result = psm.retrieve_one(record_identifier=rec_id) +# +# assert len(result.keys()) == 1 +# +# def test_pephub_backend_retrieve_many( +# self, +# config_file_path, +# schema_file_path, +# results_file_path, +# range_values, +# ): +# pephuburl = "donaldcampbelljr/pipestat_demo:default" +# +# rec_ids = ["test_pipestat_01", "test_pipestat_02"] +# +# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) +# +# results = psm.retrieve_many(record_identifiers=rec_ids) +# +# assert len(results["records"]) == 2 +# +# def test_pephub_backend_remove( +# self, +# config_file_path, +# schema_file_path, +# results_file_path, +# range_values, +# ): +# pephuburl = "donaldcampbelljr/pipestat_demo:default" +# +# rec_ids = ["test_pipestat_01"] +# +# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) +# +# results = psm.remove(record_identifier=rec_ids[0], result_identifier="name_of_something") +# +# assert results is True +# +# def test_pephub_backend_remove_record( +# self, +# config_file_path, +# schema_file_path, +# results_file_path, +# range_values, +# ): +# pephuburl = "donaldcampbelljr/pipestat_demo:default" +# +# rec_ids = ["test_pipestat_01"] +# +# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) +# +# results = psm.remove_record(record_identifier=rec_ids[0], rm_record=False) +# +# results = psm.remove_record(record_identifier=rec_ids[0], rm_record=True) From 8172545bf8e53bfc3ea70507bca589930a13d4e4 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 14 May 2024 10:23:16 -0400 Subject: [PATCH 11/34] add skipping to pephub via db dependencies and comment regarding PHC --- tests/test_pipestat.py | 245 +++++++++++++++++++++-------------------- 1 file changed, 126 insertions(+), 119 deletions(-) diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index e9cd026e..49719f50 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2517,122 +2517,129 @@ def test_select_history_complex_objects( assert len(history_result["output_image"].keys()) == 2 -# @pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available") -# class TestPEPHUBBackend: -# @pytest.mark.parametrize( -# ["rec_id", "val"], -# [ -# ( -# "test_pipestat_01", -# { -# "name_of_something": "test_name", -# "number_of_things": 42, -# "md5sum": "example_md5sum", -# "percentage_of_things": 10, -# }, -# ), -# ( -# "test_pipestat_02", -# { -# "name_of_something": "test_name_02", -# "number_of_things": 52, -# "md5sum": "example_md5sum_02", -# "percentage_of_things": 30, -# }, -# ), -# ], -# ) -# def test_pephub_backend_report( -# self, -# rec_id, -# val, -# config_file_path, -# schema_file_path, -# results_file_path, -# range_values, -# ): -# -# pephuburl = "donaldcampbelljr/pipestat_demo:default" -# -# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) -# -# # Value already exists should give an error unless forcing overwrite -# -# # force overwrite defaults to true, so it should have no problem reporting -# psm.report(record_identifier=rec_id, values=val) -# -# print("done") -# -# @pytest.mark.parametrize( -# ["rec_id", "val"], -# [ -# ("test_pipestat_01", {"name_of_something": "test_name"}), -# ], -# ) -# def test_pephub_backend_retrieve_one( -# self, -# rec_id, -# val, -# config_file_path, -# schema_file_path, -# results_file_path, -# range_values, -# ): -# pephuburl = "donaldcampbelljr/pipestat_demo:default" -# -# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) -# -# result = psm.retrieve_one(record_identifier=rec_id) -# -# assert len(result.keys()) == 1 -# -# def test_pephub_backend_retrieve_many( -# self, -# config_file_path, -# schema_file_path, -# results_file_path, -# range_values, -# ): -# pephuburl = "donaldcampbelljr/pipestat_demo:default" -# -# rec_ids = ["test_pipestat_01", "test_pipestat_02"] -# -# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) -# -# results = psm.retrieve_many(record_identifiers=rec_ids) -# -# assert len(results["records"]) == 2 -# -# def test_pephub_backend_remove( -# self, -# config_file_path, -# schema_file_path, -# results_file_path, -# range_values, -# ): -# pephuburl = "donaldcampbelljr/pipestat_demo:default" -# -# rec_ids = ["test_pipestat_01"] -# -# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) -# -# results = psm.remove(record_identifier=rec_ids[0], result_identifier="name_of_something") -# -# assert results is True -# -# def test_pephub_backend_remove_record( -# self, -# config_file_path, -# schema_file_path, -# results_file_path, -# range_values, -# ): -# pephuburl = "donaldcampbelljr/pipestat_demo:default" -# -# rec_ids = ["test_pipestat_01"] -# -# psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) -# -# results = psm.remove_record(record_identifier=rec_ids[0], rm_record=False) -# -# results = psm.remove_record(record_identifier=rec_ids[0], rm_record=True) +@pytest.mark.skipif(not DB_DEPENDENCIES, reason="Requires dependencies") +@pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available") +class TestPEPHUBBackend: + """ + THESE TESTS WILL FAIL IF YOU ARE NOT SIGNED IN TO PEPHUB + + use `phc login` to sign in. + """ + + @pytest.mark.parametrize( + ["rec_id", "val"], + [ + ( + "test_pipestat_01", + { + "name_of_something": "test_name", + "number_of_things": 42, + "md5sum": "example_md5sum", + "percentage_of_things": 10, + }, + ), + ( + "test_pipestat_02", + { + "name_of_something": "test_name_02", + "number_of_things": 52, + "md5sum": "example_md5sum_02", + "percentage_of_things": 30, + }, + ), + ], + ) + def test_pephub_backend_report( + self, + rec_id, + val, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + # Value already exists should give an error unless forcing overwrite + + # force overwrite defaults to true, so it should have no problem reporting + psm.report(record_identifier=rec_id, values=val) + + print("done") + + @pytest.mark.parametrize( + ["rec_id", "val"], + [ + ("test_pipestat_01", {"name_of_something": "test_name"}), + ], + ) + def test_pephub_backend_retrieve_one( + self, + rec_id, + val, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + result = psm.retrieve_one(record_identifier=rec_id) + + assert len(result.keys()) == 1 + + def test_pephub_backend_retrieve_many( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + rec_ids = ["test_pipestat_01", "test_pipestat_02"] + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + results = psm.retrieve_many(record_identifiers=rec_ids) + + assert len(results["records"]) == 2 + + def test_pephub_backend_remove( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + rec_ids = ["test_pipestat_01"] + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + results = psm.remove(record_identifier=rec_ids[0], result_identifier="name_of_something") + + assert results is True + + def test_pephub_backend_remove_record( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + pephuburl = "donaldcampbelljr/pipestat_demo:default" + + rec_ids = ["test_pipestat_01"] + + psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + + results = psm.remove_record(record_identifier=rec_ids[0], rm_record=False) + + results = psm.remove_record(record_identifier=rec_ids[0], rm_record=True) From 5ceea721cb49ef6d02f51ed5ab4b44852b857803 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 20 May 2024 13:24:35 -0400 Subject: [PATCH 12/34] change gha to black stable --- .github/workflows/black.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 15435ea3..342c8430 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -8,7 +8,7 @@ jobs: steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 - - uses: psf/black@20.8b1 + - uses: psf/black@stable with: options: "--check --diff --color --verbose --line-length 99" From 11c67c87d22ab00ca39e52553c138a0fff291865 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 30 May 2024 10:03:32 -0400 Subject: [PATCH 13/34] update manifest to include pephub backend --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index ddae2813..f1d4aacf 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,5 +4,6 @@ include pipestat/schemas/* include pipestat/backends/* include pipestat/backends/file_backend/* include pipestat/backends/db_backend/* +include pipestat/backends/pephub_backend/* include pipestat/pipestatreader/* include pipestat/jinja_templates/* From 2098bc0759e0c3eb86066eef3e7e2ebf731ab838 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:38:09 -0400 Subject: [PATCH 14/34] add pephub url as constant --- tests/conftest.py | 3 +++ tests/test_pipestat.py | 17 ++++++----------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1476e457..49b31e91 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,9 @@ """ STANDARD_TEST_PIPE_ID = "default_pipeline_name" +PEPHUB_URL = "donaldcampbelljr/pipestat_demo:default" +# PEPHUB_URL ="donaldcampbelljr/pipestat_demo_2:default" + try: subprocess.check_output( "docker inspect pipestat_test_db --format '{{.State.Status}}'", shell=True diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index c5a13282..c11a8d80 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -26,6 +26,7 @@ DB_URL, REC_ID, DB_DEPENDENCIES, + PEPHUB_URL, ) from tempfile import NamedTemporaryFile, TemporaryDirectory @@ -2559,9 +2560,7 @@ def test_pephub_backend_report( range_values, ): - pephuburl = "donaldcampbelljr/pipestat_demo:default" - - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) # Value already exists should give an error unless forcing overwrite @@ -2585,9 +2584,8 @@ def test_pephub_backend_retrieve_one( results_file_path, range_values, ): - pephuburl = "donaldcampbelljr/pipestat_demo:default" - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) result = psm.retrieve_one(record_identifier=rec_id) @@ -2600,11 +2598,10 @@ def test_pephub_backend_retrieve_many( results_file_path, range_values, ): - pephuburl = "donaldcampbelljr/pipestat_demo:default" rec_ids = ["test_pipestat_01", "test_pipestat_02"] - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) results = psm.retrieve_many(record_identifiers=rec_ids) @@ -2617,11 +2614,10 @@ def test_pephub_backend_remove( results_file_path, range_values, ): - pephuburl = "donaldcampbelljr/pipestat_demo:default" rec_ids = ["test_pipestat_01"] - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) results = psm.remove(record_identifier=rec_ids[0], result_identifier="name_of_something") @@ -2634,11 +2630,10 @@ def test_pephub_backend_remove_record( results_file_path, range_values, ): - pephuburl = "donaldcampbelljr/pipestat_demo:default" rec_ids = ["test_pipestat_01"] - psm = PipestatManager(pephub_path=pephuburl, schema_path=schema_file_path) + psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) results = psm.remove_record(record_identifier=rec_ids[0], rm_record=False) From 6523c02e1e135c30c0fc0c66220e96238b88f18f Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 11 Jul 2024 18:01:04 -0400 Subject: [PATCH 15/34] work towards setting and getting statuses from pephub --- .../backends/pephub_backend/pephubbackend.py | 128 ++++++++++++++---- pipestat/pipestat.py | 3 +- tests/test_pipestat.py | 30 ++++ 3 files changed, 136 insertions(+), 25 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 44b732c8..7d25a27a 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -9,12 +9,13 @@ from ubiquerg import parse_registry_path from ...backends.abstract import PipestatBackend -from ...const import PKG_NAME +from ...const import PKG_NAME, STATUS from typing import List, Dict, Any, Optional, Union, NoReturn, Tuple, Literal from pephubclient import PEPHubClient +from ...exceptions import UnrecognizedStatusError, RecordNotFoundError, ColumnNotFoundError _LOGGER = getLogger(PKG_NAME) @@ -28,6 +29,7 @@ def __init__( pipeline_type: Optional[str] = None, parsed_schema: Optional[str] = None, status_schema: Optional[str] = None, + result_formatter: Optional[staticmethod] = None, ): """ ADD DOCSTRINGS! @@ -37,9 +39,11 @@ def __init__( self.phc = PEPHubClient() self.record_identifier = record_identifier + self.pephub_path = pephub_path self.pipeline_name = pipeline_name self.parsed_schema = parsed_schema - self.pephub_path = pephub_path + self.status_schema = status_schema + self.result_formatter = result_formatter # Test Registry Path _LOGGER.warning(f"Is pephub registry path? {pephubclient.is_registry_path(pephub_path)}") @@ -214,7 +218,7 @@ def report( :return bool | list[str] results_formatted: return list of formatted string """ - # record_identifier = record_identifier or self.record_identifier + record_identifier = record_identifier or self.record_identifier record_identifier = record_identifier result_formatter = result_formatter or self.result_formatter @@ -228,39 +232,37 @@ def report( results=result_identifiers, pipeline_type=self.pipeline_type ) - # existing = self.list_results( - # record_identifier=record_identifier, - # restrict_to=result_identifiers, - # ) - existing = False + existing = self.list_results( + record_identifier=record_identifier, + restrict_to=result_identifiers, + ) - if existing: + if not existing: + + # try: + self.phc.sample.create( + namespace=self.pep_registry.namespace, + name=self.pep_registry.item, + tag=self.pep_registry.tag, + sample_name=record_identifier, + sample_dict=values, + overwrite=force_overwrite, + ) + + elif existing: existing_str = ", ".join(existing) _LOGGER.warning(f"These results exist for '{record_identifier}': {existing_str}") if not force_overwrite: return False _LOGGER.info(f"Overwriting existing results: {existing_str}") - if not existing: - # self._config.phc.sample.update( - # namespace=self._config.config.phc.namespace, - # name=self._config.config.phc.name, - # tag=self._config.config.phc.tag, - # sample_name=identifier, - # sample_dict=metadata, - # ) - - # try: - self.phc.sample.create( + self.phc.sample.update( namespace=self.pep_registry.namespace, name=self.pep_registry.item, tag=self.pep_registry.tag, sample_name=record_identifier, sample_dict=values, - overwrite=force_overwrite, ) - # except ResponseError: - # _LOGGER.warning("Login to pephubclient is required. phc login") # results_formatted.append( # result_formatter( @@ -273,6 +275,74 @@ def report( return True + def set_status( + self, + status_identifier: str, + record_identifier: str = None, + ) -> None: + """ + Set pipeline run status. + + The status identifier needs to match one of identifiers specified in + the status schema. A basic, ready to use, status schema is shipped with + this package. + + :param str status_identifier: status to set, one of statuses defined + in the status schema + :param str record_identifier: record identifier to set the + pipeline status for + """ + + record_identifier = record_identifier or self.record_identifier + known_status_identifiers = self.status_schema.keys() + if status_identifier not in known_status_identifiers: + raise UnrecognizedStatusError( + f"'{status_identifier}' is not a defined status identifier. " + f"These are allowed: {known_status_identifiers}" + ) + prev_status = self.get_status(record_identifier) + try: + self.report( + values={STATUS: status_identifier}, + record_identifier=record_identifier, + ) + except Exception as e: + _LOGGER.error( + f"Could not insert into the status table ('{self.table_name}'). Exception: {e}" + ) + raise + if prev_status: + _LOGGER.debug(f"Changed status from '{prev_status}' to '{status_identifier}'") + + def get_status(self, record_identifier: str) -> Optional[str]: + """ + Get pipeline status + + :param str record_identifier: record identifier to set the + pipeline status for + :return str status + """ + + try: + result = self.select_records( + columns=[STATUS], + filter_conditions=[ + { + "key": "record_identifier", + "operator": "eq", + "value": record_identifier, + } + ], + ) + except RecordNotFoundError: + return None + try: + status = result["records"][0][record_identifier]["status"] + except IndexError or KeyError: + status = None + + return status + def select_records( self, columns: Optional[List[str]] = None, @@ -357,7 +427,17 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: for i in ["sample_name"]: # Must add id, need it for cursor if i not in columns: columns.insert(0, i) - df = project.sample_table[columns] + try: + df = project.sample_table[columns] + except KeyError: + records_dict = { + "total_size": 0, + "page_size": limit, + "next_page_token": 0, + "records": [], + } + return records_dict + else: df = project.sample_table diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 38a6eec3..7dfa6b53 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -399,7 +399,8 @@ def initialize_pephubbackend(self, record_identifier, pephub_path): self.cfg[PIPELINE_NAME], self.cfg[PIPELINE_TYPE], self.cfg[SCHEMA_KEY], - # self.cfg[STATUS_SCHEMA_KEY], + self.cfg[STATUS_SCHEMA_KEY], + self.cfg[RESULT_FORMATTER], ) @check_dependencies( diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index c11a8d80..79bea636 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2607,6 +2607,36 @@ def test_pephub_backend_retrieve_many( assert len(results["records"]) == 2 + def test_get_status_pephub_backend( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + rec_ids = ["test_pipestat_01"] + + psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) + + result = psm.get_status(record_identifier=rec_ids[0]) + + assert result is None + + def test_set_status_pephub_backend( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + rec_ids = ["test_pipestat_01"] + + psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) + + result = psm.set_status(record_identifier=rec_ids[0], status_identifier="completed") + + assert result is None + def test_pephub_backend_remove( self, config_file_path, From 25b0b3f224672406e7c524e4d9f72fe357aa78a4 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 12 Jul 2024 09:13:10 -0400 Subject: [PATCH 16/34] fix returning status for PEPhub backend --- pipestat/backends/pephub_backend/pephubbackend.py | 2 ++ tests/test_pipestat.py | 14 +++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 7d25a27a..513f7a01 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -341,6 +341,8 @@ def get_status(self, record_identifier: str) -> Optional[str]: except IndexError or KeyError: status = None + if status == "": # PEPhub returns '' for empty cell + status = None return status def select_records( diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index 79bea636..225f4802 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2607,7 +2607,7 @@ def test_pephub_backend_retrieve_many( assert len(results["records"]) == 2 - def test_get_status_pephub_backend( + def test_set_status_pephub_backend( self, config_file_path, schema_file_path, @@ -2618,25 +2618,29 @@ def test_get_status_pephub_backend( psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) - result = psm.get_status(record_identifier=rec_ids[0]) + result = psm.set_status(record_identifier=rec_ids[0], status_identifier="completed") assert result is None - def test_set_status_pephub_backend( + def test_get_status_pephub_backend( self, config_file_path, schema_file_path, results_file_path, range_values, ): - rec_ids = ["test_pipestat_01"] + rec_ids = ["sample1", "test_pipestat_01"] psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) - result = psm.set_status(record_identifier=rec_ids[0], status_identifier="completed") + result = psm.get_status(record_identifier=rec_ids[0]) assert result is None + result = psm.get_status(record_identifier=rec_ids[1]) + + assert result == "completed" + def test_pephub_backend_remove( self, config_file_path, From 5c95fb02cd0b6dcd7642d916aaa8d58371de489c Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 12 Jul 2024 10:00:39 -0400 Subject: [PATCH 17/34] clean up commented code --- .../backends/pephub_backend/pephubbackend.py | 24 ------------------- tests/test_pipestat.py | 3 +-- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 513f7a01..1b9db16c 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -378,26 +378,6 @@ def select_records( # TODO can we support cursor through pephubclient? _LOGGER.warning("Cursor not supported for PEPHubBackend, ignoring cursor") - # def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: - # """ - # Get python operator for a given string - # - # :param str op: desired operator, "eq", "lt" - # :return: operator function - # """ - # - # if op == "eq": - # return operator.__eq__ - # if op == "lt": - # return operator.__lt__ - # if op == "ge": - # return operator.__ge__ - # if op == "gt": - # return operator.__gt__ - # if op == "in": - # return operator.contains - # raise ValueError(f"Invalid filter operator: {op}") - def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: """ Get python operator for a given string @@ -445,13 +425,11 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: total_count = len(df) - records_list = [] if filter_conditions: filter_expression = "" all_filter_expressions = [] for filter_condition in filter_conditions: retrieved_operator = get_operator(filter_condition["operator"]) - retrieved_results = [] if filter_condition["key"] == "record_identifier": filter_condition["key"] = "sample_name" @@ -464,7 +442,6 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: else: filter_expression = f"{key} {retrieved_operator} '{value}'" all_filter_expressions.append(filter_expression) - # filter_expression = str(key) + ' ' + str(retrieved_operator) + ' ' + str(f'{value}') if len(all_filter_expressions) > 1: @@ -482,7 +459,6 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: print("done") # Once we have the dataframe (filtered or unfiltered), convert to a dict using the sample_name/record_identifier as the primary key - df2dict = df.set_index("sample_name").transpose().to_dict(orient="dict") # Filter out columns diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index 225f4802..e61d7bc1 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2518,8 +2518,7 @@ def test_select_history_complex_objects( assert len(history_result["output_image"].keys()) == 2 -@pytest.mark.skipif(not DB_DEPENDENCIES, reason="Requires dependencies") -@pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available") +@pytest.mark.skip(reason="requires pephub login to function") class TestPEPHUBBackend: """ THESE TESTS WILL FAIL IF YOU ARE NOT SIGNED IN TO PEPHUB From 38804d83c4c8a43522b24fa4916fe419c6aed7bf Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 12 Jul 2024 10:42:07 -0400 Subject: [PATCH 18/34] add results formatting to pephub reported results --- .../backends/pephub_backend/pephubbackend.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 1b9db16c..66776b2f 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -264,16 +264,16 @@ def report( sample_dict=values, ) - # results_formatted.append( - # result_formatter( - # pipeline_name=self.pipeline_name, - # record_identifier=record_identifier, - # res_id=res_id, - # value=val, - # ) - # ) - - return True + for res_id, val in values.items(): + results_formatted.append( + result_formatter( + pipeline_name=self.pipeline_name, + record_identifier=record_identifier, + res_id=res_id, + value=val, + ) + ) + return results_formatted def set_status( self, From 90992d505f421f9ee69ee4ed6f4aaf172abdcc26 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 12 Jul 2024 15:30:49 -0400 Subject: [PATCH 19/34] add warning if history_enabled set to true --- pipestat/backends/pephub_backend/pephubbackend.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 66776b2f..214088bb 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -202,7 +202,7 @@ def report( record_identifier: Optional[str] = None, force_overwrite: bool = True, result_formatter: Optional[staticmethod] = None, - history_enabled: bool = True, + history_enabled: Optional[bool] = False, ) -> Union[List[str], bool]: """ Update the value of a result in a current namespace. @@ -210,6 +210,7 @@ def report( This method overwrites any existing data and creates the required hierarchical mapping structure if needed. + :param history_enabled: this parameter is currently ignored as PEPHub :param Dict[str, Any] values: dict of results identifiers and values to be reported :param str record_identifier: unique identifier of the record @@ -217,13 +218,16 @@ def report( :param str result_formatter: function for formatting result :return bool | list[str] results_formatted: return list of formatted string """ + if history_enabled: + _LOGGER.warning( + msg="history_enabled set to true but this feature is handled by PEPHub and not Pipestat" + ) record_identifier = record_identifier or self.record_identifier record_identifier = record_identifier result_formatter = result_formatter or self.result_formatter results_formatted = [] - current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") result_identifiers = list(values.keys()) From 3be2f72be342d206776949159e143986e6308212 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 12 Jul 2024 15:42:46 -0400 Subject: [PATCH 20/34] add warning for retrieve_history --- pipestat/pipestat.py | 5 ++++- tests/test_pipestat.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 7dfa6b53..1449850b 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -176,7 +176,7 @@ def __init__( # Load and validate database configuration # If results_file_path exists, backend is a file else backend is database. - + self.cfg["pephub_path"] = pephub_path self.cfg["config_path"] = select_config(config_file, ENV_VARS["config"]) if config_dict is not None: @@ -810,6 +810,9 @@ def retrieve_history( _LOGGER.warning(f"No history available for Record: {record_identifier}") return {} + elif self.cfg["pephub_path"]: + _LOGGER.warning(f"Retrieving history not supported for PEPHub backend") + return None else: if result_identifier: history = self.backend.retrieve_history_db(record_identifier, result_identifier)[ diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index e61d7bc1..d7283985 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2671,3 +2671,19 @@ def test_pephub_backend_remove_record( results = psm.remove_record(record_identifier=rec_ids[0], rm_record=False) results = psm.remove_record(record_identifier=rec_ids[0], rm_record=True) + + def test_pephub_unsupported_funcs( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + + rec_ids = ["test_pipestat_01"] + + psm = PipestatManager(pephub_path=PEPHUB_URL, schema_path=schema_file_path) + + results = psm.retrieve_history(record_identifier=rec_ids[0]) + + assert results is None From 4110c26161669f30de5531cc1a4aab4ab56bff94 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 12 Jul 2024 18:10:53 -0400 Subject: [PATCH 21/34] add warning for linking results and using pephubbackend --- pipestat/pipestat.py | 8 ++++++-- tests/test_pipestat.py | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 1449850b..81b02ad2 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -883,12 +883,16 @@ def set_status( self.backend.set_status(status_identifier, r_id) @require_backend - def link(self, link_dir) -> str: + def link(self, link_dir) -> Union[str, None]: """ This function creates a link structure such that results are organized by type. :param str link_dir: path to desired symlink output directory - :return str linked_results_path: path to symlink directory + :return str | None linked_results_path: path to symlink directory or None """ + if self.cfg["pephub_path"]: + _LOGGER.warning(f"Linking results is not supported for PEPHub backend.") + return None + self.check_multi_results() linked_results_path = self.backend.link(link_dir=link_dir) diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index d7283985..aaf4979c 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2687,3 +2687,5 @@ def test_pephub_unsupported_funcs( results = psm.retrieve_history(record_identifier=rec_ids[0]) assert results is None + + psm.link("somedir") From 70af18f926d4cdaa1c4e9eb1d61bdf8eb5ccb76a Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 12 Jul 2024 18:18:20 -0400 Subject: [PATCH 22/34] add warnings for list recent results and summarize --- pipestat/pipestat.py | 10 +++++++++- tests/test_pipestat.py | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 81b02ad2..4b3f9627 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -491,6 +491,9 @@ def list_recent_results( :return dict results: a dict containing start, end, num of records, and list of retrieved records """ + if self.cfg["pephub_path"]: + _LOGGER.warning(f"List recent results not supported for PEPHub backend") + return {} date_format = "%Y-%m-%d %H:%M:%S" if start is None: start = datetime.datetime.now() @@ -904,7 +907,7 @@ def summarize( looper_samples: Optional[list] = None, amendment: Optional[str] = None, portable: Optional[bool] = False, - ) -> None: + ) -> Union[str, None]: """ Builds a browsable html report for reported results. :param Iterable[str] looper_samples: list of looper Samples from PEP @@ -913,6 +916,11 @@ def summarize( :return str: report_path """ + if self.cfg["pephub_path"]: + _LOGGER.warning( + f"Summarize not supported for PEPHub backend. Please generate report via PEPHub website." + ) + return None self.check_multi_results() diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index aaf4979c..a085a990 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2689,3 +2689,5 @@ def test_pephub_unsupported_funcs( assert results is None psm.link("somedir") + psm.list_recent_results() + psm.summarize() From 4b6771c58d9715fa26753104bdbd59241039cc53 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:37:25 -0400 Subject: [PATCH 23/34] add exception for bad registry path --- pipestat/backends/pephub_backend/pephubbackend.py | 9 +++------ pipestat/exceptions.py | 8 ++++++++ tests/test_pipestat.py | 10 ++++++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 214088bb..0b45ec56 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -1,21 +1,18 @@ import copy -import datetime -import operator from logging import getLogger import pephubclient from pephubclient.constants import RegistryPath -from pephubclient.exceptions import ResponseError from ubiquerg import parse_registry_path from ...backends.abstract import PipestatBackend from ...const import PKG_NAME, STATUS -from typing import List, Dict, Any, Optional, Union, NoReturn, Tuple, Literal +from typing import List, Dict, Any, Optional, Union, NoReturn, Literal from pephubclient import PEPHubClient -from ...exceptions import UnrecognizedStatusError, RecordNotFoundError, ColumnNotFoundError +from ...exceptions import UnrecognizedStatusError, RecordNotFoundError, PipestatPEPHubError _LOGGER = getLogger(PKG_NAME) @@ -58,7 +55,7 @@ def __init__( ) else: - raise Exception + raise PipestatPEPHubError(msg=f"Registry path to PEP is invalid: {pephub_path}") def check_record_exists( self, diff --git a/pipestat/exceptions.py b/pipestat/exceptions.py index a13c2d18..1f072e8f 100644 --- a/pipestat/exceptions.py +++ b/pipestat/exceptions.py @@ -20,6 +20,7 @@ "PipestatDependencyError", "ColumnNotFoundError", "SchemaValidationErrorDuringReport", + "PipestatPEPHubError", ] @@ -123,6 +124,13 @@ def __init__(self, msg): super(PipestatDatabaseError, self).__init__(msg) +class PipestatPEPHubError(PipestatError): + """PEPHub backend error""" + + def __init__(self, msg): + super(PipestatPEPHubError, self).__init__(msg) + + class InvalidTypeError(PipestatError): """Type of the reported value is not supported""" diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index a085a990..3275500f 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2691,3 +2691,13 @@ def test_pephub_unsupported_funcs( psm.link("somedir") psm.list_recent_results() psm.summarize() + + def test_pephub_unsupported_funcs( + self, + config_file_path, + schema_file_path, + results_file_path, + range_values, + ): + with pytest.raises(PipestatPEPHubError): + psm = PipestatManager(pephub_path="bogus_path", schema_path=schema_file_path) From 7e4c71669f21dc345b8201383ef9503283827f5f Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:56:43 -0400 Subject: [PATCH 24/34] clean up, add docstrings --- .../backends/pephub_backend/pephubbackend.py | 5 +--- pipestat/pipestat.py | 28 +++++++++++++++++-- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 0b45ec56..a5caca21 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -42,12 +42,9 @@ def __init__( self.status_schema = status_schema self.result_formatter = result_formatter - # Test Registry Path - _LOGGER.warning(f"Is pephub registry path? {pephubclient.is_registry_path(pephub_path)}") - if pephubclient.is_registry_path(pephub_path): # Deconstruct registry path so that phc can use it to create/update/delete samples - _LOGGER.warning("Initialize PEPHub Backend") + _LOGGER.debug("Initialize PEPHub Backend") self.pep_registry = RegistryPath(**parse_registry_path(pephub_path)) _LOGGER.warning( diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 4b3f9627..c89a602c 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -359,7 +359,19 @@ def resolve_results_file_path(self, results_file_path): return results_file_path return results_file_path - def initialize_filebackend(self, record_identifier, results_file_path, flag_file_dir): + def initialize_filebackend( + self, + record_identifier: str = None, + results_file_path: str = None, + flag_file_dir: str = None, + ): + """ + Initializes the file backend + :param str record_identifier: the record identifier + :param str results_file_path: the path to the results file used for the backend + :param str flag_file_dir: the path to the flag file directory + """ + # Check if there will be multiple results_file_paths _LOGGER.debug(f"Determined file as backend: {results_file_path}") @@ -392,7 +404,12 @@ def initialize_filebackend(self, record_identifier, results_file_path, flag_file return - def initialize_pephubbackend(self, record_identifier, pephub_path): + def initialize_pephubbackend(self, record_identifier: str = None, pephub_path: str = None): + """ + Initializes the pephub backend + :param str record_identifier: the record identifier + :param str pephub_path: the path to the pephub registry + """ self.backend = PEPHUBBACKEND( record_identifier, pephub_path, @@ -407,7 +424,12 @@ def initialize_pephubbackend(self, record_identifier, pephub_path): dependency_list=["DBBackend"], msg="Missing required dependencies for this usage, e.g. try pip install pipestat['dbbackend']", ) - def initialize_dbbackend(self, record_identifier, show_db_logs): + def initialize_dbbackend(self, record_identifier: str = None, show_db_logs: bool = False): + """ + Initializes the database backend + :param str record_identifier: the record identifier + :param bool show_db_logs: boolean to show_db_logs + """ _LOGGER.debug("Determined database as backend") if self.cfg[SCHEMA_KEY] is None: raise SchemaNotFoundError("Output schema must be supplied for DB backends.") From d81456e3324dcb90f312ce430c6952aa52f7dc7f Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 12:09:08 -0400 Subject: [PATCH 25/34] update more docstrings --- pipestat/backends/file_backend/filebackend.py | 2 +- pipestat/backends/pephub_backend/pephubbackend.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pipestat/backends/file_backend/filebackend.py b/pipestat/backends/file_backend/filebackend.py index 8a22de50..05bef542 100644 --- a/pipestat/backends/file_backend/filebackend.py +++ b/pipestat/backends/file_backend/filebackend.py @@ -46,7 +46,7 @@ def __init__( this object method calls :param str pipeline_name: name of pipeline associated with result :param str pipeline_type: "sample" or "project" - :param str parsed_schema: results output schema. Used to construct DB columns. + :param str parsed_schema: results output schema. :param str status_schema: schema containing pipeline statuses e.g. 'running' :param str status_file_dir: directory for placing status flags :param str result_formatter: function for formatting result diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index a5caca21..def73f34 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -29,8 +29,16 @@ def __init__( result_formatter: Optional[staticmethod] = None, ): """ - ADD DOCSTRINGS! - + Class representing a PEPHub backend + :param str record_identifier: record identifier to report for. This + creates a weak bound to the record, which can be overridden in + this object method calls + :param str pephub_path: registry path to PEP + :param str pipeline_name: name of pipeline associated with result + :param str pipeline_type: "sample" or "project" + :param str parsed_schema: results output schema. + :param str status_schema: schema containing pipeline statuses e.g. 'running' + :param str result_formatter: function for formatting result """ super().__init__(pipeline_type) From 2463db1dbe22fda8a4b8ee1f84380edf5b4197ab Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 12:14:09 -0400 Subject: [PATCH 26/34] change warning to debug --- pipestat/backends/pephub_backend/pephubbackend.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index def73f34..4a3d4329 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -51,11 +51,13 @@ def __init__( self.result_formatter = result_formatter if pephubclient.is_registry_path(pephub_path): - # Deconstruct registry path so that phc can use it to create/update/delete samples + _LOGGER.debug("Initialize PEPHub Backend") + # Deconstruct registry path so that phc can use it to create/update/delete samples self.pep_registry = RegistryPath(**parse_registry_path(pephub_path)) - _LOGGER.warning( + + _LOGGER.debug( f"Registry namespace: {self.pep_registry.namespace} item: {self.pep_registry.item} tag: {self.pep_registry.tag}" ) From 0f41a6f273244a1a152d79894dbcb0c90a146c0b Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 12:17:14 -0400 Subject: [PATCH 27/34] remove todos --- pipestat/backends/pephub_backend/pephubbackend.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 4a3d4329..748b8a6b 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -111,7 +111,6 @@ def list_results( ] ) try: - # TODO had to add rid to step deeper into data structure vs dbbackend implementation. Why are they different? record = record["records"][0][rid] except IndexError: return [] @@ -383,7 +382,6 @@ def select_records( """ if cursor: - # TODO can we support cursor through pephubclient? _LOGGER.warning("Cursor not supported for PEPHubBackend, ignoring cursor") def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: From 4c312018e16e5472be616d04c205b03ac2ff393f Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 12:24:04 -0400 Subject: [PATCH 28/34] remove duplicate code for record_identifier --- pipestat/backends/pephub_backend/pephubbackend.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 748b8a6b..beb6ab5e 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -213,7 +213,7 @@ def report( This method overwrites any existing data and creates the required hierarchical mapping structure if needed. - :param history_enabled: this parameter is currently ignored as PEPHub + :param history_enabled: this parameter is currently ignored as PEPHub does not support this :param Dict[str, Any] values: dict of results identifiers and values to be reported :param str record_identifier: unique identifier of the record @@ -227,7 +227,6 @@ def report( ) record_identifier = record_identifier or self.record_identifier - record_identifier = record_identifier result_formatter = result_formatter or self.result_formatter results_formatted = [] From d83adda4d7195276b21abf81f4bb0ab255bbd000 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 12:56:09 -0400 Subject: [PATCH 29/34] remove unused code --- pipestat/backends/pephub_backend/pephubbackend.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index beb6ab5e..2f93d02e 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -245,7 +245,6 @@ def report( if not existing: - # try: self.phc.sample.create( namespace=self.pep_registry.namespace, name=self.pep_registry.item, @@ -403,15 +402,11 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: return "in" raise ValueError(f"Invalid filter operator: {op}") - # Can we use query_param to do cursor/limit operations if the PEP is very large? project = self.phc.load_project(project_registry_path=self.pephub_path) - print(project) - # PEPHub uses sample_name not record_identifier - # Just get the items from the sample table because it is a dataframe and return the dict to the end user if columns is not None: columns = copy.deepcopy(columns) - for i in ["sample_name"]: # Must add id, need it for cursor + for i in ["sample_name"]: # PEPHub uses sample_name not record_identifier if i not in columns: columns.insert(0, i) try: @@ -440,8 +435,8 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: key = filter_condition["key"] value = filter_condition["value"] - # Create querry for df based on filter conditions + # Create querry for df based on filter conditions if isinstance(value, list): filter_expression = f"{key} {retrieved_operator} {value}" else: @@ -449,8 +444,6 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: all_filter_expressions.append(filter_expression) if len(all_filter_expressions) > 1: - - # This is for AND logic if bool_operator == "AND": for filter in all_filter_expressions: df = df.query(filter) @@ -466,7 +459,6 @@ def get_operator(op: Literal["eq", "lt", "ge", "gt", "in"]) -> Any: # Once we have the dataframe (filtered or unfiltered), convert to a dict using the sample_name/record_identifier as the primary key df2dict = df.set_index("sample_name").transpose().to_dict(orient="dict") - # Filter out columns # Must do this to align output structure with that of db_backend and file_backends records_list = [] for key, value in df2dict.items(): From cad275fdb3e9d5ede8e144a13a86258ff4d7f685 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 13:10:12 -0400 Subject: [PATCH 30/34] remove extra pephub url --- tests/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 49b31e91..e261f6ca 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,7 +23,6 @@ STANDARD_TEST_PIPE_ID = "default_pipeline_name" PEPHUB_URL = "donaldcampbelljr/pipestat_demo:default" -# PEPHUB_URL ="donaldcampbelljr/pipestat_demo_2:default" try: subprocess.check_output( From 78aefd148c2cb65c39b011db437c4e1e2f4247c2 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:08:02 -0400 Subject: [PATCH 31/34] fix for #189 --- pipestat/const.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pipestat/const.py b/pipestat/const.py index cc2804cb..3e6c8f36 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -128,6 +128,7 @@ "string": str, "path": Path, "boolean": bool, + "bool": bool, "file": str, "image": str, "link": str, From 4957dc1c0c1f7dda31324ebaed0af4fa7b3b0503 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 15 Jul 2024 18:38:08 -0400 Subject: [PATCH 32/34] fix for #190 --- pipestat/pipestat.py | 6 +----- pipestat/reports.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index c89a602c..1a4ee46a 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -961,11 +961,7 @@ def summarize( def check_multi_results(self): # Check to see if the user used a path with "{record-identifier}" if self.file: - # TODO this needs rework: remove self.cfg["unresolved_result_path"] and just use self.file - if ( - "{record_identifier}" in self.file - or self.cfg["unresolved_result_path"] != self.file - ): + if "{record_identifier}" in self.cfg["unresolved_result_path"]: # assume there are multiple result files in sub-directories self.cfg["multi_result_files"] = True results_directory = self.cfg["unresolved_result_path"].split( diff --git a/pipestat/reports.py b/pipestat/reports.py index feb80f02..1968be3c 100644 --- a/pipestat/reports.py +++ b/pipestat/reports.py @@ -114,6 +114,12 @@ def __call__( self.create_index_html(navbar, self.create_footer()) return self.index_html_path + def _reset_pipeline_type(self): + """ + The report logic will set the pipeline type when multi results is used. It must be reset or it causes issues. + """ + self.prj.backend.pipeline_type = self.prj.pipeline_type + def _create_copy_for_porting(self, parent_path: str, record_identifier: str) -> str: """ Helper function that assists with copying images (pdfs) @@ -212,6 +218,7 @@ def create_sample_parent_html(self, navbar, footer): pages.append(page_relpath) labels.append(sample_name) + self._reset_pipeline_type() template_vars = dict( navbar=navbar, footer=footer, @@ -481,6 +488,7 @@ def create_object_htmls(self, navbar, footer): html_page_path, render_jinja_template("object.html", self.jinja_env, args=template_vars), ) + self._reset_pipeline_type() def create_glossary_html(self, glossary_table, navbar, footer): template_vars = dict( @@ -755,7 +763,7 @@ def create_index_html(self, navbar, footer): table_cell_data = [[rel_sample_html, sample_name]] table_cell_data += list(sorted_sample_stat_results.values()) table_row_data.append(table_cell_data) - + self._reset_pipeline_type() # Create parent samples page with links to each sample save_html( path=os.path.join(self.pipeline_reports, "records.html"), @@ -921,7 +929,7 @@ def create_project_objects(self): if "description" in self.prj.result_schemas[image_result] else "No description in schema" ) - + self._reset_pipeline_type() template_vars = dict(figures=figures, links=links) return render_jinja_template("project_object.html", self.jinja_env, template_vars) @@ -962,6 +970,7 @@ def _stats_to_json_str(self): inclusion_fun=lambda x: x not in OBJECT_TYPES, casting_fun=str, ) + self._reset_pipeline_type() return dumps(results) def _get_navbar_dropdown_data_objects(self, objs, wd, context): @@ -995,7 +1004,7 @@ def _get_navbar_dropdown_data_samples(self, wd, context): ) relpaths.append(_make_relpath(page_name, wd, context)) sample_names.append(sample_name) - + self._reset_pipeline_type() return relpaths, sample_names @@ -1282,6 +1291,8 @@ def _warn(what, e, sn): times.append(NO_DATA_PLACEHOLDER) mems.append(NO_DATA_PLACEHOLDER) + project.backend.pipeline_type = project.pipeline_type + template_vars = dict( sample_names=sample_names, log_paths=log_paths, @@ -1439,7 +1450,7 @@ def _create_stats_objs_summaries(prj, pipeline_name: str) -> List[str]: reported_stats.append(v) stats.append(reported_stats) - + prj.backend.pipeline_type = prj.pipeline_type # Stats File tsv_outfile_path = get_file_for_table(prj, pipeline_name, "stats_summary.tsv") stats.insert(0, columns) From c0943314f94bf1d7eb4d36b8b8a4df9fc831f856 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 18 Jul 2024 09:22:39 -0400 Subject: [PATCH 33/34] minor polish for #187 --- pipestat/backends/pephub_backend/pephubbackend.py | 4 ++-- requirements/requirements-pephub-backend.txt | 2 +- tests/conftest.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index 2f93d02e..d7efdcbb 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -91,14 +91,14 @@ def list_results( self, restrict_to: Optional[List[str]] = None, record_identifier: str = None, - ) -> List[str]: + ) -> Union[List[str], List[None]]: """ Check if the specified results exist in the table :param List[str] restrict_to: results identifiers to check for :param str record_identifier: record to check for :return List[str] existing: if no result identifier specified, return all results for the record - :return List[str]: results identifiers that exist + :return List[str]: results identifiers that exist or an empty list if nothing was found """ rid = record_identifier record = self.select_records( diff --git a/requirements/requirements-pephub-backend.txt b/requirements/requirements-pephub-backend.txt index da4b7856..51564440 100644 --- a/requirements/requirements-pephub-backend.txt +++ b/requirements/requirements-pephub-backend.txt @@ -1 +1 @@ -pephubclient \ No newline at end of file +pephubclient>=0.4.2 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index e261f6ca..b6ac2c6d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,7 +22,7 @@ """ STANDARD_TEST_PIPE_ID = "default_pipeline_name" -PEPHUB_URL = "donaldcampbelljr/pipestat_demo:default" +PEPHUB_URL = "databio/pipestat_demo:default" try: subprocess.check_output( From d07fbb1d67219dc31f08d228cbb49c425a4847d0 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 18 Jul 2024 09:25:27 -0400 Subject: [PATCH 34/34] run isort and then black --- pipestat/__init__.py | 10 +-- pipestat/argparser.py | 3 +- pipestat/backends/abstract.py | 6 +- pipestat/backends/db_backend/db_helpers.py | 4 +- .../backends/db_backend/db_parsed_schema.py | 7 +- pipestat/backends/db_backend/dbbackend.py | 17 ++-- pipestat/backends/file_backend/filebackend.py | 20 ++--- .../backends/pephub_backend/pephubbackend.py | 9 +- pipestat/cli.py | 28 +++---- pipestat/const.py | 2 +- pipestat/exceptions.py | 1 + pipestat/helpers.py | 18 ++-- pipestat/parsed_schema.py | 3 +- pipestat/pipestat.py | 84 ++++++++----------- pipestat/pipestatreader/reader.py | 10 +-- pipestat/reports.py | 34 ++++---- tests/conftest.py | 7 +- tests/test_db_only_mode.py | 4 +- tests/test_init.py | 15 ++-- tests/test_parsed_schema.py | 13 ++- tests/test_pipestat.py | 25 +++--- tests/test_status.py | 10 +-- 22 files changed, 144 insertions(+), 186 deletions(-) diff --git a/pipestat/__init__.py b/pipestat/__init__.py index 407f62ca..dd436b5e 100644 --- a/pipestat/__init__.py +++ b/pipestat/__init__.py @@ -3,15 +3,9 @@ import logmuse from ._version import __version__ -from .exceptions import PipestatError from .const import PKG_NAME -from .pipestat import ( - PipestatManager, - SamplePipestatManager, - ProjectPipestatManager, - PipestatBoss, -) - +from .exceptions import PipestatError +from .pipestat import PipestatBoss, PipestatManager, ProjectPipestatManager, SamplePipestatManager __all__ = [ "PipestatError", diff --git a/pipestat/argparser.py b/pipestat/argparser.py index f3793056..b24350ac 100644 --- a/pipestat/argparser.py +++ b/pipestat/argparser.py @@ -2,9 +2,10 @@ import argparse import os + from ubiquerg import VersionInHelpParser -from ._version import __version__ +from ._version import __version__ from .const import ENV_VARS, PKG_NAME, STATUS_SCHEMA REPORT_CMD = "report" diff --git a/pipestat/backends/abstract.py b/pipestat/backends/abstract.py index 48db5426..c65e5e63 100644 --- a/pipestat/backends/abstract.py +++ b/pipestat/backends/abstract.py @@ -1,13 +1,13 @@ import os from abc import ABC from logging import getLogger +from typing import Any, Dict, List, Optional, Tuple, Union + from ubiquerg import expandpath -from typing import List, Dict, Any, Optional, Union, Tuple from ..const import PKG_NAME, STATUS -from ..helpers import force_symlink from ..exceptions import SchemaError - +from ..helpers import force_symlink _LOGGER = getLogger(PKG_NAME) diff --git a/pipestat/backends/db_backend/db_helpers.py b/pipestat/backends/db_backend/db_helpers.py index 4addf5fc..0d7b387c 100644 --- a/pipestat/backends/db_backend/db_helpers.py +++ b/pipestat/backends/db_backend/db_helpers.py @@ -2,9 +2,7 @@ from typing import Any, Dict, List, Optional, Union from urllib.parse import quote_plus - -from sqlmodel import and_, or_, Integer, Float, String, Boolean - +from sqlmodel import Boolean, Float, Integer, String, and_, or_ from pipestat.exceptions import MissingConfigDataError diff --git a/pipestat/backends/db_backend/db_parsed_schema.py b/pipestat/backends/db_backend/db_parsed_schema.py index b909b5d6..e324adb4 100644 --- a/pipestat/backends/db_backend/db_parsed_schema.py +++ b/pipestat/backends/db_backend/db_parsed_schema.py @@ -7,11 +7,10 @@ from typing import Any, Dict, List, Mapping, Optional from pydantic import ConfigDict, create_model - - from sqlalchemy import Column, null from sqlalchemy.dialects.postgresql import JSONB from sqlmodel import Field, SQLModel + from pipestat.const import ( CANONICAL_TYPES, CLASSES_BY_TYPE, @@ -22,13 +21,13 @@ PROJECT_NAME, RECORD_IDENTIFIER, SAMPLE_NAME, - STATUS, SCHEMA_DESC_KEY, SCHEMA_ITEMS_KEY, SCHEMA_PROP_KEY, SCHEMA_TYPE_KEY, + STATUS, ) -from pipestat.exceptions import SchemaError, PipestatError +from pipestat.exceptions import PipestatError, SchemaError from pipestat.parsed_schema import ParsedSchema _LOGGER = logging.getLogger(__name__) diff --git a/pipestat/backends/db_backend/dbbackend.py b/pipestat/backends/db_backend/dbbackend.py index 797f5b2e..572192f6 100644 --- a/pipestat/backends/db_backend/dbbackend.py +++ b/pipestat/backends/db_backend/dbbackend.py @@ -1,23 +1,24 @@ import copy import datetime -from logging import getLogger from contextlib import contextmanager -from typing import List, Dict, Any, Optional, Union, NoReturn, Tuple +from logging import getLogger +from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union -from sqlmodel import SQLModel, Session, create_engine, select as sql_select +from sqlmodel import Session, SQLModel, create_engine +from sqlmodel import select as sql_select -from pipestat.backends.db_backend.db_helpers import selection_filter from pipestat.backends.abstract import PipestatBackend +from pipestat.backends.db_backend.db_helpers import selection_filter + +from ...const import CREATED_TIME, MODIFIED_TIME, PKG_NAME, RECORD_IDENTIFIER, STATUS from ...exceptions import ( + ColumnNotFoundError, PipestatDatabaseError, RecordNotFoundError, SchemaError, - ColumnNotFoundError, - UnrecognizedStatusError, SchemaNotFoundError, + UnrecognizedStatusError, ) -from ...const import PKG_NAME, STATUS, RECORD_IDENTIFIER, CREATED_TIME, MODIFIED_TIME - _LOGGER = getLogger(PKG_NAME) diff --git a/pipestat/backends/file_backend/filebackend.py b/pipestat/backends/file_backend/filebackend.py index 05bef542..e6ae935b 100644 --- a/pipestat/backends/file_backend/filebackend.py +++ b/pipestat/backends/file_backend/filebackend.py @@ -1,25 +1,21 @@ import datetime -import os.path import operator +import os.path from copy import deepcopy from functools import reduce -from itertools import chain -from ...helpers import get_all_result_files - - from glob import glob +from itertools import chain from logging import getLogger -from yacman import FutureYAMLConfigManager as YAMLConfigManager -from yacman import read_lock, write_lock +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union from ubiquerg import create_lock, remove_lock +from yacman import FutureYAMLConfigManager as YAMLConfigManager +from yacman import read_lock, write_lock -from typing import List, Dict, Any, Optional, Union, Literal, Callable, Tuple - -from ...exceptions import UnrecognizedStatusError, PipestatError from ...backends.abstract import PipestatBackend -from ...const import DATE_FORMAT, PKG_NAME, CREATED_TIME, MODIFIED_TIME, META_KEY, HISTORY_KEY - +from ...const import CREATED_TIME, DATE_FORMAT, HISTORY_KEY, META_KEY, MODIFIED_TIME, PKG_NAME +from ...exceptions import PipestatError, UnrecognizedStatusError +from ...helpers import get_all_result_files _LOGGER = getLogger(PKG_NAME) diff --git a/pipestat/backends/pephub_backend/pephubbackend.py b/pipestat/backends/pephub_backend/pephubbackend.py index d7efdcbb..d3df7160 100644 --- a/pipestat/backends/pephub_backend/pephubbackend.py +++ b/pipestat/backends/pephub_backend/pephubbackend.py @@ -1,18 +1,15 @@ import copy from logging import getLogger +from typing import Any, Dict, List, Literal, NoReturn, Optional, Union import pephubclient +from pephubclient import PEPHubClient from pephubclient.constants import RegistryPath from ubiquerg import parse_registry_path from ...backends.abstract import PipestatBackend from ...const import PKG_NAME, STATUS -from typing import List, Dict, Any, Optional, Union, NoReturn, Literal - - -from pephubclient import PEPHubClient - -from ...exceptions import UnrecognizedStatusError, RecordNotFoundError, PipestatPEPHubError +from ...exceptions import PipestatPEPHubError, RecordNotFoundError, UnrecognizedStatusError _LOGGER = getLogger(PKG_NAME) diff --git a/pipestat/cli.py b/pipestat/cli.py index 5d13f8ac..212a1686 100644 --- a/pipestat/cli.py +++ b/pipestat/cli.py @@ -1,37 +1,29 @@ -import sys import os +import sys from logging import getLogger import logmuse from ubiquerg import expandpath from .argparser import ( - build_argparser, - REPORT_CMD, + HISTORY_CMD, + INIT_CMD, INSPECT_CMD, + LINK_CMD, REMOVE_CMD, + REPORT_CMD, RETRIEVE_CMD, + SERVE_CMD, STATUS_CMD, STATUS_GET_CMD, STATUS_SET_CMD, - INIT_CMD, SUMMARIZE_CMD, - SERVE_CMD, - LINK_CMD, - HISTORY_CMD, -) -from .const import ( - SCHEMA_KEY, - SCHEMA_TYPE_KEY, - CANONICAL_TYPES, - PKG_NAME, -) -from .exceptions import ( - SchemaNotFoundError, - PipestatStartupError, + build_argparser, ) -from .pipestat import PipestatManager, check_dependencies +from .const import CANONICAL_TYPES, PKG_NAME, SCHEMA_KEY, SCHEMA_TYPE_KEY +from .exceptions import PipestatStartupError, SchemaNotFoundError from .helpers import init_generic_config +from .pipestat import PipestatManager, check_dependencies try: from pipestat.pipestatreader.reader import main as readermain diff --git a/pipestat/const.py b/pipestat/const.py index 3e6c8f36..2e5d42e9 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -6,7 +6,7 @@ # Can be removed when 3.8 is deprecated if int(sys.version.split(".")[1]) < 9: - from typing import List, Dict + from typing import Dict, List list_of_dicts = List[Dict] else: diff --git a/pipestat/exceptions.py b/pipestat/exceptions.py index 1f072e8f..ecb44cbb 100644 --- a/pipestat/exceptions.py +++ b/pipestat/exceptions.py @@ -1,6 +1,7 @@ """ Package exception types """ from typing import Iterable, Optional + from .const import CLASSES_BY_TYPE, ENV_VARS __all__ = [ diff --git a/pipestat/helpers.py b/pipestat/helpers.py index 9f99b604..5f5a6473 100644 --- a/pipestat/helpers.py +++ b/pipestat/helpers.py @@ -1,25 +1,19 @@ """Assorted project utilities""" -import logging +import errno import glob +import logging import os -import errno - -import jsonschema from json import dumps from pathlib import Path from shutil import make_archive -from typing import Any, Dict, Optional, Tuple, Union, List +from typing import Any, Dict, List, Optional, Tuple, Union +import jsonschema from yaml import dump -from .exceptions import SchemaValidationErrorDuringReport -from .const import ( - PIPESTAT_GENERIC_CONFIG, - SCHEMA_PROP_KEY, - SCHEMA_TYPE_KEY, - CLASSES_BY_TYPE, -) +from .const import CLASSES_BY_TYPE, PIPESTAT_GENERIC_CONFIG, SCHEMA_PROP_KEY, SCHEMA_TYPE_KEY +from .exceptions import SchemaValidationErrorDuringReport _LOGGER = logging.getLogger(__name__) diff --git a/pipestat/parsed_schema.py b/pipestat/parsed_schema.py index 022fb231..ed4ea6c8 100644 --- a/pipestat/parsed_schema.py +++ b/pipestat/parsed_schema.py @@ -4,7 +4,9 @@ import logging from pathlib import Path from typing import Any, Dict, List, Mapping, Optional, Union + import yacman + from .const import ( CANONICAL_TYPES, CLASSES_BY_TYPE, @@ -16,7 +18,6 @@ ) from .exceptions import SchemaError - _LOGGER = logging.getLogger(__name__) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 1a4ee46a..3fd48801 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -1,77 +1,67 @@ -import os import datetime -from logging import getLogger -from copy import deepcopy - +import os from abc import ABC from collections.abc import MutableMapping +from copy import deepcopy +from logging import getLogger +from typing import Any, Dict, Iterator, List, Optional, Union from jsonschema import validate -from yacman import FutureYAMLConfigManager as YAMLConfigManager -from yacman.yacman_future import select_config from ubiquerg import mkabs +from yacman import FutureYAMLConfigManager as YAMLConfigManager from yacman import load_yaml +from yacman.yacman_future import select_config - -from typing import Optional, Union, Dict, Any, List, Iterator - - -from .exceptions import ( - ColumnNotFoundError, - NoBackendSpecifiedError, - InvalidTimeFormatError, - PipestatDependencyError, - PipestatDatabaseError, - RecordNotFoundError, - SchemaNotFoundError, -) from pipestat.backends.file_backend.filebackend import FileBackend -from .reports import HTMLReportBuilder, _create_stats_objs_summaries -from .helpers import ( - validate_type, - default_formatter, - zip_report, - make_subdirectories, -) + from .const import ( - PKG_NAME, - DEFAULT_PIPELINE_NAME, - ENV_VARS, CFG_DATABASE_KEY, - SCHEMA_PATH, - STATUS_SCHEMA, - STATUS_SCHEMA_SOURCE_KEY, - STATUS_SCHEMA_KEY, - STATUS_FILE_DIR, - FILE_KEY, + CFG_SCHEMA, + CONFIG_KEY, + CREATED_TIME, + DATA_KEY, DB_ONLY_KEY, DB_URL, + DEFAULT_PIPELINE_NAME, + ENV_VARS, + FILE_KEY, + MODIFIED_TIME, + MULTI_PIPELINE, + OUTPUT_DIR, PIPELINE_NAME, PIPELINE_TYPE, + PKG_NAME, PROJECT_NAME, RECORD_IDENTIFIER, RESULT_FORMATTER, - MULTI_PIPELINE, - OUTPUT_DIR, - CREATED_TIME, - MODIFIED_TIME, - CFG_SCHEMA, - CONFIG_KEY, - SCHEMA_KEY, SAMPLE_NAME_ID_KEY, - DATA_KEY, + SCHEMA_KEY, + SCHEMA_PATH, + STATUS_FILE_DIR, + STATUS_SCHEMA, + STATUS_SCHEMA_KEY, + STATUS_SCHEMA_SOURCE_KEY, +) +from .exceptions import ( + ColumnNotFoundError, + InvalidTimeFormatError, + NoBackendSpecifiedError, + PipestatDatabaseError, + PipestatDependencyError, + RecordNotFoundError, + SchemaNotFoundError, ) +from .helpers import default_formatter, make_subdirectories, validate_type, zip_report +from .reports import HTMLReportBuilder, _create_stats_objs_summaries try: - from pipestat.backends.db_backend.db_parsed_schema import ( - ParsedSchemaDB as ParsedSchema, - ) + from pipestat.backends.db_backend.db_parsed_schema import ParsedSchemaDB as ParsedSchema except ImportError: from .parsed_schema import ParsedSchema try: - from pipestat.backends.db_backend.dbbackend import DBBackend from pipestat.backends.db_backend.db_helpers import construct_db_url + from pipestat.backends.db_backend.dbbackend import DBBackend except ImportError: # We let this pass, but if the user attempts to create DBBackend, check_dependencies raises exception. pass diff --git a/pipestat/pipestatreader/reader.py b/pipestat/pipestatreader/reader.py index e384fc86..a10973d2 100644 --- a/pipestat/pipestatreader/reader.py +++ b/pipestat/pipestatreader/reader.py @@ -1,14 +1,14 @@ -import fastapi -import os import logging +import os +from typing import List, Optional, Tuple, Union + +import fastapi import uvicorn -from typing import Optional, List, Union, Tuple +from pydantic import BaseModel from pipestat import SamplePipestatManager from pipestat.exceptions import RecordNotFoundError from pipestat.reports import fetch_pipeline_results -from pydantic import BaseModel - _LOGGER = logging.getLogger(__name__) diff --git a/pipestat/reports.py b/pipestat/reports.py index 1968be3c..4f7a6109 100644 --- a/pipestat/reports.py +++ b/pipestat/reports.py @@ -1,39 +1,37 @@ """ Generate HTML reports """ -import shutil - -import jinja2 -import os -import pandas as _pd -import sys import csv -import yaml import glob - +import os +import shutil +import sys +from copy import deepcopy from datetime import timedelta -from eido import read_schema from json import dumps from logging import getLogger -from peppy.const import AMENDMENTS_KEY from typing import List -from copy import deepcopy +import jinja2 +import pandas as _pd +import yaml +from eido import read_schema +from peppy.const import AMENDMENTS_KEY from ubiquerg import mkabs from ._version import __version__ from .const import ( - PIPELINE_NAME, - PKG_NAME, - OUTPUT_DIR, - OBJECT_TYPES, BUTTON_APPEARANCE_BY_FLAG, + FILE_KEY, NO_DATA_PLACEHOLDER, + OBJECT_TYPES, + OUTPUT_DIR, + PIPELINE_NAME, PIPELINE_TYPE, - PROJECT_NAME, - TEMPLATES_DIRNAME, + PKG_NAME, PROFILE_COLNAMES, + PROJECT_NAME, STATUS_FILE_DIR, - FILE_KEY, + TEMPLATES_DIRNAME, ) from .helpers import make_subdirectories diff --git a/tests/conftest.py b/tests/conftest.py index b6ac2c6d..6be8c349 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,12 +1,13 @@ """Test fixtures and helpers to make widely available in the package""" import os -import pytest import subprocess +from atexit import register -from pipestat.const import STATUS_SCHEMA +import pytest from yacman import load_yaml -from atexit import register + +from pipestat.const import STATUS_SCHEMA REC_ID = "constant_record_id" BACKEND_KEY_DB = "db" diff --git a/tests/test_db_only_mode.py b/tests/test_db_only_mode.py index 28f316cf..af648f20 100644 --- a/tests/test_db_only_mode.py +++ b/tests/test_db_only_mode.py @@ -2,13 +2,11 @@ from pipestat import SamplePipestatManager from pipestat.const import * -from .conftest import DB_URL -from .conftest import SERVICE_UNAVAILABLE, DB_DEPENDENCIES +from .conftest import DB_DEPENDENCIES, DB_URL, SERVICE_UNAVAILABLE try: from sqlmodel import SQLModel, create_engine - from sqlmodel.main import default_registry except ModuleNotFoundError: pass diff --git a/tests/test_init.py b/tests/test_init.py index 80cb5f12..3ec18956 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -1,17 +1,16 @@ -from tempfile import mkdtemp +import os +from tempfile import NamedTemporaryFile, TemporaryDirectory, mkdtemp import pytest -import os from yaml import dump -from pipestat import PipestatManager, SamplePipestatManager, ProjectPipestatManager +from pipestat import PipestatManager, ProjectPipestatManager, SamplePipestatManager +from pipestat.const import PIPESTAT_GENERIC_CONFIG, SCHEMA_KEY from pipestat.exceptions import * -from pipestat.parsed_schema import SCHEMA_PIPELINE_NAME_KEY -from tempfile import NamedTemporaryFile, TemporaryDirectory -from .conftest import STANDARD_TEST_PIPE_ID, DB_DEPENDENCIES -from .conftest import SERVICE_UNAVAILABLE from pipestat.helpers import init_generic_config -from pipestat.const import PIPESTAT_GENERIC_CONFIG, SCHEMA_KEY +from pipestat.parsed_schema import SCHEMA_PIPELINE_NAME_KEY + +from .conftest import DB_DEPENDENCIES, SERVICE_UNAVAILABLE, STANDARD_TEST_PIPE_ID @pytest.mark.skipif(not DB_DEPENDENCIES, reason="Requires dependencies") diff --git a/tests/test_parsed_schema.py b/tests/test_parsed_schema.py index 7c636112..c1c2cf96 100644 --- a/tests/test_parsed_schema.py +++ b/tests/test_parsed_schema.py @@ -3,17 +3,16 @@ from functools import partial from pathlib import Path from typing import * + import pytest import yaml -from pipestat.const import SAMPLE_NAME, STATUS, RECORD_IDENTIFIER + +from pipestat.const import RECORD_IDENTIFIER, SAMPLE_NAME, STATUS from pipestat.exceptions import SchemaError, SchemaValidationErrorDuringReport -from pipestat.parsed_schema import ( - NULL_MAPPING_VALUE, - ParsedSchema, - SCHEMA_PIPELINE_NAME_KEY, -) -from .conftest import COMMON_CUSTOM_STATUS_DATA, DEFAULT_STATUS_DATA, get_data_file_path from pipestat.helpers import validate_type +from pipestat.parsed_schema import NULL_MAPPING_VALUE, SCHEMA_PIPELINE_NAME_KEY, ParsedSchema + +from .conftest import COMMON_CUSTOM_STATUS_DATA, DEFAULT_STATUS_DATA, get_data_file_path TEMP_SCHEMA_FILENAME = "schema.tmp.yaml" diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index 3275500f..c9fd17bf 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -2,34 +2,33 @@ import os.path import time from collections.abc import Mapping +from tempfile import NamedTemporaryFile, TemporaryDirectory import pephubclient.exceptions -from yacman import YAMLConfigManager - import pytest from jsonschema import ValidationError +from yacman import YAMLConfigManager -from pipestat import SamplePipestatManager, ProjectPipestatManager, PipestatBoss, PipestatManager +from pipestat import PipestatBoss, PipestatManager, ProjectPipestatManager, SamplePipestatManager +from pipestat.cli import main from pipestat.const import * from pipestat.exceptions import * -from pipestat.parsed_schema import ParsedSchema from pipestat.helpers import default_formatter, markdown_formatter -from pipestat.cli import main +from pipestat.parsed_schema import ParsedSchema + from .conftest import ( - get_data_file_path, BACKEND_KEY_DB, BACKEND_KEY_FILE, COMMON_CUSTOM_STATUS_DATA, - DEFAULT_STATUS_DATA, - STANDARD_TEST_PIPE_ID, - SERVICE_UNAVAILABLE, - DB_URL, - REC_ID, DB_DEPENDENCIES, + DB_URL, + DEFAULT_STATUS_DATA, PEPHUB_URL, + REC_ID, + SERVICE_UNAVAILABLE, + STANDARD_TEST_PIPE_ID, + get_data_file_path, ) -from tempfile import NamedTemporaryFile, TemporaryDirectory - from .test_db_only_mode import ContextManagerDBTesting CONST_REC_ID = "constant_record_id" diff --git a/tests/test_status.py b/tests/test_status.py index 27ad7c72..26e0f9cd 100644 --- a/tests/test_status.py +++ b/tests/test_status.py @@ -1,22 +1,22 @@ """Tests for pipestat's status checking/management functionality""" import os +from tempfile import NamedTemporaryFile + import pytest from pipestat import SamplePipestatManager +from pipestat.const import FILE_KEY, STATUS_FILE_DIR +from pipestat.exceptions import UnrecognizedStatusError -from pipestat.const import STATUS_FILE_DIR, FILE_KEY from .conftest import ( BACKEND_KEY_DB, BACKEND_KEY_FILE, + DB_DEPENDENCIES, DB_URL, SERVICE_UNAVAILABLE, - DB_DEPENDENCIES, ) - from .test_db_only_mode import ContextManagerDBTesting -from pipestat.exceptions import UnrecognizedStatusError -from tempfile import NamedTemporaryFile @pytest.mark.skipif(not DB_DEPENDENCIES, reason="Requires dependencies")