From 6744af2d5393aec482e3dd544c734b8e9a44f960 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 4 Sep 2024 15:58:37 -0400 Subject: [PATCH 01/10] added tar project metadata storing --- docs/changelog.md | 3 + pepdbagent/_version.py | 2 +- pepdbagent/db_utils.py | 32 ++++---- pepdbagent/models.py | 25 +++++- pepdbagent/modules/project.py | 141 ++++++++++++++++++++++------------ tests/test_tar_meta.py | 50 ++++++++++++ 6 files changed, 187 insertions(+), 66 deletions(-) create mode 100644 tests/test_tar_meta.py diff --git a/docs/changelog.md b/docs/changelog.md index 204c54d..207ff2a 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.11.1] -- 2024-09-04 +- Added metadata table for namespace tar files + ## [0.11.0] -- 2024-07-24 - Added validation schemas diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index ae6db5f..fee46bd 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.11.0" +__version__ = "0.11.1" diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index de1a9f3..17094b3 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -3,24 +3,15 @@ import logging from typing import List, Optional -from sqlalchemy import ( - TIMESTAMP, - BigInteger, - Enum, - FetchedValue, - ForeignKey, - Result, - Select, - String, - UniqueConstraint, - event, - select, -) +from sqlalchemy import (TIMESTAMP, BigInteger, Enum, FetchedValue, ForeignKey, + Result, Select, String, UniqueConstraint, event, + select) from sqlalchemy.dialects.postgresql import JSON from sqlalchemy.engine import URL, create_engine from sqlalchemy.exc import ProgrammingError from sqlalchemy.ext.compiler import compiles -from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship +from sqlalchemy.orm import (DeclarativeBase, Mapped, Session, mapped_column, + relationship) from pepdbagent.const import PKG_NAME, POSTGRES_DIALECT from pepdbagent.exceptions import SchemaError @@ -364,6 +355,19 @@ class SchemaGroupRelations(Base): ) +class TarNamespace(Base): + + __tablename__ = "tar_namespace" + + id: Mapped[int] = mapped_column(primary_key=True) + namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE")) + file_path: Mapped[str] = mapped_column(nullable=False) + submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date) + start_period: Mapped[datetime.datetime] = mapped_column(nullable=True) + end_period: Mapped[datetime.datetime] = mapped_column(nullable=True) + number_of_projects: Mapped[int] = mapped_column(default=0) + + class BaseEngine: """ A class with base methods, that are used in several classes. e.g. fetch_one or fetch_all diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 78269fb..3a5e40f 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -280,7 +280,7 @@ class SchemaGroupAnnotation(BaseModel): namespace: str name: str - description: Optional[str] + description: Optional[str] = "" schemas: List[SchemaAnnotation] @@ -293,3 +293,26 @@ class SchemaGroupSearchResult(BaseModel): limit: int offset: int results: List[SchemaGroupAnnotation] + + +class GeoTarModel(BaseModel): + """ + Geo project model + """ + + identifier: int = None + namespace: str + file_path: str + submission_date: datetime.datetime + start_period: datetime.datetime + end_period: datetime.datetime + number_of_projects: int = 0 + +class GeoTarModelReturn(BaseModel): + + """ + Geo project model + """ + + count: int + results: List[GeoTarModel] \ No newline at end of file diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index d7cb181..34cea80 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -5,60 +5,30 @@ import numpy as np import peppy -from peppy.const import ( - CONFIG_KEY, - SAMPLE_NAME_ATTR, - SAMPLE_RAW_DICT_KEY, - SAMPLE_TABLE_INDEX_KEY, - SUBSAMPLE_RAW_LIST_KEY, -) +from peppy.const import (CONFIG_KEY, SAMPLE_NAME_ATTR, SAMPLE_RAW_DICT_KEY, + SAMPLE_TABLE_INDEX_KEY, SUBSAMPLE_RAW_LIST_KEY) from sqlalchemy import Select, and_, delete, select from sqlalchemy.exc import IntegrityError, NoResultFound from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified -from pepdbagent.const import ( - DEFAULT_TAG, - DESCRIPTION_KEY, - MAX_HISTORY_SAMPLES_NUMBER, - NAME_KEY, - PEPHUB_SAMPLE_ID_KEY, - PKG_NAME, -) -from pepdbagent.db_utils import ( - BaseEngine, - HistoryProjects, - HistorySamples, - Projects, - Samples, - Schemas, - Subsamples, - UpdateTypes, - User, -) -from pepdbagent.exceptions import ( - HistoryNotFoundError, - PEPDatabaseAgentError, - ProjectDuplicatedSampleGUIDsError, - ProjectNotFoundError, - ProjectUniqueNameError, - SampleTableUpdateError, - SchemaDoesNotExistError, -) -from pepdbagent.models import ( - HistoryAnnotationModel, - HistoryChangeModel, - ProjectDict, - UpdateItems, - UpdateModel, -) -from pepdbagent.utils import ( - create_digest, - generate_guid, - order_samples, - registry_path_converter, - schema_path_converter, -) +from pepdbagent.const import (DEFAULT_TAG, DESCRIPTION_KEY, + MAX_HISTORY_SAMPLES_NUMBER, NAME_KEY, + PEPHUB_SAMPLE_ID_KEY, PKG_NAME) +from pepdbagent.db_utils import (BaseEngine, HistoryProjects, HistorySamples, + Projects, Samples, Schemas, Subsamples, + TarNamespace, UpdateTypes, User) +from pepdbagent.exceptions import (HistoryNotFoundError, PEPDatabaseAgentError, + ProjectDuplicatedSampleGUIDsError, + ProjectNotFoundError, + ProjectUniqueNameError, + SampleTableUpdateError, + SchemaDoesNotExistError) +from pepdbagent.models import (GeoTarModel, GeoTarModelReturn, + HistoryAnnotationModel, HistoryChangeModel, + ProjectDict, UpdateItems, UpdateModel) +from pepdbagent.utils import (create_digest, generate_guid, order_samples, + registry_path_converter, schema_path_converter) _LOGGER = logging.getLogger(PKG_NAME) @@ -1412,7 +1382,7 @@ def restore( def clean_history(self, days: int = 90) -> None: """ - Delete all history data that is older then 3 month, or specific number of days + Delete all history data that is older than 3 month, or specific number of days :param days: number of days to keep history data :return: None @@ -1427,3 +1397,74 @@ def clean_history(self, days: int = 90) -> None: ) session.commit() _LOGGER.info("History was cleaned successfully!") + + + def geo_upload_tar_info(self, tar_info: GeoTarModel) -> None: + """ + Upload metadata of tar GEO files + + tar_info: GeoTarModel + :return: None + """ + + with Session(self._sa_engine) as session: + new_tar = TarNamespace( + file_path=tar_info.file_path, + namespace=tar_info.namespace, + start_period=tar_info.start_period, + end_period=tar_info.end_period, + number_of_projects=tar_info.number_of_projects, + ) + session.add(new_tar) + session.commit() + + _LOGGER.info("Geo tar info was uploaded successfully!") + + def geo_get_tar_info(self, namespace: str) -> GeoTarModelReturn: + """ + Get metadata of tar GEO files + + :param namespace: namespace of the tar files + + :return: list with geo data + """ + + with Session(self._sa_engine) as session: + tar_info = session.scalars(select(TarNamespace).where(TarNamespace.namespace == namespace).order_by(TarNamespace.submission_date.desc())) + + results = [] + for result in tar_info: + results.append( + GeoTarModel( + identifier=result.id, + namespace=result.namespace, + file_path=result.file_path, + start_period=result.start_period, + end_period=result.end_period, + submission_date=result.submission_date, + number_of_projects=result.number_of_projects, + ) + ) + + return GeoTarModelReturn( + count=len(results), + results=results + ) + + def geo_delete_tar_info(self, namespace: str = None) -> None: + """ + Delete all metadata of tar GEO files + + :param namespace: namespace of the tar files + + :return: None + """ + + with Session(self._sa_engine) as session: + + delete_statement = delete(TarNamespace) + if namespace: + delete_statement = delete_statement.where(TarNamespace.namespace == namespace) + session.execute(delete_statement) + session.commit() + _LOGGER.info("Geo tar info was deleted successfully!") \ No newline at end of file diff --git a/tests/test_tar_meta.py b/tests/test_tar_meta.py new file mode 100644 index 0000000..06caa43 --- /dev/null +++ b/tests/test_tar_meta.py @@ -0,0 +1,50 @@ +from datetime import datetime + +import pytest + +from pepdbagent.models import GeoTarModel + +from .utils import PEPDBAgentContextManager + + +@pytest.mark.skipif( + not PEPDBAgentContextManager().db_setup(), + reason="DB is not setup", +) +class TestGeoTar: + """ + Test project methods + """ + test_namespace = "namespace1" + + tar_info = GeoTarModel( + namespace=test_namespace, + submission_date=datetime.now(), + start_period=datetime.now(), + end_period=datetime.now(), + number_of_projects=1, + file_path="blabla/test.tar", + ) + + + def test_create_meta_tar(self): + with PEPDBAgentContextManager(add_data=True) as agent: + + agent.project.geo_upload_tar_info(tar_info=self.tar_info) + + result = agent.project.geo_get_tar_info(namespace=self.test_namespace) + + assert result.count == 1 + assert result.results[0].end_period.strftime("%Y:%m:%d") == self.tar_info.end_period.strftime("%Y:%m:%d") + + def test_delete_meta_tar(self): + with PEPDBAgentContextManager(add_data=True) as agent: + agent.project.geo_upload_tar_info(tar_info=self.tar_info) + + result = agent.project.geo_get_tar_info(namespace=self.test_namespace) + assert result.count == 1 + + agent.project.geo_delete_tar_info() + + result = agent.project.geo_get_tar_info(namespace=self.test_namespace) + assert result.count == 0 \ No newline at end of file From 15ba273412f43b643e0360997349db087bd3be17 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 4 Sep 2024 15:58:55 -0400 Subject: [PATCH 02/10] lint --- pepdbagent/db_utils.py | 19 +++++--- pepdbagent/models.py | 4 +- pepdbagent/modules/project.py | 85 ++++++++++++++++++++++++----------- tests/test_tar_meta.py | 8 ++-- 4 files changed, 80 insertions(+), 36 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 17094b3..fe5ab6c 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -3,15 +3,24 @@ import logging from typing import List, Optional -from sqlalchemy import (TIMESTAMP, BigInteger, Enum, FetchedValue, ForeignKey, - Result, Select, String, UniqueConstraint, event, - select) +from sqlalchemy import ( + TIMESTAMP, + BigInteger, + Enum, + FetchedValue, + ForeignKey, + Result, + Select, + String, + UniqueConstraint, + event, + select, +) from sqlalchemy.dialects.postgresql import JSON from sqlalchemy.engine import URL, create_engine from sqlalchemy.exc import ProgrammingError from sqlalchemy.ext.compiler import compiles -from sqlalchemy.orm import (DeclarativeBase, Mapped, Session, mapped_column, - relationship) +from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship from pepdbagent.const import PKG_NAME, POSTGRES_DIALECT from pepdbagent.exceptions import SchemaError diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 3a5e40f..60db8b2 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -308,11 +308,11 @@ class GeoTarModel(BaseModel): end_period: datetime.datetime number_of_projects: int = 0 -class GeoTarModelReturn(BaseModel): +class GeoTarModelReturn(BaseModel): """ Geo project model """ count: int - results: List[GeoTarModel] \ No newline at end of file + results: List[GeoTarModel] diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 34cea80..e88b530 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -5,30 +5,63 @@ import numpy as np import peppy -from peppy.const import (CONFIG_KEY, SAMPLE_NAME_ATTR, SAMPLE_RAW_DICT_KEY, - SAMPLE_TABLE_INDEX_KEY, SUBSAMPLE_RAW_LIST_KEY) +from peppy.const import ( + CONFIG_KEY, + SAMPLE_NAME_ATTR, + SAMPLE_RAW_DICT_KEY, + SAMPLE_TABLE_INDEX_KEY, + SUBSAMPLE_RAW_LIST_KEY, +) from sqlalchemy import Select, and_, delete, select from sqlalchemy.exc import IntegrityError, NoResultFound from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified -from pepdbagent.const import (DEFAULT_TAG, DESCRIPTION_KEY, - MAX_HISTORY_SAMPLES_NUMBER, NAME_KEY, - PEPHUB_SAMPLE_ID_KEY, PKG_NAME) -from pepdbagent.db_utils import (BaseEngine, HistoryProjects, HistorySamples, - Projects, Samples, Schemas, Subsamples, - TarNamespace, UpdateTypes, User) -from pepdbagent.exceptions import (HistoryNotFoundError, PEPDatabaseAgentError, - ProjectDuplicatedSampleGUIDsError, - ProjectNotFoundError, - ProjectUniqueNameError, - SampleTableUpdateError, - SchemaDoesNotExistError) -from pepdbagent.models import (GeoTarModel, GeoTarModelReturn, - HistoryAnnotationModel, HistoryChangeModel, - ProjectDict, UpdateItems, UpdateModel) -from pepdbagent.utils import (create_digest, generate_guid, order_samples, - registry_path_converter, schema_path_converter) +from pepdbagent.const import ( + DEFAULT_TAG, + DESCRIPTION_KEY, + MAX_HISTORY_SAMPLES_NUMBER, + NAME_KEY, + PEPHUB_SAMPLE_ID_KEY, + PKG_NAME, +) +from pepdbagent.db_utils import ( + BaseEngine, + HistoryProjects, + HistorySamples, + Projects, + Samples, + Schemas, + Subsamples, + TarNamespace, + UpdateTypes, + User, +) +from pepdbagent.exceptions import ( + HistoryNotFoundError, + PEPDatabaseAgentError, + ProjectDuplicatedSampleGUIDsError, + ProjectNotFoundError, + ProjectUniqueNameError, + SampleTableUpdateError, + SchemaDoesNotExistError, +) +from pepdbagent.models import ( + GeoTarModel, + GeoTarModelReturn, + HistoryAnnotationModel, + HistoryChangeModel, + ProjectDict, + UpdateItems, + UpdateModel, +) +from pepdbagent.utils import ( + create_digest, + generate_guid, + order_samples, + registry_path_converter, + schema_path_converter, +) _LOGGER = logging.getLogger(PKG_NAME) @@ -1398,7 +1431,6 @@ def clean_history(self, days: int = 90) -> None: session.commit() _LOGGER.info("History was cleaned successfully!") - def geo_upload_tar_info(self, tar_info: GeoTarModel) -> None: """ Upload metadata of tar GEO files @@ -1430,7 +1462,11 @@ def geo_get_tar_info(self, namespace: str) -> GeoTarModelReturn: """ with Session(self._sa_engine) as session: - tar_info = session.scalars(select(TarNamespace).where(TarNamespace.namespace == namespace).order_by(TarNamespace.submission_date.desc())) + tar_info = session.scalars( + select(TarNamespace) + .where(TarNamespace.namespace == namespace) + .order_by(TarNamespace.submission_date.desc()) + ) results = [] for result in tar_info: @@ -1446,10 +1482,7 @@ def geo_get_tar_info(self, namespace: str) -> GeoTarModelReturn: ) ) - return GeoTarModelReturn( - count=len(results), - results=results - ) + return GeoTarModelReturn(count=len(results), results=results) def geo_delete_tar_info(self, namespace: str = None) -> None: """ @@ -1467,4 +1500,4 @@ def geo_delete_tar_info(self, namespace: str = None) -> None: delete_statement = delete_statement.where(TarNamespace.namespace == namespace) session.execute(delete_statement) session.commit() - _LOGGER.info("Geo tar info was deleted successfully!") \ No newline at end of file + _LOGGER.info("Geo tar info was deleted successfully!") diff --git a/tests/test_tar_meta.py b/tests/test_tar_meta.py index 06caa43..bf7b45e 100644 --- a/tests/test_tar_meta.py +++ b/tests/test_tar_meta.py @@ -15,6 +15,7 @@ class TestGeoTar: """ Test project methods """ + test_namespace = "namespace1" tar_info = GeoTarModel( @@ -26,7 +27,6 @@ class TestGeoTar: file_path="blabla/test.tar", ) - def test_create_meta_tar(self): with PEPDBAgentContextManager(add_data=True) as agent: @@ -35,7 +35,9 @@ def test_create_meta_tar(self): result = agent.project.geo_get_tar_info(namespace=self.test_namespace) assert result.count == 1 - assert result.results[0].end_period.strftime("%Y:%m:%d") == self.tar_info.end_period.strftime("%Y:%m:%d") + assert result.results[0].end_period.strftime( + "%Y:%m:%d" + ) == self.tar_info.end_period.strftime("%Y:%m:%d") def test_delete_meta_tar(self): with PEPDBAgentContextManager(add_data=True) as agent: @@ -47,4 +49,4 @@ def test_delete_meta_tar(self): agent.project.geo_delete_tar_info() result = agent.project.geo_get_tar_info(namespace=self.test_namespace) - assert result.count == 0 \ No newline at end of file + assert result.count == 0 From e9482329fb512e82718cd67945573b94e275b994 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 5 Sep 2024 12:26:05 -0400 Subject: [PATCH 03/10] fixed tar model --- pepdbagent/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 60db8b2..ea176a9 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -303,7 +303,7 @@ class GeoTarModel(BaseModel): identifier: int = None namespace: str file_path: str - submission_date: datetime.datetime + submission_date: datetime.datetime = None start_period: datetime.datetime end_period: datetime.datetime number_of_projects: int = 0 From cecded14eb99cd116b8fef90b8249eb61f9b795c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 5 Sep 2024 13:37:15 -0400 Subject: [PATCH 04/10] fixed #145 --- pepdbagent/modules/project.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index e88b530..45ed2a8 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -660,6 +660,7 @@ def update( ), history_sa_model=new_history, ) + found_prj.number_of_samples = len(update_dict["samples"]) if "subsamples" in update_dict: if found_prj.subsamples_mapping: From b56e7c8b230bed7ebc4f1f444fb118ecedd297a7 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 5 Sep 2024 14:08:09 -0400 Subject: [PATCH 05/10] moved taring to namespace module --- pepdbagent/models.py | 6 +-- pepdbagent/modules/namespace.py | 77 ++++++++++++++++++++++++++++++++- pepdbagent/modules/project.py | 75 +------------------------------- tests/test_tar_meta.py | 16 +++---- 4 files changed, 88 insertions(+), 86 deletions(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index ea176a9..0d3d025 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -295,7 +295,7 @@ class SchemaGroupSearchResult(BaseModel): results: List[SchemaGroupAnnotation] -class GeoTarModel(BaseModel): +class TarNamespaceModel(BaseModel): """ Geo project model """ @@ -309,10 +309,10 @@ class GeoTarModel(BaseModel): number_of_projects: int = 0 -class GeoTarModelReturn(BaseModel): +class TarNamespaceModelReturn(BaseModel): """ Geo project model """ count: int - results: List[GeoTarModel] + results: List[TarNamespaceModel] diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 5af4c90..1f34975 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -3,12 +3,12 @@ from datetime import datetime, timedelta from typing import List, Tuple, Union -from sqlalchemy import distinct, func, or_, select +from sqlalchemy import distinct, func, or_, select, delete from sqlalchemy.orm import Session from sqlalchemy.sql.selectable import Select from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_LIMIT_INFO, DEFAULT_OFFSET, PKG_NAME -from pepdbagent.db_utils import BaseEngine, Projects, User +from pepdbagent.db_utils import BaseEngine, Projects, User, TarNamespace from pepdbagent.exceptions import NamespaceNotFoundError from pepdbagent.models import ( ListOfNamespaceInfo, @@ -16,6 +16,8 @@ NamespaceInfo, NamespaceList, NamespaceStats, + TarNamespaceModel, + TarNamespaceModelReturn, ) from pepdbagent.utils import tuple_converter @@ -300,3 +302,74 @@ def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats: projects_updated=counts_last_update, projects_created=counts_submission, ) + + def upload_tar_info(self, tar_info: TarNamespaceModel) -> None: + """ + Upload metadata of tar GEO files + + tar_info: TarNamespaceModel + :return: None + """ + + with Session(self._sa_engine) as session: + new_tar = TarNamespace( + file_path=tar_info.file_path, + namespace=tar_info.namespace, + start_period=tar_info.start_period, + end_period=tar_info.end_period, + number_of_projects=tar_info.number_of_projects, + ) + session.add(new_tar) + session.commit() + + _LOGGER.info("Geo tar info was uploaded successfully!") + + def get_tar_info(self, namespace: str) -> TarNamespaceModelReturn: + """ + Get metadata of tar GEO files + + :param namespace: namespace of the tar files + + :return: list with geo data + """ + + with Session(self._sa_engine) as session: + tar_info = session.scalars( + select(TarNamespace) + .where(TarNamespace.namespace == namespace) + .order_by(TarNamespace.submission_date.desc()) + ) + + results = [] + for result in tar_info: + results.append( + TarNamespaceModel( + identifier=result.id, + namespace=result.namespace, + file_path=result.file_path, + start_period=result.start_period, + end_period=result.end_period, + submission_date=result.submission_date, + number_of_projects=result.number_of_projects, + ) + ) + + return TarNamespaceModelReturn(count=len(results), results=results) + + def delete_tar_info(self, namespace: str = None) -> None: + """ + Delete all metadata of tar GEO files + + :param namespace: namespace of the tar files + + :return: None + """ + + with Session(self._sa_engine) as session: + + delete_statement = delete(TarNamespace) + if namespace: + delete_statement = delete_statement.where(TarNamespace.namespace == namespace) + session.execute(delete_statement) + session.commit() + _LOGGER.info("Geo tar info was deleted successfully!") diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 45ed2a8..cf2cfbe 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -47,8 +47,8 @@ SchemaDoesNotExistError, ) from pepdbagent.models import ( - GeoTarModel, - GeoTarModelReturn, + TarNamespaceModel, + TarNamespaceModelReturn, HistoryAnnotationModel, HistoryChangeModel, ProjectDict, @@ -1431,74 +1431,3 @@ def clean_history(self, days: int = 90) -> None: ) session.commit() _LOGGER.info("History was cleaned successfully!") - - def geo_upload_tar_info(self, tar_info: GeoTarModel) -> None: - """ - Upload metadata of tar GEO files - - tar_info: GeoTarModel - :return: None - """ - - with Session(self._sa_engine) as session: - new_tar = TarNamespace( - file_path=tar_info.file_path, - namespace=tar_info.namespace, - start_period=tar_info.start_period, - end_period=tar_info.end_period, - number_of_projects=tar_info.number_of_projects, - ) - session.add(new_tar) - session.commit() - - _LOGGER.info("Geo tar info was uploaded successfully!") - - def geo_get_tar_info(self, namespace: str) -> GeoTarModelReturn: - """ - Get metadata of tar GEO files - - :param namespace: namespace of the tar files - - :return: list with geo data - """ - - with Session(self._sa_engine) as session: - tar_info = session.scalars( - select(TarNamespace) - .where(TarNamespace.namespace == namespace) - .order_by(TarNamespace.submission_date.desc()) - ) - - results = [] - for result in tar_info: - results.append( - GeoTarModel( - identifier=result.id, - namespace=result.namespace, - file_path=result.file_path, - start_period=result.start_period, - end_period=result.end_period, - submission_date=result.submission_date, - number_of_projects=result.number_of_projects, - ) - ) - - return GeoTarModelReturn(count=len(results), results=results) - - def geo_delete_tar_info(self, namespace: str = None) -> None: - """ - Delete all metadata of tar GEO files - - :param namespace: namespace of the tar files - - :return: None - """ - - with Session(self._sa_engine) as session: - - delete_statement = delete(TarNamespace) - if namespace: - delete_statement = delete_statement.where(TarNamespace.namespace == namespace) - session.execute(delete_statement) - session.commit() - _LOGGER.info("Geo tar info was deleted successfully!") diff --git a/tests/test_tar_meta.py b/tests/test_tar_meta.py index bf7b45e..0b1a675 100644 --- a/tests/test_tar_meta.py +++ b/tests/test_tar_meta.py @@ -2,7 +2,7 @@ import pytest -from pepdbagent.models import GeoTarModel +from pepdbagent.models import TarNamespaceModel from .utils import PEPDBAgentContextManager @@ -18,7 +18,7 @@ class TestGeoTar: test_namespace = "namespace1" - tar_info = GeoTarModel( + tar_info = TarNamespaceModel( namespace=test_namespace, submission_date=datetime.now(), start_period=datetime.now(), @@ -30,9 +30,9 @@ class TestGeoTar: def test_create_meta_tar(self): with PEPDBAgentContextManager(add_data=True) as agent: - agent.project.geo_upload_tar_info(tar_info=self.tar_info) + agent.namespace.upload_tar_info(tar_info=self.tar_info) - result = agent.project.geo_get_tar_info(namespace=self.test_namespace) + result = agent.namespace.get_tar_info(namespace=self.test_namespace) assert result.count == 1 assert result.results[0].end_period.strftime( @@ -41,12 +41,12 @@ def test_create_meta_tar(self): def test_delete_meta_tar(self): with PEPDBAgentContextManager(add_data=True) as agent: - agent.project.geo_upload_tar_info(tar_info=self.tar_info) + agent.namespace.upload_tar_info(tar_info=self.tar_info) - result = agent.project.geo_get_tar_info(namespace=self.test_namespace) + result = agent.namespace.get_tar_info(namespace=self.test_namespace) assert result.count == 1 - agent.project.geo_delete_tar_info() + agent.namespace.delete_tar_info() - result = agent.project.geo_get_tar_info(namespace=self.test_namespace) + result = agent.namespace.get_tar_info(namespace=self.test_namespace) assert result.count == 0 From 516de6efdb94febec8428f403d1e682d466897c3 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 6 Sep 2024 12:48:03 -0400 Subject: [PATCH 06/10] updated archive table --- pepdbagent/db_utils.py | 6 ++---- pepdbagent/models.py | 4 +--- pepdbagent/modules/namespace.py | 9 +++------ tests/test_tar_meta.py | 3 --- 4 files changed, 6 insertions(+), 16 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index fe5ab6c..45c151c 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -366,14 +366,12 @@ class SchemaGroupRelations(Base): class TarNamespace(Base): - __tablename__ = "tar_namespace" + __tablename__ = "namespace_archives" id: Mapped[int] = mapped_column(primary_key=True) namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE")) file_path: Mapped[str] = mapped_column(nullable=False) - submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date) - start_period: Mapped[datetime.datetime] = mapped_column(nullable=True) - end_period: Mapped[datetime.datetime] = mapped_column(nullable=True) + creation_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date) number_of_projects: Mapped[int] = mapped_column(default=0) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 0d3d025..462d283 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -303,9 +303,7 @@ class TarNamespaceModel(BaseModel): identifier: int = None namespace: str file_path: str - submission_date: datetime.datetime = None - start_period: datetime.datetime - end_period: datetime.datetime + creation_date: datetime.datetime = None number_of_projects: int = 0 diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 1f34975..e1ec80b 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -315,8 +315,7 @@ def upload_tar_info(self, tar_info: TarNamespaceModel) -> None: new_tar = TarNamespace( file_path=tar_info.file_path, namespace=tar_info.namespace, - start_period=tar_info.start_period, - end_period=tar_info.end_period, + creation_date=tar_info.creation_date, number_of_projects=tar_info.number_of_projects, ) session.add(new_tar) @@ -337,7 +336,7 @@ def get_tar_info(self, namespace: str) -> TarNamespaceModelReturn: tar_info = session.scalars( select(TarNamespace) .where(TarNamespace.namespace == namespace) - .order_by(TarNamespace.submission_date.desc()) + .order_by(TarNamespace.creation_date.desc()) ) results = [] @@ -347,9 +346,7 @@ def get_tar_info(self, namespace: str) -> TarNamespaceModelReturn: identifier=result.id, namespace=result.namespace, file_path=result.file_path, - start_period=result.start_period, - end_period=result.end_period, - submission_date=result.submission_date, + creation_date=result.creation_date, number_of_projects=result.number_of_projects, ) ) diff --git a/tests/test_tar_meta.py b/tests/test_tar_meta.py index 0b1a675..2488a4b 100644 --- a/tests/test_tar_meta.py +++ b/tests/test_tar_meta.py @@ -35,9 +35,6 @@ def test_create_meta_tar(self): result = agent.namespace.get_tar_info(namespace=self.test_namespace) assert result.count == 1 - assert result.results[0].end_period.strftime( - "%Y:%m:%d" - ) == self.tar_info.end_period.strftime("%Y:%m:%d") def test_delete_meta_tar(self): with PEPDBAgentContextManager(add_data=True) as agent: From 8b43a07e1051c3a7076511bac8621184c34ea8f2 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 9 Sep 2024 10:39:53 -0400 Subject: [PATCH 07/10] added file size to namespace archive --- pepdbagent/db_utils.py | 1 + pepdbagent/models.py | 1 + pepdbagent/modules/namespace.py | 2 ++ 3 files changed, 4 insertions(+) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 45c151c..a2c2aa5 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -373,6 +373,7 @@ class TarNamespace(Base): file_path: Mapped[str] = mapped_column(nullable=False) creation_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date) number_of_projects: Mapped[int] = mapped_column(default=0) + file_size: Mapped[int] = mapped_column(nullable=False) class BaseEngine: diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 462d283..1a163a6 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -305,6 +305,7 @@ class TarNamespaceModel(BaseModel): file_path: str creation_date: datetime.datetime = None number_of_projects: int = 0 + file_size: int = 0 class TarNamespaceModelReturn(BaseModel): diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index e1ec80b..332c1cb 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -317,6 +317,7 @@ def upload_tar_info(self, tar_info: TarNamespaceModel) -> None: namespace=tar_info.namespace, creation_date=tar_info.creation_date, number_of_projects=tar_info.number_of_projects, + file_size=tar_info.file_size, ) session.add(new_tar) session.commit() @@ -348,6 +349,7 @@ def get_tar_info(self, namespace: str) -> TarNamespaceModelReturn: file_path=result.file_path, creation_date=result.creation_date, number_of_projects=result.number_of_projects, + file_size=result.file_size, ) ) From b8d9ffbd3aee06c37015b5e124400856002b910e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 11 Sep 2024 12:57:28 -0400 Subject: [PATCH 08/10] added bedbase table stats --- pepdbagent/db_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index a2c2aa5..45d7391 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -376,6 +376,18 @@ class TarNamespace(Base): file_size: Mapped[int] = mapped_column(nullable=False) +class BedBaseStats(Base): + __tablename__ = "bedbase_stats" + + id: Mapped[int] = mapped_column(primary_key=True) + gse: Mapped[str] = mapped_column() + gsm: Mapped[str] = mapped_column() + sample_name: Mapped[str] = mapped_column(nullable=True) + genome: Mapped[Optional[str]] = mapped_column(nullable=True, default="") + last_update_date: Mapped[Optional[str]] = mapped_column() + submission_date: Mapped[Optional[str]] = mapped_column() + + class BaseEngine: """ A class with base methods, that are used in several classes. e.g. fetch_one or fetch_all From 8811a65163b6b83c1d252a17a3fb1887b16d76c1 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 16 Sep 2024 10:30:39 -0400 Subject: [PATCH 09/10] added order by stars --- docs/changelog.md | 3 ++- pepdbagent/modules/annotation.py | 8 +++++--- requirements/requirements-all.txt | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 207ff2a..76ac388 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -3,7 +3,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. ## [0.11.1] -- 2024-09-04 -- Added metadata table for namespace tar files +- Added archive table of namespaces +- Added sort by stars ## [0.11.0] -- 2024-07-24 - Added validation schemas diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index c9c7612..8f755b0 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -300,7 +300,7 @@ def _get_projects( :param limit: limit of return results :param offset: number of results off set (that were already showed) :param order_by: sort the result-set by the information - Options: ["name", "update_date", "submission_date"] + Options: ["update_date", "name", "submission_date", "stars"] [Default: "update_date"] :param order_desc: Sort the records in descending order. [Default: False] :param filter_by: data to use filter on. @@ -371,7 +371,7 @@ def _add_order_by_keyword( :param statement: sqlalchemy representation of a SELECT statement. :param by: sort the result-set by the information - Options: ["name", "update_date", "submission_date"] + Options: ["name", "update_date", "submission_date", "stars"] [Default: "update_date"] :param desc: Sort the records in descending order. [Default: False] :return: sqlalchemy representation of a SELECT statement with order by keyword @@ -382,6 +382,8 @@ def _add_order_by_keyword( order_by_obj = Projects.name elif by == SUBMISSION_DATE_KEY: order_by_obj = Projects.submission_date + elif by == "stars": + order_by_obj = Projects.number_of_stars else: _LOGGER.warning( f"order by: '{by}' statement is unavailable. Projects are sorted by 'update_date'" @@ -614,7 +616,7 @@ def get_projects_list( :param limit: limit of return results :param offset: number of results off set (that were already showed) :param order_by: sort the result-set by the information - Options: ["name", "update_date", "submission_date"] + Options: ["name", "update_date", "submission_date", "stars"] [Default: "update_date"] :param order_desc: Sort the records in descending order. [Default: False] :param filter_by: data to use filter on. diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index cba088f..82073a6 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,6 +1,6 @@ sqlalchemy>=2.0.0 logmuse>=0.2.7 -peppy>=0.40.4 +peppy>=0.40.6 ubiquerg>=0.6.2 coloredlogs>=15.0.1 pytest-mock From c18a7b3a5d787311128fa836549bcdedf6e896d8 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 18 Sep 2024 12:24:37 -0400 Subject: [PATCH 10/10] Fixed model description --- pepdbagent/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 1a163a6..3bcd402 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -297,7 +297,7 @@ class SchemaGroupSearchResult(BaseModel): class TarNamespaceModel(BaseModel): """ - Geo project model + Namespace archive model """ identifier: int = None @@ -310,7 +310,7 @@ class TarNamespaceModel(BaseModel): class TarNamespaceModelReturn(BaseModel): """ - Geo project model + Namespace archive search model """ count: int