Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements in returning pephub stats info #138

Merged
merged 4 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions pepdbagent/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class Projects(Base):
__tablename__ = "projects"

id: Mapped[int] = mapped_column(primary_key=True)
namespace: Mapped[str] = mapped_column()
namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE"))
name: Mapped[str] = mapped_column()
tag: Mapped[str] = mapped_column()
digest: Mapped[str] = mapped_column(String(32))
Expand Down Expand Up @@ -108,13 +108,17 @@ class Projects(Base):
back_populates="forked_to_mapping",
remote_side=[id],
single_parent=True,
cascade="all",
cascade="save-update, merge, refresh-expire",
)

forked_to_mapping = relationship(
"Projects", back_populates="forked_from_mapping", cascade="all"
"Projects",
back_populates="forked_from_mapping",
cascade="save-update, merge, refresh-expire",
)

namespace_mapping: Mapped["User"] = relationship("User", back_populates="projects_mapping")

__table_args__ = (UniqueConstraint("namespace", "name", "tag"),)


Expand All @@ -133,6 +137,12 @@ class Samples(Base):
sample_name: Mapped[Optional[str]] = mapped_column()
guid: Mapped[Optional[str]] = mapped_column(nullable=False, unique=True)

submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date)
last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column(
default=deliver_update_date,
onupdate=deliver_update_date,
)

parent_guid: Mapped[Optional[str]] = mapped_column(
ForeignKey("samples.guid", ondelete="CASCADE"),
nullable=True,
Expand Down Expand Up @@ -172,12 +182,17 @@ class User(Base):
__tablename__ = "users"

id: Mapped[int] = mapped_column(primary_key=True)
namespace: Mapped[str]
namespace: Mapped[str] = mapped_column(nullable=False, unique=True)
stars_mapping: Mapped[List["Stars"]] = relationship(
back_populates="user_mapping",
cascade="all, delete-orphan",
order_by="Stars.star_date.desc()",
)
number_of_projects: Mapped[int] = mapped_column(default=0)

projects_mapping: Mapped[List["Projects"]] = relationship(
"Projects", back_populates="namespace_mapping"
)


class Stars(Base):
Expand Down
89 changes: 61 additions & 28 deletions pepdbagent/modules/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
from datetime import datetime, timedelta
from typing import List, Tuple, Union

from sqlalchemy import distinct, func, or_, select, text
from sqlalchemy import distinct, func, or_, select
from sqlalchemy.orm import Session
from sqlalchemy.sql.selectable import Select

from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_LIMIT_INFO, DEFAULT_OFFSET, PKG_NAME
from pepdbagent.db_utils import BaseEngine, Projects
from pepdbagent.db_utils import BaseEngine, Projects, User
from pepdbagent.exceptions import NamespaceNotFoundError
from pepdbagent.models import (
ListOfNamespaceInfo,
Expand Down Expand Up @@ -172,9 +172,54 @@ def _add_condition(
)
return statement

# Old implementation of info(): it computed namespace info by counting rows in the Projects table.
# def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo:
# """
# Get list of top n namespaces in the database
#
# :param limit: limit of results (top namespace )
# :return: number_of_namespaces: int
# limit: int
# results: { namespace: str
# number_of_projects: int
# }
# """
# total_number_of_namespaces = self._count_namespace()
#
# statement = (
# select(
# func.count(Projects.namespace).label("number_of_projects"),
# Projects.namespace,
# )
# .select_from(Projects)
# .where(Projects.private.is_(False))
# .limit(limit)
# .order_by(text("number_of_projects desc"))
# .group_by(Projects.namespace)
# )
#
# with Session(self._sa_engine) as session:
# query_results = session.execute(statement).all()
#
# list_of_results = []
# for result in query_results:
# list_of_results.append(
# NamespaceInfo(
# namespace=result.namespace,
# number_of_projects=result.number_of_projects,
# )
# )
# return ListOfNamespaceInfo(
# number_of_namespaces=total_number_of_namespaces,
# limit=limit,
# results=list_of_results,
# )
donaldcampbelljr marked this conversation as resolved.
Show resolved Hide resolved

def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo:
"""
Get list of top n namespaces in the database
! Warning: this function counts all projects in each namespace.
! Private projects are not filtered out (this was done for efficiency reasons).

:param limit: maximum number of namespaces to return (top namespaces by number of projects)
:return: number_of_namespaces: int
Expand All @@ -183,36 +228,24 @@ def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo:
number_of_projects: int
}
"""
total_number_of_namespaces = self._count_namespace()

statement = (
select(
func.count(Projects.namespace).label("number_of_projects"),
Projects.namespace,
)
.select_from(Projects)
.where(Projects.private.is_(False))
.limit(limit)
.order_by(text("number_of_projects desc"))
.group_by(Projects.namespace)
)

with Session(self._sa_engine) as session:
query_results = session.execute(statement).all()
results = session.scalars(
select(User).limit(limit).order_by(User.number_of_projects.desc())
)

list_of_results = []
for result in query_results:
list_of_results.append(
NamespaceInfo(
namespace=result.namespace,
number_of_projects=result.number_of_projects,
list_of_results = []
for result in results:
list_of_results.append(
NamespaceInfo(
namespace=result.namespace,
number_of_projects=result.number_of_projects,
)
)
return ListOfNamespaceInfo(
number_of_namespaces=len(list_of_results),
limit=limit,
results=list_of_results,
)
return ListOfNamespaceInfo(
number_of_namespaces=total_number_of_namespaces,
limit=limit,
results=list_of_results,
)

def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats:
"""
Expand Down
22 changes: 18 additions & 4 deletions pepdbagent/modules/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from sqlalchemy.orm.attributes import flag_modified

from pepdbagent.const import DEFAULT_TAG, DESCRIPTION_KEY, NAME_KEY, PEPHUB_SAMPLE_ID_KEY, PKG_NAME
from pepdbagent.db_utils import BaseEngine, Projects, Samples, Subsamples
from pepdbagent.db_utils import BaseEngine, Projects, Samples, Subsamples, User
from pepdbagent.exceptions import (
PEPDatabaseAgentError,
ProjectDuplicatedSampleGUIDsError,
Expand Down Expand Up @@ -208,8 +208,9 @@ def delete(
raise ProjectNotFoundError(
f"Can't delete unexciting project: '{namespace}/{name}:{tag}'."
)
with self._sa_engine.begin() as conn:
conn.execute(

with Session(self._sa_engine) as session:
session.execute(
delete(Projects).where(
and_(
Projects.namespace == namespace,
Expand All @@ -219,7 +220,11 @@ def delete(
)
)

_LOGGER.info(f"Project '{namespace}/{name}:{tag} was successfully deleted'")
statement = select(User).where(User.namespace == namespace)
user = session.scalar(statement)
if user:
user.number_of_projects -= 1
session.commit()

def delete_by_rp(
self,
Expand Down Expand Up @@ -352,6 +357,15 @@ def create(
self._add_subsamples_to_project(new_prj, subsamples)

with Session(self._sa_engine) as session:
user = session.scalar(select(User).where(User.namespace == namespace))

if not user:
user = User(namespace=namespace)
session.add(user)
session.commit()

user.number_of_projects += 1

session.add(new_prj)
session.commit()

Expand Down
1 change: 0 additions & 1 deletion pepdbagent/modules/updates.py

This file was deleted.

4 changes: 2 additions & 2 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
```txt
docker run --rm -it --name pep-db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=docker \
-e POSTGRES_PASSWORD=pass8743hf9h23f87h437 \
-e POSTGRES_DB=pep-db \
-p 5432:5432 postgres
-p 127.0.0.1:5432:5432 postgres
```


Expand Down
17 changes: 16 additions & 1 deletion tests/test_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ def test_annotation_private(self):
result = agent.namespace.get(admin="private_test")
assert len(result.results) == 4

def test_namespace_info(self):
@pytest.mark.skip(
"Skipping test because we are not taking into account the private projects (We are counting all of them)"
)
def test_namespace_info_private(self):
with PEPDBAgentContextManager(add_data=True) as agent:
agent.project.update(
namespace="private_test",
Expand All @@ -35,6 +38,18 @@ def test_namespace_info(self):
assert len(result.results) == 4
assert result.results[3].number_of_projects == 1

def test_namespace_info_all(self):
# Verifies the per-namespace project counts returned by namespace.info().
with PEPDBAgentContextManager(add_data=True) as agent:
# Mark the "private_test/derive:default" project as not private before querying.
agent.project.update(
namespace="private_test",
name="derive",
tag="default",
update_dict={"is_private": False},
)
result = agent.namespace.info()
# Expect four namespaces in the result; the last entry must report 6 projects.
# NOTE(review): presumably 6 is the total for that namespace including private
# projects (the private-filtering variant of this test is skipped) - confirm
# against the test fixture data.
assert len(result.results) == 4
assert result.results[3].number_of_projects == 6

def test_namespace_stats(self):
with PEPDBAgentContextManager(add_data=True) as agent:
stat_result = agent.namespace.stats(monthly=True)
Expand Down
8 changes: 5 additions & 3 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,7 @@ def test_overwrite_project(self, namespace, name):
"namespace, name",
[
["namespace1", "amendments1"],
["namespace1", "amendments2"],
["namespace2", "derive"],
["namespace2", "imply"],
],
)
def test_delete_project(self, namespace, name):
Expand All @@ -182,10 +180,14 @@ def test_delete_project(self, namespace, name):
with pytest.raises(ProjectNotFoundError, match="Project does not exist."):
agent.project.get(namespace=namespace, name=name, tag="default")

def test_delete_not_existing_project(self):
# Deleting a project that is not in the database must raise ProjectNotFoundError.
with PEPDBAgentContextManager(add_data=True) as agent:
# pytest.raises(match=...) uses a regex search, so the raised message only has to
# contain "Project does not exist." somewhere in its text.
with pytest.raises(ProjectNotFoundError, match="Project does not exist."):
agent.project.delete(namespace="namespace1", name="nothing", tag="default")

@pytest.mark.parametrize(
"namespace, name",
[
["namespace1", "amendments1"],
["namespace1", "amendments2"],
["namespace2", "derive"],
["namespace2", "imply"],
Expand Down
2 changes: 1 addition & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from pepdbagent import PEPDatabaseAgent

DSN = "postgresql+psycopg://postgres:docker@localhost:5432/pep-db"
DSN = "postgresql+psycopg://postgres:pass8743hf9h23f87h437@localhost:5432/pep-db"

DATA_PATH = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
Expand Down
Loading