Skip to content

Commit

Permalink
Merge pull request #138 from pepkit/namespace_efficiency
Browse files Browse the repository at this point in the history
Improvements in returning pephub stats info
  • Loading branch information
khoroshevskyi authored Jul 2, 2024
2 parents 7129552 + d8b6c70 commit 2137f1f
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 44 deletions.
23 changes: 19 additions & 4 deletions pepdbagent/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class Projects(Base):
__tablename__ = "projects"

id: Mapped[int] = mapped_column(primary_key=True)
namespace: Mapped[str] = mapped_column()
namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE"))
name: Mapped[str] = mapped_column()
tag: Mapped[str] = mapped_column()
digest: Mapped[str] = mapped_column(String(32))
Expand Down Expand Up @@ -108,13 +108,17 @@ class Projects(Base):
back_populates="forked_to_mapping",
remote_side=[id],
single_parent=True,
cascade="all",
cascade="save-update, merge, refresh-expire",
)

forked_to_mapping = relationship(
"Projects", back_populates="forked_from_mapping", cascade="all"
"Projects",
back_populates="forked_from_mapping",
cascade="save-update, merge, refresh-expire",
)

namespace_mapping: Mapped["User"] = relationship("User", back_populates="projects_mapping")

__table_args__ = (UniqueConstraint("namespace", "name", "tag"),)


Expand All @@ -133,6 +137,12 @@ class Samples(Base):
sample_name: Mapped[Optional[str]] = mapped_column()
guid: Mapped[Optional[str]] = mapped_column(nullable=False, unique=True)

submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date)
last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column(
default=deliver_update_date,
onupdate=deliver_update_date,
)

parent_guid: Mapped[Optional[str]] = mapped_column(
ForeignKey("samples.guid", ondelete="CASCADE"),
nullable=True,
Expand Down Expand Up @@ -172,12 +182,17 @@ class User(Base):
__tablename__ = "users"

id: Mapped[int] = mapped_column(primary_key=True)
namespace: Mapped[str]
namespace: Mapped[str] = mapped_column(nullable=False, unique=True)
stars_mapping: Mapped[List["Stars"]] = relationship(
back_populates="user_mapping",
cascade="all, delete-orphan",
order_by="Stars.star_date.desc()",
)
number_of_projects: Mapped[int] = mapped_column(default=0)

projects_mapping: Mapped[List["Projects"]] = relationship(
"Projects", back_populates="namespace_mapping"
)


class Stars(Base):
Expand Down
89 changes: 61 additions & 28 deletions pepdbagent/modules/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
from datetime import datetime, timedelta
from typing import List, Tuple, Union

from sqlalchemy import distinct, func, or_, select, text
from sqlalchemy import distinct, func, or_, select
from sqlalchemy.orm import Session
from sqlalchemy.sql.selectable import Select

from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_LIMIT_INFO, DEFAULT_OFFSET, PKG_NAME
from pepdbagent.db_utils import BaseEngine, Projects
from pepdbagent.db_utils import BaseEngine, Projects, User
from pepdbagent.exceptions import NamespaceNotFoundError
from pepdbagent.models import (
ListOfNamespaceInfo,
Expand Down Expand Up @@ -172,9 +172,54 @@ def _add_condition(
)
return statement

# Old implementation, which computed namespace info from the Projects table
# def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo:
# """
# Get list of top n namespaces in the database
#
# :param limit: limit of results (top namespace )
# :return: number_of_namespaces: int
# limit: int
# results: { namespace: str
# number_of_projects: int
# }
# """
# total_number_of_namespaces = self._count_namespace()
#
# statement = (
# select(
# func.count(Projects.namespace).label("number_of_projects"),
# Projects.namespace,
# )
# .select_from(Projects)
# .where(Projects.private.is_(False))
# .limit(limit)
# .order_by(text("number_of_projects desc"))
# .group_by(Projects.namespace)
# )
#
# with Session(self._sa_engine) as session:
# query_results = session.execute(statement).all()
#
# list_of_results = []
# for result in query_results:
# list_of_results.append(
# NamespaceInfo(
# namespace=result.namespace,
# number_of_projects=result.number_of_projects,
# )
# )
# return ListOfNamespaceInfo(
# number_of_namespaces=total_number_of_namespaces,
# limit=limit,
# results=list_of_results,
# )

def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo:
"""
Get list of top n namespaces in the database
! Warning: this function counts all projects in each namespace;
! it does not filter out private projects (this was done for efficiency reasons).
:param limit: limit of results (top namespace )
:return: number_of_namespaces: int
Expand All @@ -183,36 +228,24 @@ def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo:
number_of_projects: int
}
"""
total_number_of_namespaces = self._count_namespace()

statement = (
select(
func.count(Projects.namespace).label("number_of_projects"),
Projects.namespace,
)
.select_from(Projects)
.where(Projects.private.is_(False))
.limit(limit)
.order_by(text("number_of_projects desc"))
.group_by(Projects.namespace)
)

with Session(self._sa_engine) as session:
query_results = session.execute(statement).all()
results = session.scalars(
select(User).limit(limit).order_by(User.number_of_projects.desc())
)

list_of_results = []
for result in query_results:
list_of_results.append(
NamespaceInfo(
namespace=result.namespace,
number_of_projects=result.number_of_projects,
list_of_results = []
for result in results:
list_of_results.append(
NamespaceInfo(
namespace=result.namespace,
number_of_projects=result.number_of_projects,
)
)
return ListOfNamespaceInfo(
number_of_namespaces=len(list_of_results),
limit=limit,
results=list_of_results,
)
return ListOfNamespaceInfo(
number_of_namespaces=total_number_of_namespaces,
limit=limit,
results=list_of_results,
)

def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats:
"""
Expand Down
22 changes: 18 additions & 4 deletions pepdbagent/modules/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from sqlalchemy.orm.attributes import flag_modified

from pepdbagent.const import DEFAULT_TAG, DESCRIPTION_KEY, NAME_KEY, PEPHUB_SAMPLE_ID_KEY, PKG_NAME
from pepdbagent.db_utils import BaseEngine, Projects, Samples, Subsamples
from pepdbagent.db_utils import BaseEngine, Projects, Samples, Subsamples, User
from pepdbagent.exceptions import (
PEPDatabaseAgentError,
ProjectDuplicatedSampleGUIDsError,
Expand Down Expand Up @@ -208,8 +208,9 @@ def delete(
raise ProjectNotFoundError(
f"Can't delete unexciting project: '{namespace}/{name}:{tag}'."
)
with self._sa_engine.begin() as conn:
conn.execute(

with Session(self._sa_engine) as session:
session.execute(
delete(Projects).where(
and_(
Projects.namespace == namespace,
Expand All @@ -219,7 +220,11 @@ def delete(
)
)

_LOGGER.info(f"Project '{namespace}/{name}:{tag} was successfully deleted'")
statement = select(User).where(User.namespace == namespace)
user = session.scalar(statement)
if user:
user.number_of_projects -= 1
session.commit()

def delete_by_rp(
self,
Expand Down Expand Up @@ -352,6 +357,15 @@ def create(
self._add_subsamples_to_project(new_prj, subsamples)

with Session(self._sa_engine) as session:
user = session.scalar(select(User).where(User.namespace == namespace))

if not user:
user = User(namespace=namespace)
session.add(user)
session.commit()

user.number_of_projects += 1

session.add(new_prj)
session.commit()

Expand Down
1 change: 0 additions & 1 deletion pepdbagent/modules/updates.py

This file was deleted.

4 changes: 2 additions & 2 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
```txt
docker run --rm -it --name pep-db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=docker \
-e POSTGRES_PASSWORD=pass8743hf9h23f87h437 \
-e POSTGRES_DB=pep-db \
-p 5432:5432 postgres
-p 127.0.0.1:5432:5432 postgres
```


Expand Down
17 changes: 16 additions & 1 deletion tests/test_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ def test_annotation_private(self):
result = agent.namespace.get(admin="private_test")
assert len(result.results) == 4

def test_namespace_info(self):
@pytest.mark.skip(
"Skipping test because we are not taking into account the private projects (We are counting all of them)"
)
def test_namespace_info_private(self):
with PEPDBAgentContextManager(add_data=True) as agent:
agent.project.update(
namespace="private_test",
Expand All @@ -35,6 +38,18 @@ def test_namespace_info(self):
assert len(result.results) == 4
assert result.results[3].number_of_projects == 1

def test_namespace_info_all(self):
# Checks the per-namespace project counts returned by namespace.info().
# NOTE(review): the expected count of 6 presumably includes private projects,
# since info() no longer filters them -- confirm against the test fixtures.
with PEPDBAgentContextManager(add_data=True) as agent:
# Mark one project in the "private_test" namespace as public before querying.
agent.project.update(
namespace="private_test",
name="derive",
tag="default",
update_dict={"is_private": False},
)
result = agent.namespace.info()
# Four namespace entries are expected; the entry at index 3 must report 6 projects.
assert len(result.results) == 4
assert result.results[3].number_of_projects == 6

def test_namespace_stats(self):
with PEPDBAgentContextManager(add_data=True) as agent:
stat_result = agent.namespace.stats(monthly=True)
Expand Down
8 changes: 5 additions & 3 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,7 @@ def test_overwrite_project(self, namespace, name):
"namespace, name",
[
["namespace1", "amendments1"],
["namespace1", "amendments2"],
["namespace2", "derive"],
["namespace2", "imply"],
],
)
def test_delete_project(self, namespace, name):
Expand All @@ -182,10 +180,14 @@ def test_delete_project(self, namespace, name):
with pytest.raises(ProjectNotFoundError, match="Project does not exist."):
agent.project.get(namespace=namespace, name=name, tag="default")

def test_delete_not_existing_project(self):
# Deleting the project "namespace1/nothing:default" must raise ProjectNotFoundError
# whose message matches "Project does not exist.".
# NOTE(review): presumably no such project exists in the fixtures -- confirm.
with PEPDBAgentContextManager(add_data=True) as agent:
with pytest.raises(ProjectNotFoundError, match="Project does not exist."):
agent.project.delete(namespace="namespace1", name="nothing", tag="default")

@pytest.mark.parametrize(
"namespace, name",
[
["namespace1", "amendments1"],
["namespace1", "amendments2"],
["namespace2", "derive"],
["namespace2", "imply"],
Expand Down
2 changes: 1 addition & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from pepdbagent import PEPDatabaseAgent

DSN = "postgresql+psycopg://postgres:docker@localhost:5432/pep-db"
DSN = "postgresql+psycopg://postgres:pass8743hf9h23f87h437@localhost:5432/pep-db"

DATA_PATH = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
Expand Down

0 comments on commit 2137f1f

Please sign in to comment.