Skip to content

Commit

Permalink
Merge pull request #123 from pepkit/dev
Browse files Browse the repository at this point in the history
release 0.7.2
  • Loading branch information
khoroshevskyi authored Feb 2, 2024
2 parents 9100a7a + 82e3c97 commit f5257a1
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 26 deletions.
3 changes: 2 additions & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.7.2] -- 2023-02-XX
## [0.7.2] -- 2023-02-02
- Fixed Unique Constraint in the Views
- Fixed update project pop method
- Fixed bug in duplicating samples


## [0.7.1] -- 2023-01-22
Expand Down
14 changes: 14 additions & 0 deletions pepdbagent/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ def __init__(self, msg=""):
super().__init__(f"""Sample does not exist. {msg}""")


class SampleAlreadyExistsError(PEPDatabaseAgentError):
def __init__(self, msg=""):
super().__init__(f"""Sample already exists. {msg}""")


class ViewNotFoundError(PEPDatabaseAgentError):
def __init__(self, msg=""):
super().__init__(f"""View does not exist. {msg}""")
Expand All @@ -73,3 +78,12 @@ class SampleAlreadyInView(PEPDatabaseAgentError):

def __init__(self, msg=""):
super().__init__(f"""Sample is already in the view. {msg}""")


class ViewAlreadyExistsError(PEPDatabaseAgentError):
"""
View is already in the project exception
"""

def __init__(self, msg=""):
super().__init__(f"""View already in the project. {msg}""")
109 changes: 91 additions & 18 deletions pepdbagent/modules/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,17 @@ def update(
else:
raise ProjectNotFoundError("No items will be updated!")

@staticmethod
def _find_duplicates(sample_name_list: List[str]) -> List[str]:
seen = set()
duplicates = set()
for name in sample_name_list:
if name in seen:
duplicates.add(name)
else:
seen.add(name)
return list(duplicates)

def _update_samples(
self,
namespace: str,
Expand All @@ -527,6 +538,8 @@ def _update_samples(
:param sample_name_key: key of the sample name
:return: None
"""
# TODO: This function is not ideal and is really slow. We should brainstorm this implementation

new_sample_names = [sample[sample_name_key] for sample in samples_list]
with Session(self._sa_engine) as session:
project = session.scalar(
Expand All @@ -537,38 +550,98 @@ def _update_samples(
)
)
old_sample_names = [sample.sample_name for sample in project.samples_mapping]

# delete samples that are not in the new list
sample_names_copy = new_sample_names.copy()
for old_sample in old_sample_names:
if old_sample not in new_sample_names:
session.execute(
delete(Samples).where(
if old_sample not in sample_names_copy:
this_sample = session.scalars(
select(Samples).where(
and_(
Samples.sample_name == old_sample, Samples.project_id == project.id
)
)
)
delete_samples_list = [k for k in this_sample]
session.delete(delete_samples_list[-1])
else:
sample_names_copy.remove(old_sample)

# update or add samples
order_number = 0
added_sample_list = []
for new_sample in samples_list:
order_number += 1
if new_sample[sample_name_key] not in old_sample_names:
project.samples_mapping.append(
Samples(
sample=new_sample,
sample_name=new_sample[sample_name_key],
row_number=order_number,

if new_sample[sample_name_key] not in added_sample_list:
added_sample_list.append(new_sample[sample_name_key])

if new_sample[sample_name_key] not in old_sample_names:
project.samples_mapping.append(
Samples(
sample=new_sample,
sample_name=new_sample[sample_name_key],
row_number=order_number,
)
)
)
else:
sample_mapping = session.scalar(
select(Samples).where(
and_(
Samples.sample_name == new_sample[sample_name_key],
Samples.project_id == project.id,
)
)
)
sample_mapping.sample = new_sample
sample_mapping.row_number = order_number
else:
sample_mapping = session.scalar(
select(Samples).where(
and_(
Samples.sample_name == new_sample[sample_name_key],
Samples.project_id == project.id,
# if sample_name is duplicated is sample table, find second sample and update or add it.
if new_sample[sample_name_key] in old_sample_names:
sample_mappings = session.scalars(
select(Samples).where(
and_(
Samples.sample_name == new_sample[sample_name_key],
Samples.project_id == project.id,
)
)
)
)
sample_mapping.sample = new_sample
sample_mapping.row_number = order_number
sample_mappings = [sample_mapping for sample_mapping in sample_mappings]
if len(sample_mappings) <= 1:
project.samples_mapping.append(
Samples(
sample=new_sample,
sample_name=new_sample[sample_name_key],
row_number=order_number,
)
)
else:
try:
sample_mapping = sample_mappings[
added_sample_list.count(new_sample[sample_name_key])
]
sample_mapping.sample = new_sample
sample_mapping.row_number = order_number

except Exception:
project.samples_mapping.append(
Samples(
sample=new_sample,
sample_name=new_sample[sample_name_key],
row_number=order_number,
)
)
added_sample_list.append(new_sample[sample_name_key])
else:
project.samples_mapping.append(
Samples(
sample=new_sample,
sample_name=new_sample[sample_name_key],
row_number=order_number,
)
)
added_sample_list.append(new_sample[sample_name_key])

session.commit()

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions pepdbagent/modules/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
DEFAULT_TAG,
PKG_NAME,
)
from pepdbagent.exceptions import SampleNotFoundError
from pepdbagent.exceptions import SampleNotFoundError, SampleAlreadyExistsError

from pepdbagent.db_utils import BaseEngine, Samples, Projects

Expand Down Expand Up @@ -241,7 +241,7 @@ def add(
)

if sample_mapping and not overwrite:
raise ValueError(
raise SampleAlreadyExistsError(
f"Sample {namespace}/{name}:{tag}?{sample_name} already exists in the database"
)
elif sample_mapping and overwrite:
Expand Down
20 changes: 16 additions & 4 deletions pepdbagent/modules/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
SampleAlreadyInView,
ProjectNotFoundError,
SampleNotFoundError,
ViewAlreadyExistsError,
)

from pepdbagent.db_utils import BaseEngine, Samples, Projects, Views, ViewSampleAssociation
Expand Down Expand Up @@ -188,7 +189,13 @@ def create(
raise SampleNotFoundError(
f"Sample {view_dict.project_namespace}/{view_dict.project_name}:{view_dict.project_tag}:{sample_name} does not exist"
)
sa_session.add(ViewSampleAssociation(sample_id=sample_id, view=view))
try:
sa_session.add(ViewSampleAssociation(sample_id=sample_id, view=view))

except IntegrityError:
raise ViewAlreadyExistsError(
f"View {view_name} of the project {view_dict.project_namespace}/{view_dict.project_name}:{view_dict.project_tag} already exists"
)

sa_session.commit()

Expand Down Expand Up @@ -334,10 +341,15 @@ def remove_sample(
ViewSampleAssociation.view_id == view.id,
)
)
sa_session.execute(delete_statement)
sa_session.commit()
try:
sa_session.execute(delete_statement)
sa_session.commit()
except IntegrityError:
raise SampleNotFoundError(
f"Sample {namespace}/{name}:{tag}:{sample_name} does not exist in view {view_name}"
)

return None
return None

def get_snap_view(
self, namespace: str, name: str, tag: str, sample_name_list: List[str], raw: bool = False
Expand Down
39 changes: 38 additions & 1 deletion tests/test_pepagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def test_update_project_description(
"namespace, name",
[
["namespace1", "amendments1"],
# ["namespace1", "amendments2"],
["namespace3", "subtable1"],
# ["namespace2", "derive"],
# ["namespace2", "imply"],
],
Expand Down Expand Up @@ -384,6 +384,43 @@ def test_update_project_private(self, initiate_pepdb_con, namespace, name):
)
assert is_private is True

@pytest.mark.parametrize(
"namespace, name",
[
["namespace1", "basic"],
],
)
def test_project_can_have_2_sample_names(self, initiate_pepdb_con, namespace, name):
"""
In PEP 2.1.0 project can have 2 rows with same sample name,
ensure that update works correctly
"""
new_prj = initiate_pepdb_con.project.get(namespace=namespace, name=name)
prj_dict = new_prj.to_dict(extended=True, orient="records")

prj_dict["_sample_dict"].append(
{"file": "data/frog23_data.txt", "protocol": "anySample3Type", "sample_name": "frog_2"}
)
prj_dict["_sample_dict"].append(
{
"file": "data/frog23_data.txt4",
"protocol": "anySample3Type4",
"sample_name": "frog_2",
}
)

new_prj.description = "new_description"
initiate_pepdb_con.project.update(
namespace=namespace,
name=name,
tag="default",
update_dict={"project": peppy.Project.from_dict(prj_dict)},
)

prj = initiate_pepdb_con.project.get(namespace=namespace, name=name, raw=True)

assert len(prj["_sample_dict"]) == 4


@pytest.mark.skipif(
not db_setup(),
Expand Down

0 comments on commit f5257a1

Please sign in to comment.