Skip to content

Commit

Permalink
WIP: sample sheets...
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed Dec 11, 2024
1 parent 2cb4561 commit cbdfc69
Show file tree
Hide file tree
Showing 23 changed files with 628 additions and 15 deletions.
3 changes: 3 additions & 0 deletions client/src/api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,4 +312,7 @@ export type ObjectExportTaskResponse = components["schemas"]["ObjectExportTaskRe
export type ExportObjectRequestMetadata = components["schemas"]["ExportObjectRequestMetadata"];
export type ExportObjectResultMetadata = components["schemas"]["ExportObjectResultMetadata"];

export type SampleSheetColumnDefinition = components["schemas"]["SampleSheetColumnDefinition"];
export type SampleSheetColumnDefinitions = SampleSheetColumnDefinition[] | null;

export type AsyncTaskResultSummary = components["schemas"]["AsyncTaskResultSummary"];
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const collectionTypeOptions = [
{ value: "list", label: "List of Datasets" },
{ value: "paired", label: "Dataset Pair" },
{ value: "list:paired", label: "List of Dataset Pairs" },
{ value: "sample_sheet", label: "Sample Sheet of Datasets" },
];
function updateValue(newValue: string | undefined) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
<script setup lang="ts">
import { computed, toRef } from "vue";
import { type SampleSheetColumnDefinitions } from "@/api";
import type { DatatypesMapperModel } from "@/components/Datatypes/model";
import type { Step } from "@/stores/workflowStepStore";
import { useToolState } from "../composables/useToolState";
import FormElement from "@/components/Form/FormElement.vue";
import FormCollectionType from "@/components/Workflow/Editor/Forms/FormCollectionType.vue";
import FormColumnDefinitions from "@/components/Workflow/Editor/Forms/FormColumnDefinitions.vue";
import FormDatatype from "@/components/Workflow/Editor/Forms/FormDatatype.vue";
// Shape of the tool state object this form builds and emits through "onChange".
interface ToolState {
collection_type: string | null;
optional: boolean;
format: string | null;
tag: string | null;
// Defaults to null; the column definitions form is only rendered when
// collection_type is "sample_sheet".
column_definitions: SampleSheetColumnDefinitions;
}
const props = defineProps<{
Expand All @@ -34,6 +37,7 @@ function cleanToolState(): ToolState {
optional: false,
tag: null,
format: null,
column_definitions: null,
};
}
}
Expand Down Expand Up @@ -64,6 +68,13 @@ function onCollectionType(newCollectionType: string | null) {
emit("onChange", state);
}
/**
 * Handles a change reported by the column definitions form.
 *
 * Starts from the state returned by `cleanToolState()`, replaces its
 * `column_definitions` with the new value and emits the result to the parent
 * through the `onChange` event.
 *
 * @param newColumnDefinitions - the updated column definitions, or null when none are set
 */
function onColumnDefinitions(newColumnDefinitions: SampleSheetColumnDefinitions) {
    const state = cleanToolState();
    state.column_definitions = newColumnDefinitions;
    emit("onChange", state);
}
const formatsAsList = computed(() => {
const formatStr = toolState.value?.format as string | null;
if (formatStr) {
Expand Down Expand Up @@ -97,5 +108,9 @@ emit("onChange", cleanToolState());
type="text"
help="Tags to automatically filter inputs"
@input="onTags" />
<FormColumnDefinitions
v-if="toolState?.collection_type == 'sample_sheet'"
:value="toolState?.column_definitions"
@onChange="onColumnDefinitions" />
</div>
</template>
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ export class CollectionTypeDescription implements CollectionTypeDescriptor {
}
}

const collectionTypeRegex = /^(list|paired)(:(list|paired))*$/;
const collectionTypeRegex = /^((list|paired)(:(list|paired))*|sample_sheet)$/;

export function isValidCollectionTypeStr(collectionType: string | undefined) {
if (collectionType) {
Expand Down
17 changes: 16 additions & 1 deletion lib/galaxy/managers/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ def create(
completed_job=None,
output_name=None,
fields=None,
column_definitions=None,
rows=None,
):
"""
PRECONDITION: security checks on ability to add to parent
Expand All @@ -201,6 +203,8 @@ def create(
copy_elements=copy_elements,
history=history,
fields=fields,
column_definitions=column_definitions,
rows=rows,
)

implicit_inputs = []
Expand Down Expand Up @@ -288,6 +292,8 @@ def create_dataset_collection(
copy_elements=False,
history=None,
fields=None,
column_definitions=None,
rows=None,
):
# Make sure at least one of these is None.
assert element_identifiers is None or elements is None
Expand Down Expand Up @@ -324,9 +330,12 @@ def create_dataset_collection(

if elements is not self.ELEMENTS_UNINITIALIZED:
type_plugin = collection_type_description.rank_type_plugin()
dataset_collection = builder.build_collection(type_plugin, elements, fields=fields)
dataset_collection = builder.build_collection(
type_plugin, elements, fields=fields, column_definitions=column_definitions, rows=rows
)
else:
# TODO: Pass fields here - need test case first.
# TODO: same with column definitions I think.
dataset_collection = model.DatasetCollection(populated=False)
dataset_collection.collection_type = collection_type
return dataset_collection
Expand Down Expand Up @@ -783,10 +792,16 @@ def __init_rule_data(self, elements, collection_type_description, parent_identif
identifiers = parent_identifiers + [element.element_identifier]
if not element.is_collection:
data.append([])
columns = None
collection_type_str = collection_type_description.collection_type
if collection_type_str == "sample_sheet":
columns = element.columns
assert isinstance(columns, list)
source = {
"identifiers": identifiers,
"dataset": element_object,
"tags": element_object.make_tag_string_list(),
"columns": columns,
}
sources.append(source)
else:
Expand Down
6 changes: 6 additions & 0 deletions lib/galaxy/managers/collections_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
exceptions,
model,
)
from galaxy.model.dataset_collections.types.sample_sheet_util import validate_column_definitions
from galaxy.util import string_as_bool

log = logging.getLogger(__name__)
Expand All @@ -33,13 +34,18 @@ def api_payload_to_create_params(payload):
message = f"Missing required parameters {missing_parameters}"
raise exceptions.ObjectAttributeMissingException(message)

column_definitions = payload.get("column_definitions", None)
validate_column_definitions(column_definitions)

params = dict(
collection_type=payload.get("collection_type"),
element_identifiers=payload.get("element_identifiers"),
name=payload.get("name", None),
hide_source_items=string_as_bool(payload.get("hide_source_items", False)),
copy_elements=string_as_bool(payload.get("copy_elements", False)),
fields=payload.get("fields", None),
column_definitions=column_definitions,
rows=payload.get("rows", None),
)
return params

Expand Down
16 changes: 14 additions & 2 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@
DatasetValidatedState,
InvocationsStateCounts,
JobState,
SampleSheetColumnDefinitions,
SampleSheetRow,
ToolRequestState,
)
from galaxy.schema.workflow.comments import WorkflowCommentModel
Expand Down Expand Up @@ -260,6 +262,7 @@ class ConfigurationTemplateEnvironmentVariable(TypedDict):
CONFIGURATION_TEMPLATE_CONFIGURATION_VARIABLES_TYPE = Dict[str, CONFIGURATION_TEMPLATE_CONFIGURATION_VALUE_TYPE]
CONFIGURATION_TEMPLATE_CONFIGURATION_SECRET_NAMES_TYPE = List[str]
CONFIGURATION_TEMPLATE_DEFINITION_TYPE = Dict[str, Any]
DATA_COLLECTION_FIELDS = List[Dict[str, Any]]


class TransformAction(TypedDict):
Expand Down Expand Up @@ -6521,6 +6524,10 @@ class DatasetCollection(Base, Dictifiable, UsesAnnotations, Serializable):
element_count: Mapped[Optional[int]]
create_time: Mapped[datetime] = mapped_column(default=now, nullable=True)
update_time: Mapped[datetime] = mapped_column(default=now, onupdate=now, nullable=True)
# if collection_type is 'record' (heterogenous collection)
fields: Mapped[Optional[DATA_COLLECTION_FIELDS]] = mapped_column(JSONType)
# if collection_type is 'sample_sheet' (collection of rows that are datasets with extra column metadata)
column_definitions: Mapped[Optional[SampleSheetColumnDefinitions]] = mapped_column(JSONType)

elements: Mapped[List["DatasetCollectionElement"]] = relationship(
primaryjoin=(lambda: DatasetCollection.id == DatasetCollectionElement.dataset_collection_id),
Expand All @@ -6540,14 +6547,15 @@ def __init__(
populated=True,
element_count=None,
fields=None,
column_definitions=None,
):
self.id = id
self.collection_type = collection_type
if not populated:
self.populated_state = DatasetCollection.populated_states.NEW
self.element_count = element_count
# TODO: persist fields...
self.fields = fields
self.column_definitions = column_definitions

def _build_nested_collection_attributes_stmt(
self,
Expand Down Expand Up @@ -6956,6 +6964,7 @@ def _base_to_dict(self, view):
name=self.name,
collection_id=self.collection_id,
collection_type=self.collection.collection_type,
column_definitions=self.collection.column_definitions,
populated=self.populated,
populated_state=self.collection.populated_state,
populated_state_message=self.collection.populated_state_message,
Expand Down Expand Up @@ -7443,6 +7452,7 @@ class DatasetCollectionElement(Base, Dictifiable, Serializable):
# Element index and identifier to define this parent-child relationship.
element_index: Mapped[Optional[int]]
element_identifier: Mapped[Optional[str]] = mapped_column(Unicode(255))
columns: Mapped[Optional[SampleSheetRow]] = mapped_column(JSONType)

hda = relationship(
"HistoryDatasetAssociation",
Expand All @@ -7463,7 +7473,7 @@ class DatasetCollectionElement(Base, Dictifiable, Serializable):

# actionable dataset id needs to be available via API...
dict_collection_visible_keys = ["id", "element_type", "element_index", "element_identifier"]
dict_element_visible_keys = ["id", "element_type", "element_index", "element_identifier"]
dict_element_visible_keys = ["id", "element_type", "element_index", "element_identifier", "columns"]

UNINITIALIZED_ELEMENT = object()

Expand All @@ -7474,6 +7484,7 @@ def __init__(
element=None,
element_index=None,
element_identifier=None,
columns: Optional[SampleSheetRow] = None,
):
if isinstance(element, HistoryDatasetAssociation):
self.hda = element
Expand All @@ -7489,6 +7500,7 @@ def __init__(
self.collection = collection
self.element_index = element_index
self.element_identifier = element_identifier or str(element_index)
self.columns = columns

def __strict_check_before_flush__(self):
if self.collection.populated_optimized:
Expand Down
25 changes: 20 additions & 5 deletions lib/galaxy/model/dataset_collections/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,42 @@
from .type_description import COLLECTION_TYPE_DESCRIPTION_FACTORY


def build_collection(type, dataset_instances, collection=None, associated_identifiers=None, fields=None):
def build_collection(
type,
dataset_instances,
collection=None,
associated_identifiers=None,
fields=None,
column_definitions=None,
rows=None,
):
"""
Build DatasetCollection with populated DatasetcollectionElement objects
corresponding to the supplied dataset instances or throw exception if
this is not a valid collection of the specified type.
"""
dataset_collection = collection or model.DatasetCollection(fields=fields)
dataset_collection = collection or model.DatasetCollection(fields=fields, column_definitions=column_definitions)
associated_identifiers = associated_identifiers or set()
set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers, fields=fields)
set_collection_elements(
dataset_collection, type, dataset_instances, associated_identifiers, fields=fields, rows=rows
)
return dataset_collection


def set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers, fields=None):
def set_collection_elements(
dataset_collection, type, dataset_instances, associated_identifiers, fields=None, rows=None
):
new_element_keys = OrderedSet(dataset_instances.keys()) - associated_identifiers
new_dataset_instances = {k: dataset_instances[k] for k in new_element_keys}
dataset_collection.element_count = dataset_collection.element_count or 0
element_index = dataset_collection.element_count
elements = []
if fields == "auto":
fields = guess_fields(dataset_instances)
for element in type.generate_elements(new_dataset_instances, fields=fields):
column_definitions = dataset_collection.column_definitions
for element in type.generate_elements(
new_dataset_instances, fields=fields, rows=rows, column_definitions=column_definitions
):
element.element_index = element_index
add_object_to_object_session(element, dataset_collection)
element.collection = dataset_collection
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy/model/dataset_collections/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
list,
paired,
record,
sample_sheet,
)

# Dataset collection type plugin classes provided by the sibling type modules
# imported above. How this list is consumed is not visible in this hunk.
PLUGIN_CLASSES = [
list.ListDatasetCollectionType,
paired.PairedDatasetCollectionType,
record.RecordDatasetCollectionType,
sample_sheet.SampleSheetDatasetCollectionType,
]


Expand Down
30 changes: 30 additions & 0 deletions lib/galaxy/model/dataset_collections/types/sample_sheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from galaxy.exceptions import RequestParameterMissingException
from galaxy.model import DatasetCollectionElement
from . import BaseDatasetCollectionType
from .sample_sheet_util import validate_row


class SampleSheetDatasetCollectionType(BaseDatasetCollectionType):
    """A flat list of named elements, each stored with a row of column metadata."""

    collection_type = "sample_sheet"

    def generate_elements(self, dataset_instances, **kwds):
        """Yield one ``DatasetCollectionElement`` per supplied dataset instance.

        This is a generator, so every check below runs when the result is
        iterated, not when the method is called.

        :param dataset_instances: mapping of element identifier to the element
            object to wrap.
        :param kwds: ``rows`` is required and is looked up by element
            identifier; ``column_definitions`` is optional and is handed to
            ``validate_row`` together with each row.
        :raises RequestParameterMissingException: if ``rows`` is missing or None.
        """
        rows = kwds.get("rows", None)
        column_definitions = kwds.get("column_definitions", None)
        if rows is None:
            raise RequestParameterMissingException(
                "Missing or null parameter 'rows' required for 'sample_sheet' collection types."
            )
        if len(dataset_instances) != len(rows):
            self._validation_failed("Supplied element do not match 'rows'.")

        for identifier, element in dataset_instances.items():
            # Equal lengths do not guarantee matching keys, so report a missing
            # row as a validation failure instead of letting a bare KeyError
            # escape. NOTE(review): _validation_failed is defined on the base
            # class (not visible here) and is presumed to raise - confirm.
            if identifier not in rows:
                self._validation_failed(f"No entry in 'rows' for element identifier '{identifier}'.")
            columns = rows[identifier]
            validate_row(columns, column_definitions)
            association = DatasetCollectionElement(
                element=element,
                element_identifier=identifier,
                columns=columns,
            )
            yield association
Loading

0 comments on commit cbdfc69

Please sign in to comment.