Skip to content

Commit

Permalink
Merge pull request #14 from linkml/pv-multi-source
Browse files (browse the repository at this point in the history)
Allow non-1:1 mappings for permissible values (PVs)
  • Loading branch information
cmungall authored Jan 3, 2024
2 parents 6e40c27 + 290d94a commit 265f3ed
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 12 deletions.
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,6 @@ $(DOCDIR):
mkdir -p $@

gendoc: $(DOCDIR)
cp $(SRC)/docs/*md $(DOCDIR) ; \
cp -pr $(SRC)/docs/img $(DOCDIR) ; \
$(RUN) gen-doc -d $(DOCDIR) $(SOURCE_SCHEMA_PATH)

testdoc: gendoc serve
Expand Down
16 changes: 15 additions & 1 deletion src/linkml_transformer/datamodel/transformer_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ class ElementDerivation(SpecificationComponent):
default_factory=dict,
description="""A mapping table in which the keys and values are expressions""",
)
mirror_source: Optional[bool] = Field(None)
description: Optional[str] = Field(
None, description="""description of the specification component"""
)
Expand All @@ -154,6 +155,7 @@ class ClassDerivation(ElementDerivation):
populated_from: Optional[str] = Field(
None, description="""Name of the class in the source schema"""
)
sources: Optional[List[str]] = Field(default_factory=list)
joins: Optional[Dict[str, AliasedClass]] = Field(
default_factory=dict,
description="""Additional classes to be joined to derive instances of the target class""",
Expand All @@ -175,6 +177,7 @@ class ClassDerivation(ElementDerivation):
default_factory=dict,
description="""A mapping table in which the keys and values are expressions""",
)
mirror_source: Optional[bool] = Field(None)
description: Optional[str] = Field(
None, description="""description of the specification component"""
)
Expand Down Expand Up @@ -204,6 +207,11 @@ class SlotDerivation(ElementDerivation):

name: str = Field(..., description="""Target slot name""")
populated_from: Optional[str] = Field(None, description="""Source slot name""")
sources: Optional[List[str]] = Field(default_factory=list)
derived_from: Optional[List[str]] = Field(
default_factory=list,
description="""Source slots that are used to derive this slot. This can be computed from the expr, if the expr is declarative.""",
)
expr: Optional[str] = Field(
None,
description="""An expression to be evaluated on the source object to derive the target slot. Should be specified using the LinkML expression language.""",
Expand Down Expand Up @@ -233,6 +241,7 @@ class SlotDerivation(ElementDerivation):
default_factory=dict,
description="""A mapping table in which the keys and values are expressions""",
)
mirror_source: Optional[bool] = Field(None)
description: Optional[str] = Field(
None, description="""description of the specification component"""
)
Expand All @@ -253,6 +262,7 @@ class EnumDerivation(ElementDerivation):

name: str = Field(..., description="""Target enum name""")
populated_from: Optional[str] = Field(None, description="""Source enum name""")
sources: Optional[List[str]] = Field(default_factory=list)
expr: Optional[str] = Field(
None,
description="""An expression to be evaluated on the source object to derive the target slot. Should be specified using the LinkML expression language.""",
Expand All @@ -277,6 +287,7 @@ class EnumDerivation(ElementDerivation):
default_factory=dict,
description="""A mapping table in which the keys and values are expressions""",
)
mirror_source: Optional[bool] = Field(None)
description: Optional[str] = Field(
None, description="""description of the specification component"""
)
Expand All @@ -298,6 +309,7 @@ class PermissibleValueDerivation(ElementDerivation):
name: str = Field(..., description="""Target permissible value text""")
expr: Optional[str] = Field(None)
populated_from: Optional[str] = Field(None, description="""Source permissible value""")
sources: Optional[List[str]] = Field(default_factory=list)
hide: Optional[bool] = Field(None)
copy_directives: Optional[Dict[str, CopyDirective]] = Field(default_factory=dict)
overrides: Optional[Any] = Field(None, description="""overrides source schema slots""")
Expand All @@ -314,6 +326,7 @@ class PermissibleValueDerivation(ElementDerivation):
default_factory=dict,
description="""A mapping table in which the keys and values are expressions""",
)
mirror_source: Optional[bool] = Field(None)
description: Optional[str] = Field(
None, description="""description of the specification component"""
)
Expand Down Expand Up @@ -344,6 +357,7 @@ class PrefixDerivation(ElementDerivation):
default_factory=dict,
description="""A mapping table in which the keys and values are expressions""",
)
mirror_source: Optional[bool] = Field(None)
description: Optional[str] = Field(
None, description="""description of the specification component"""
)
Expand Down Expand Up @@ -376,7 +390,7 @@ class StringificationConfiguration(ConfiguredBaseModel):

class Inverse(ConfiguredBaseModel):
"""
Used for back references
Used for back references in mapping to relational model
"""

slot_name: Optional[str] = Field(None)
Expand Down
14 changes: 14 additions & 0 deletions src/linkml_transformer/datamodel/transformer_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ classes:
inlined: true
description: >-
A mapping table in which the keys and values are expressions
mirror_source:
range: boolean


ClassDerivation:
Expand All @@ -162,6 +164,9 @@ classes:
populated_from:
range: ClassReference
description: Name of the class in the source schema
sources:
range: ClassReference
multivalued: true
joins:
range: AliasedClass
multivalued: true
Expand Down Expand Up @@ -193,6 +198,9 @@ classes:
populated_from:
range: SlotReference
description: Source slot name
sources:
range: SlotReference
multivalued: true
derived_from:
range: SlotReference
multivalued: true
Expand Down Expand Up @@ -235,6 +243,9 @@ classes:
populated_from:
range: EnumReference
description: Source enum name
sources:
range: EnumReference
multivalued: true
expr:
range: string
description: >-
Expand Down Expand Up @@ -263,6 +274,9 @@ classes:
populated_from:
range: string
description: Source permissible value
sources:
range: string
multivalued: true
hide:
range: boolean

Expand Down
15 changes: 13 additions & 2 deletions src/linkml_transformer/inference/schema_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,19 @@ def _derive_enum(self, enum_derivation: EnumDerivation) -> ClassDefinition:
target_enum.attributes = {}
target_enum.slot_usage = {}
for pv_derivation in enum_derivation.permissible_value_derivations.values():
pv = PermissibleValue(text=pv_derivation.populated_from)
target_enum.permissible_values[pv.text] = pv
if pv_derivation.populated_from:
pv = PermissibleValue(text=pv_derivation.populated_from)
target_enum.permissible_values[pv.text] = pv
elif pv_derivation.sources:
for source in pv_derivation.sources:
pv = PermissibleValue(text=source)
target_enum.permissible_values[pv.text] = pv
else:
raise ValueError(f"Missing populated_from or sources for {pv_derivation}")
if enum_derivation.mirror_source:
for pv in source_enum.permissible_values.values():
if pv.text not in target_enum.permissible_values:
target_enum.permissible_values[pv.text] = copy(pv)
self.source_to_target_class_mappings[populated_from].append(target_enum.name)
return target_enum

Expand Down
23 changes: 22 additions & 1 deletion src/linkml_transformer/transformer/object_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,22 @@ def transform(
elif slot_derivation.populated_from:
v = source_obj.get(slot_derivation.populated_from, None)
source_class_slot = sv.induced_slot(slot_derivation.populated_from, source_type)
logger.debug(
f"Pop slot {slot_derivation.name} => {v} using {slot_derivation.populated_from} // {source_obj}"
)
elif slot_derivation.sources:
vmap = {s: source_obj.get(s, None) for s in slot_derivation.sources}
vmap = {k: v for k, v in vmap.items() if v is not None}
if len(vmap.keys()) > 1:
raise ValueError(f"Multiple sources for {slot_derivation.name}: {vmap}")
elif len(vmap.keys()) == 1:
v = list(vmap.values())[0]
source_class_slot_name = list(vmap.keys())[0]
source_class_slot = sv.induced_slot(source_class_slot_name, source_type)
else:
v = None
source_class_slot = None

logger.debug(
f"Pop slot {slot_derivation.name} => {v} using {slot_derivation.populated_from} // {source_obj}"
)
Expand Down Expand Up @@ -321,4 +337,9 @@ def transform_enum(self, source_value: str, enum_name: str, source_obj: Any) ->
for pv_deriv in enum_deriv.permissible_value_derivations.values():
if source_value == pv_deriv.populated_from:
return pv_deriv.name
return str(source_value)
if source_value in pv_deriv.sources:
return pv_deriv.name
if enum_deriv.mirror_source:
return str(source_value)
else:
return None
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ class_derivations:
enum_derivations:
MyFamilialRelationshipType:
populated_from: FamilialRelationshipType
mirror_source: true
permissible_value_derivations:
SIBLING_OF:
populated_from: SIBLING_OF
Expand Down
32 changes: 26 additions & 6 deletions tests/test_compliance/test_compliance_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ def map_object(
print("**Target Schema (Derived)**:\n\n")
print_yaml(target_schema)
if target_object is not None:
# remove `foo: None` entries
target_object = {k: v for k, v in target_object.items() if v is not None}
ensure_validates(target_sv.schema, target_object)
if invertible and target_object is not None:
inverter = TransformationSpecificationInverter(
Expand Down Expand Up @@ -842,12 +844,16 @@ def test_join(invocation_tracker, source_object, target_object, inlined):


@pytest.mark.parametrize(
"source_value,mapping,target_value",
"source_value,mapping,target_value,mirror_source",
[
("A", {"B": "A"}, "B"),
("A", {"B": "A"}, "B", False),
("Z", {"B": "A"}, None, False),
("C", {"B": "A"}, "C", True),
("A", {"B": ["A", "C"]}, "B", False),
("C", {"B": ["A", "C"]}, "B", False),
],
)
def test_map_enum(invocation_tracker, source_value, mapping, target_value):
def test_map_enum(invocation_tracker, source_value, mapping, target_value, mirror_source):
"""
Test mapping between enum values.
Expand All @@ -872,7 +878,7 @@ def test_map_enum(invocation_tracker, source_value, mapping, target_value):
}
}
}
enums = {"E": {"permissible_values": ["A", "B"]}}
enums = {"E": {"permissible_values": ["A", "B", "C"]}}
schema = build_schema("enums", classes=classes, enums=enums)
source_sv = SchemaView(schema)
cds = {
Expand All @@ -884,20 +890,34 @@ def test_map_enum(invocation_tracker, source_value, mapping, target_value):
}
}
}
pv_derivs = {}
invertible = True
for k, v in mapping.items():
if isinstance(v, list):
pv_derivs[k] = {"sources": v}
invertible = False
else:
pv_derivs[k] = {"populated_from": v}

eds = {
"E": {
"populated_from": "E",
"permissible_value_derivations": {k: {"populated_from": v} for k, v in mapping.items()},
"mirror_source": mirror_source,
"permissible_value_derivations": pv_derivs,
},
}
spec = build_transformer(class_derivations=cds, enum_derivations=eds)
source_object = {"s1": source_value}
if target_value is None:
invertible = False
if mirror_source:
pytest.skip("TODO: mirror_source")
map_object(
spec=spec,
source_object=source_object,
expected_target_object={"s1": target_value},
source_sv=source_sv,
invertible=True,
invertible=invertible,
)


Expand Down

0 comments on commit 265f3ed

Please sign in to comment.