diff --git a/Makefile b/Makefile index 46d2067..4cb9c6e 100644 --- a/Makefile +++ b/Makefile @@ -95,8 +95,6 @@ $(DOCDIR): mkdir -p $@ gendoc: $(DOCDIR) - cp $(SRC)/docs/*md $(DOCDIR) ; \ - cp -pr $(SRC)/docs/img $(DOCDIR) ; \ $(RUN) gen-doc -d $(DOCDIR) $(SOURCE_SCHEMA_PATH) testdoc: gendoc serve diff --git a/src/linkml_transformer/datamodel/transformer_model.py b/src/linkml_transformer/datamodel/transformer_model.py index 7aa5246..e933abc 100644 --- a/src/linkml_transformer/datamodel/transformer_model.py +++ b/src/linkml_transformer/datamodel/transformer_model.py @@ -133,6 +133,7 @@ class ElementDerivation(SpecificationComponent): default_factory=dict, description="""A mapping table in which the keys and values are expressions""", ) + mirror_source: Optional[bool] = Field(None) description: Optional[str] = Field( None, description="""description of the specification component""" ) @@ -154,6 +155,7 @@ class ClassDerivation(ElementDerivation): populated_from: Optional[str] = Field( None, description="""Name of the class in the source schema""" ) + sources: Optional[List[str]] = Field(default_factory=list) joins: Optional[Dict[str, AliasedClass]] = Field( default_factory=dict, description="""Additional classes to be joined to derive instances of the target class""", @@ -175,6 +177,7 @@ class ClassDerivation(ElementDerivation): default_factory=dict, description="""A mapping table in which the keys and values are expressions""", ) + mirror_source: Optional[bool] = Field(None) description: Optional[str] = Field( None, description="""description of the specification component""" ) @@ -204,6 +207,11 @@ class SlotDerivation(ElementDerivation): name: str = Field(..., description="""Target slot name""") populated_from: Optional[str] = Field(None, description="""Source slot name""") + sources: Optional[List[str]] = Field(default_factory=list) + derived_from: Optional[List[str]] = Field( + default_factory=list, + description="""Source slots that are used to derive this slot. This can be computed from the expr, if the expr is declarative.""", + ) expr: Optional[str] = Field( None, description="""An expression to be evaluated on the source object to derive the target slot. Should be specified using the LinkML expression language.""", @@ -233,6 +241,7 @@ class SlotDerivation(ElementDerivation): default_factory=dict, description="""A mapping table in which the keys and values are expressions""", ) + mirror_source: Optional[bool] = Field(None) description: Optional[str] = Field( None, description="""description of the specification component""" ) @@ -253,6 +262,7 @@ class EnumDerivation(ElementDerivation): name: str = Field(..., description="""Target enum name""") populated_from: Optional[str] = Field(None, description="""Source enum name""") + sources: Optional[List[str]] = Field(default_factory=list) expr: Optional[str] = Field( None, description="""An expression to be evaluated on the source object to derive the target slot. Should be specified using the LinkML expression language.""", @@ -277,6 +287,7 @@ class EnumDerivation(ElementDerivation): default_factory=dict, description="""A mapping table in which the keys and values are expressions""", ) + mirror_source: Optional[bool] = Field(None) description: Optional[str] = Field( None, description="""description of the specification component""" ) @@ -298,6 +309,7 @@ class PermissibleValueDerivation(ElementDerivation): name: str = Field(..., description="""Target permissible value text""") expr: Optional[str] = Field(None) populated_from: Optional[str] = Field(None, description="""Source permissible value""") + sources: Optional[List[str]] = Field(default_factory=list) hide: Optional[bool] = Field(None) copy_directives: Optional[Dict[str, CopyDirective]] = Field(default_factory=dict) overrides: Optional[Any] = Field(None, description="""overrides source schema slots""") @@ -314,6 +326,7 @@ class PermissibleValueDerivation(ElementDerivation): default_factory=dict, description="""A mapping table in which the keys and values are expressions""", ) + mirror_source: Optional[bool] = Field(None) description: Optional[str] = Field( None, description="""description of the specification component""" ) @@ -344,6 +357,7 @@ class PrefixDerivation(ElementDerivation): default_factory=dict, description="""A mapping table in which the keys and values are expressions""", ) + mirror_source: Optional[bool] = Field(None) description: Optional[str] = Field( None, description="""description of the specification component""" ) @@ -376,7 +390,7 @@ class StringificationConfiguration(ConfiguredBaseModel): class Inverse(ConfiguredBaseModel): """ - Used for back references + Used for back references in mapping to relational model """ slot_name: Optional[str] = Field(None) diff --git a/src/linkml_transformer/datamodel/transformer_model.yaml b/src/linkml_transformer/datamodel/transformer_model.yaml index a293465..5686f7f 100644 --- a/src/linkml_transformer/datamodel/transformer_model.yaml +++ b/src/linkml_transformer/datamodel/transformer_model.yaml @@ -149,6 +149,8 @@ classes: inlined: true description: >- A mapping table in which the keys and values are expressions + mirror_source: + range: boolean ClassDerivation: @@ -162,6 +164,9 @@ classes: populated_from: range: ClassReference description: Name of the class in the source schema + sources: + range: ClassReference + multivalued: true joins: range: AliasedClass multivalued: true @@ -193,6 +198,9 @@ classes: populated_from: range: SlotReference description: Source slot name + sources: + range: SlotReference + multivalued: true derived_from: range: SlotReference multivalued: true @@ -235,6 +243,9 @@ classes: populated_from: range: EnumReference description: Source enum name + sources: + range: EnumReference + multivalued: true expr: range: string description: >- @@ -263,6 +274,9 @@ classes: populated_from: range: string description: Source permissible value + sources: + range: string + multivalued: true hide: range: boolean diff --git a/src/linkml_transformer/inference/schema_mapper.py b/src/linkml_transformer/inference/schema_mapper.py index dd1f3d6..b0e98cd 100644 --- a/src/linkml_transformer/inference/schema_mapper.py +++ b/src/linkml_transformer/inference/schema_mapper.py @@ -135,8 +135,19 @@ def _derive_enum(self, enum_derivation: EnumDerivation) -> ClassDefinition: target_enum.attributes = {} target_enum.slot_usage = {} for pv_derivation in enum_derivation.permissible_value_derivations.values(): - pv = PermissibleValue(text=pv_derivation.populated_from) - target_enum.permissible_values[pv.text] = pv + if pv_derivation.populated_from: + pv = PermissibleValue(text=pv_derivation.populated_from) + target_enum.permissible_values[pv.text] = pv + elif pv_derivation.sources: + for source in pv_derivation.sources: + pv = PermissibleValue(text=source) + target_enum.permissible_values[pv.text] = pv + else: + raise ValueError(f"Missing populated_from or sources for {pv_derivation}") + if enum_derivation.mirror_source: + for pv in source_enum.permissible_values.values(): + if pv.text not in target_enum.permissible_values: + target_enum.permissible_values[pv.text] = copy(pv) self.source_to_target_class_mappings[populated_from].append(target_enum.name) return target_enum diff --git a/src/linkml_transformer/transformer/object_transformer.py b/src/linkml_transformer/transformer/object_transformer.py index 9f2c432..a7b9f35 100644 --- a/src/linkml_transformer/transformer/object_transformer.py +++ b/src/linkml_transformer/transformer/object_transformer.py @@ -142,6 +142,22 @@ def transform( elif slot_derivation.populated_from: v = source_obj.get(slot_derivation.populated_from, None) source_class_slot = sv.induced_slot(slot_derivation.populated_from, source_type) + logger.debug( + f"Pop slot {slot_derivation.name} => {v} using {slot_derivation.populated_from} // {source_obj}" + ) + elif slot_derivation.sources: + vmap = {s: source_obj.get(s, None) for s in slot_derivation.sources} + vmap = {k: v for k, v in vmap.items() if v is not None} + if len(vmap.keys()) > 1: + raise ValueError(f"Multiple sources for {slot_derivation.name}: {vmap}") + elif len(vmap.keys()) == 1: + v = list(vmap.values())[0] + source_class_slot_name = list(vmap.keys())[0] + source_class_slot = sv.induced_slot(source_class_slot_name, source_type) + else: + v = None + source_class_slot = None + logger.debug( f"Pop slot {slot_derivation.name} => {v} using {slot_derivation.populated_from} // {source_obj}" ) @@ -321,4 +337,9 @@ def transform_enum(self, source_value: str, enum_name: str, source_obj: Any) -> for pv_deriv in enum_deriv.permissible_value_derivations.values(): if source_value == pv_deriv.populated_from: return pv_deriv.name - return str(source_value) + if source_value in pv_deriv.sources: + return pv_deriv.name + if enum_deriv.mirror_source: + return str(source_value) + else: + return None diff --git a/tests/input/examples/personinfo_basic/transform/personinfo-to-agent.transform.yaml b/tests/input/examples/personinfo_basic/transform/personinfo-to-agent.transform.yaml index d10061e..d16593b 100644 --- a/tests/input/examples/personinfo_basic/transform/personinfo-to-agent.transform.yaml +++ b/tests/input/examples/personinfo_basic/transform/personinfo-to-agent.transform.yaml @@ -148,6 +148,7 @@ class_derivations: enum_derivations: MyFamilialRelationshipType: populated_from: FamilialRelationshipType + mirror_source: true permissible_value_derivations: SIBLING_OF: populated_from: SIBLING_OF diff --git a/tests/test_compliance/test_compliance_suite.py b/tests/test_compliance/test_compliance_suite.py index f2469ad..53705cd 100644 --- a/tests/test_compliance/test_compliance_suite.py +++ b/tests/test_compliance/test_compliance_suite.py @@ -159,6 +159,8 @@ def map_object( print("**Target Schema (Derived)**:\n\n") print_yaml(target_schema) if target_object is not None: + # remove `foo: None` entries + target_object = {k: v for k, v in target_object.items() if v is not None} ensure_validates(target_sv.schema, target_object) if invertible and target_object is not None: inverter = TransformationSpecificationInverter( @@ -842,12 +844,16 @@ def test_join(invocation_tracker, source_object, target_object, inlined): @pytest.mark.parametrize( - "source_value,mapping,target_value", + "source_value,mapping,target_value,mirror_source", [ - ("A", {"B": "A"}, "B"), + ("A", {"B": "A"}, "B", False), + ("Z", {"B": "A"}, None, False), + ("C", {"B": "A"}, "C", True), + ("A", {"B": ["A", "C"]}, "B", False), + ("C", {"B": ["A", "C"]}, "B", False), ], ) -def test_map_enum(invocation_tracker, source_value, mapping, target_value): +def test_map_enum(invocation_tracker, source_value, mapping, target_value, mirror_source): """ Test mapping between enum values. @@ -872,7 +878,7 @@ def test_map_enum(invocation_tracker, source_value, mapping, target_value): } } } - enums = {"E": {"permissible_values": ["A", "B"]}} + enums = {"E": {"permissible_values": ["A", "B", "C"]}} schema = build_schema("enums", classes=classes, enums=enums) source_sv = SchemaView(schema) cds = { @@ -884,20 +890,34 @@ def test_map_enum(invocation_tracker, source_value, mapping, target_value): } } } + pv_derivs = {} + invertible = True + for k, v in mapping.items(): + if isinstance(v, list): + pv_derivs[k] = {"sources": v} + invertible = False + else: + pv_derivs[k] = {"populated_from": v} + eds = { "E": { "populated_from": "E", - "permissible_value_derivations": {k: {"populated_from": v} for k, v in mapping.items()}, + "mirror_source": mirror_source, + "permissible_value_derivations": pv_derivs, }, } spec = build_transformer(class_derivations=cds, enum_derivations=eds) source_object = {"s1": source_value} + if target_value is None: + invertible = False + if mirror_source: + pytest.skip("TODO: mirror_source") map_object( spec=spec, source_object=source_object, expected_target_object={"s1": target_value}, source_sv=source_sv, - invertible=True, + invertible=invertible, )