From 27dbfcc23799903767924c29c0094f6ee85c45c6 Mon Sep 17 00:00:00 2001 From: Binh Vu Date: Wed, 8 Nov 2023 00:40:06 -0800 Subject: [PATCH] wip -- having trouble that writer do not support writing specific link. --- drepr/__main__.py | 2 +- .../classes_map_plan/class_map_plan.rs | 549 +++++++++--------- .../classes_map/generic_algo/class_map.rs | 227 ++++++-- .../turtle/class_writers/generic_writer.rs | 87 ++- 4 files changed, 530 insertions(+), 335 deletions(-) diff --git a/drepr/__main__.py b/drepr/__main__.py index b991e3a..5d21530 100644 --- a/drepr/__main__.py +++ b/drepr/__main__.py @@ -86,7 +86,7 @@ else: output = MemoryOutput(OutputFormat(args.format)) - print(ds_model.resources) + # print(ds_model.resources) start = time.time() result = execute(ds_model, resources, output, args.verbose > 1) end = time.time() diff --git a/src/execution_plans/classes_map_plan/class_map_plan.rs b/src/execution_plans/classes_map_plan/class_map_plan.rs index a17a476..ebf615d 100644 --- a/src/execution_plans/classes_map_plan/class_map_plan.rs +++ b/src/execution_plans/classes_map_plan/class_map_plan.rs @@ -7,7 +7,7 @@ use crate::execution_plans::classes_map_plan::data_prop::DataProp; use crate::execution_plans::classes_map_plan::literal_prop::LiteralProp; use crate::execution_plans::classes_map_plan::object_prop::{BlankObject, IDObject, ObjectProp}; use crate::execution_plans::classes_map_plan::subject::{ - BlankSubject, ExternalIDSubject, InternalIDSubject, Subject, + BlankSubject, ExternalIDSubject, InternalIDSubject, Subject, }; use crate::execution_plans::pseudo_id::ClassPseudoID; use crate::lang::{Description, GraphNode, DREPR_URI}; @@ -18,307 +18,320 @@ use crate::executors::classes_map::specific_algo::specific_class_map::analyze_sp #[derive(Serialize, Debug)] pub struct ClassMapPlan<'a> { - pub class_id: usize, - pub subject: Subject<'a>, - pub data_props: Vec>, - pub literal_props: Vec, - pub object_props: Vec>, - pub buffered_object_props: Vec>, - pub exec_strategy: ClassMapExecStrategy, + pub class_id: usize, + pub subject: Subject<'a>, + pub data_props: Vec>, + pub literal_props: Vec, + pub literal_inverse_props: Vec, + pub object_props: Vec>, + pub buffered_object_props: Vec>, + pub exec_strategy: ClassMapExecStrategy, } #[derive(Deserialize, Serialize, Debug)] pub enum ClassMapExecStrategy { - Generic, - Macro(String), + Generic, + Macro(String), } impl<'a> ClassMapPlan<'a> { - pub fn new( - desc: &'a Description, - _output_format: &OutputFormat, - class_id: usize, - class2subj: &[usize], - inference: &AlignmentInference, - edges_optional: &[bool], - removed_edges: &[bool], - ) -> ClassMapPlan<'a> { - let subj = class2subj[class_id]; - let uri_dnode = match desc.semantic_model.outgoing_edges[class_id] - .iter() - .find(|eid| desc.semantic_model.edges[**eid].rel_label == DREPR_URI) - { - None => None, - Some(eid) => Some( - desc.semantic_model.nodes[desc.semantic_model.edges[*eid].target].as_data_node(), - ), - }; + pub fn new( + desc: &'a Description, + _output_format: &OutputFormat, + class_id: usize, + class2subj: &[usize], + inference: &AlignmentInference, + edges_optional: &[bool], + removed_edges: &[bool], + ) -> ClassMapPlan<'a> { + let subj = class2subj[class_id]; + let uri_dnode = match desc.semantic_model.outgoing_edges[class_id] + .iter() + .find(|eid| desc.semantic_model.edges[**eid].rel_label == DREPR_URI) + { + None => None, + Some(eid) => { + Some(desc.semantic_model.nodes[desc.semantic_model.edges[*eid].target].as_data_node()) + } + }; - // generate other properties - let mut literal_props = vec![]; - let mut data_props = vec![]; - let mut object_props = vec![]; - let mut buffered_object_props = vec![]; - - for &eid in &desc.semantic_model.outgoing_edges[class_id] { - match desc.semantic_model.get_target(eid) { - GraphNode::DataNode(n) => { - let attribute = &desc.attributes[n.attr_id]; - let edge = desc.semantic_model.get_edge(class_id, n.node_id).unwrap(); + // generate other properties + let mut literal_props = vec![]; + let mut literal_inverse_props = vec![]; + let mut data_props = vec![]; + let mut object_props = vec![]; + let mut buffered_object_props = vec![]; - if edge.rel_label != DREPR_URI { - data_props.push(DataProp { - alignments: inference.get_alignments(subj, n.attr_id), - predicate_id: edge.edge_id, - attribute, - is_optional: edges_optional[edge.edge_id], - missing_values: attribute - .missing_values - .iter() - .map(|v| v.clone()) - .collect::>(), - }); - } - } - GraphNode::LiteralNode(n) => { - literal_props.push(LiteralProp { - predicate_id: eid, - value: n.val.clone(), - }); - } - GraphNode::ClassNode(n) => { - let attribute = &desc.attributes[class2subj[n.node_id]]; - let predicate_id = eid; - // a class node is optional if all of its properties are optional - let is_target_optional = desc.semantic_model.outgoing_edges[n.node_id] - .iter() - .all(|&eid| edges_optional[eid]); - let alignments = inference.get_alignments(subj, attribute.id); + for &eid in &desc.semantic_model.incoming_edges[class_id] { + if let GraphNode::LiteralNode(n) = desc.semantic_model.get_source(eid) { + literal_inverse_props.push(LiteralProp { + predicate_id: eid, + value: n.val.clone(), + }); + } + } - let prop = if n.is_blank_node(&desc.semantic_model) { - ObjectProp::BlankObject(BlankObject { - attribute, - alignments_cardinality: inference.estimate_cardinality(&alignments), - alignments, - pseudo_id: ClassPseudoID::new( - format!("_:{}", n.get_pseudo_prefix()), - attribute.path.get_nary_steps(), - ), - predicate_id, - class_id, - is_optional: edges_optional[predicate_id], - is_target_optional, - }) - } else { - ObjectProp::IDObject(IDObject { - attribute, - alignments_cardinality: inference.estimate_cardinality(&alignments), - alignments, - pseudo_id: ClassPseudoID::new( - format!("_:{}", n.get_pseudo_prefix()), - attribute.path.get_nary_steps(), - ), - predicate_id, - class_id, - is_optional: edges_optional[predicate_id], - is_target_optional, - missing_values: attribute - .missing_values - .iter() - .map(|v| v.clone()) - .collect::>(), - }) - }; + for &eid in &desc.semantic_model.outgoing_edges[class_id] { + match desc.semantic_model.get_target(eid) { + GraphNode::DataNode(n) => { + let attribute = &desc.attributes[n.attr_id]; + let edge = desc.semantic_model.get_edge(class_id, n.node_id).unwrap(); - if removed_edges[predicate_id] { - buffered_object_props.push(prop); - } else { - object_props.push(prop); - } - } - } + if edge.rel_label != DREPR_URI { + data_props.push(DataProp { + alignments: inference.get_alignments(subj, n.attr_id), + predicate_id: edge.edge_id, + attribute, + is_optional: edges_optional[edge.edge_id], + missing_values: attribute + .missing_values + .iter() + .map(|v| v.clone()) + .collect::>(), + }); + } } + GraphNode::LiteralNode(n) => { + literal_props.push(LiteralProp { + predicate_id: eid, + value: n.val.clone(), + }); + } + GraphNode::ClassNode(n) => { + let attribute = &desc.attributes[class2subj[n.node_id]]; + let predicate_id = eid; + // a class node is optional if all of its properties are optional + let is_target_optional = desc.semantic_model.outgoing_edges[n.node_id] + .iter() + .all(|&eid| edges_optional[eid]); + let alignments = inference.get_alignments(subj, attribute.id); - let subj_attr = &desc.attributes[subj]; - let subj_pseudo_id = ClassPseudoID::new( - format!( - "_:{}", - desc.semantic_model.nodes[class_id] - .as_class_node() - .get_pseudo_prefix() - ), - desc.attributes[subj].path.get_nary_steps(), - ); - let subject = match uri_dnode { - None => Subject::BlankSubject(BlankSubject { - attr: subj_attr, - pseudo_id: subj_pseudo_id, - }), - Some(uri_dnode) => { - // get missing values from the real subjects - let missing_values = desc.attributes[subj] - .missing_values - .iter() - .map(|v| v.clone()) - .collect::>(); + let prop = if n.is_blank_node(&desc.semantic_model) { + ObjectProp::BlankObject(BlankObject { + attribute, + alignments_cardinality: inference.estimate_cardinality(&alignments), + alignments, + pseudo_id: ClassPseudoID::new( + format!("_:{}", n.get_pseudo_prefix()), + attribute.path.get_nary_steps(), + ), + predicate_id, + class_id, + is_optional: edges_optional[predicate_id], + is_target_optional, + }) + } else { + ObjectProp::IDObject(IDObject { + attribute, + alignments_cardinality: inference.estimate_cardinality(&alignments), + alignments, + pseudo_id: ClassPseudoID::new( + format!("_:{}", n.get_pseudo_prefix()), + attribute.path.get_nary_steps(), + ), + predicate_id, + class_id, + is_optional: edges_optional[predicate_id], + is_target_optional, + missing_values: attribute + .missing_values + .iter() + .map(|v| v.clone()) + .collect::>(), + }) + }; - if uri_dnode.attr_id == subj { - Subject::InternalIDSubject(InternalIDSubject { - attr: subj_attr, - pseudo_id: subj_pseudo_id, - is_optional: edges_optional[desc - .semantic_model - .get_edge(class_id, uri_dnode.node_id) - .unwrap() - .edge_id], - missing_values, - }) - } else { - Subject::ExternalIDSubject(ExternalIDSubject { - attr: subj_attr, - pseudo_id: subj_pseudo_id, - real_id: ( - &desc.attributes[uri_dnode.attr_id], - inference.get_alignments(subj, uri_dnode.attr_id), - ), - is_optional: edges_optional[desc - .semantic_model - .get_edge(class_id, uri_dnode.attr_id) - .unwrap() - .edge_id], - missing_values, - }) - } - } - }; + if removed_edges[predicate_id] { + buffered_object_props.push(prop); + } else { + object_props.push(prop); + } + } + } + } - #[allow(unused_mut)] - let mut plan = ClassMapPlan { - class_id, - subject, - data_props, - literal_props, - object_props, - buffered_object_props, - exec_strategy: ClassMapExecStrategy::Generic, - }; + let subj_attr = &desc.attributes[subj]; + let subj_pseudo_id = ClassPseudoID::new( + format!( + "_:{}", + desc.semantic_model.nodes[class_id] + .as_class_node() + .get_pseudo_prefix() + ), + desc.attributes[subj].path.get_nary_steps(), + ); + let subject = match uri_dnode { + None => Subject::BlankSubject(BlankSubject { + attr: subj_attr, + pseudo_id: subj_pseudo_id, + }), + Some(uri_dnode) => { + // get missing values from the real subjects + let missing_values = desc.attributes[subj] + .missing_values + .iter() + .map(|v| v.clone()) + .collect::>(); - #[cfg(feature = "enable-exec-macro-cls-map")] - { - if let Some(explanation) = analyze_specific_algo_strategy(&plan) { - plan.exec_strategy = ClassMapExecStrategy::Macro(explanation) - } + if uri_dnode.attr_id == subj { + Subject::InternalIDSubject(InternalIDSubject { + attr: subj_attr, + pseudo_id: subj_pseudo_id, + is_optional: edges_optional[desc + .semantic_model + .get_edge(class_id, uri_dnode.node_id) + .unwrap() + .edge_id], + missing_values, + }) + } else { + Subject::ExternalIDSubject(ExternalIDSubject { + attr: subj_attr, + pseudo_id: subj_pseudo_id, + real_id: ( + &desc.attributes[uri_dnode.attr_id], + inference.get_alignments(subj, uri_dnode.attr_id), + ), + is_optional: edges_optional[desc + .semantic_model + .get_edge(class_id, uri_dnode.attr_id) + .unwrap() + .edge_id], + missing_values, + }) } + } + }; - plan - } + #[allow(unused_mut)] + let mut plan = ClassMapPlan { + class_id, + subject, + data_props, + literal_props, + literal_inverse_props, + object_props, + buffered_object_props, + exec_strategy: ClassMapExecStrategy::Generic, + }; - /// Find the subject of the class among the attributes of the class. - /// - /// The subject has *-to-one relationship with other attributes. - pub fn find_subject( - desc: &Description, - class_id: usize, - class2subj: &[usize], - inference: &AlignmentInference, - ) -> usize { - // get data nodes, attributes, and the attribute that contains URIs of the class - let mut data_nodes = vec![]; - let mut attrs = vec![]; - let mut uri_attr = None; + #[cfg(feature = "enable-exec-macro-cls-map")] + { + if let Some(explanation) = analyze_specific_algo_strategy(&plan) { + plan.exec_strategy = ClassMapExecStrategy::Macro(explanation) + } + } - for &eid in &desc.semantic_model.outgoing_edges[class_id] { - let target = desc.semantic_model.edges[eid].get_target(&desc.semantic_model); + plan + } - if target.is_data_node() { - let n = target.as_data_node(); - data_nodes.push(n); - attrs.push(n.attr_id); + /// Find the subject of the class among the attributes of the class. + /// + /// The subject has *-to-one relationship with other attributes. + pub fn find_subject( + desc: &Description, + class_id: usize, + class2subj: &[usize], + inference: &AlignmentInference, + ) -> usize { + // get data nodes, attributes, and the attribute that contains URIs of the class + let mut data_nodes = vec![]; + let mut attrs = vec![]; + let mut uri_attr = None; - if desc.semantic_model.edges[eid].rel_label == DREPR_URI { - uri_attr = Some(n.attr_id); - } - } - } + for &eid in &desc.semantic_model.outgoing_edges[class_id] { + let target = desc.semantic_model.edges[eid].get_target(&desc.semantic_model); - // if the subject attribute is provided, then, we will use it - let mut subjs = data_nodes - .iter() - .filter(|&n| { - desc.semantic_model - .get_edge(class_id, n.node_id) - .unwrap() - .is_subject - }) - .map(|n| n.attr_id) - .collect::>(); + if target.is_data_node() { + let n = target.as_data_node(); + data_nodes.push(n); + attrs.push(n.attr_id); - if subjs.len() == 0 { - if attrs.len() == 0 { - // there is a special case where the class has no data node, but only links to other classes - // we need to get the subject from the other classes - // inference.infer_subject() - let mut other_attrs = Vec::new(); - for &eid in &desc.semantic_model.outgoing_edges[class_id] { - let target = desc.semantic_model.edges[eid].get_target(&desc.semantic_model); - if target.is_class_node() { - let target_subj = class2subj[target.get_node_id()]; - // we must have inferred the subject of the target class before (because of the topological sorting) - assert!(target_subj < desc.attributes.len()); - other_attrs.push(target_subj); - } - } - subjs = inference.infer_subject(&other_attrs); - } else { - // invoke the inference to find the subject attribute - subjs = inference.infer_subject(&attrs); - } + if desc.semantic_model.edges[eid].rel_label == DREPR_URI { + uri_attr = Some(n.attr_id); } + } + } + + // if the subject attribute is provided, then, we will use it + let mut subjs = data_nodes + .iter() + .filter(|&n| { + desc + .semantic_model + .get_edge(class_id, n.node_id) + .unwrap() + .is_subject + }) + .map(|n| n.attr_id) + .collect::>(); - if subjs.len() == 0 { - panic!( - "There is no subject attribute of class: {}. Users need to specify it explicitly", - desc.semantic_model.nodes[class_id] - .as_class_node() - .rel_label - ); + if subjs.len() == 0 { + if attrs.len() == 0 { + // there is a special case where the class has no data node, but only links to other classes + // we need to get the subject from the other classes + // inference.infer_subject() + let mut other_attrs = Vec::new(); + for &eid in &desc.semantic_model.outgoing_edges[class_id] { + let target = desc.semantic_model.edges[eid].get_target(&desc.semantic_model); + if target.is_class_node() { + let target_subj = class2subj[target.get_node_id()]; + // we must have inferred the subject of the target class before (because of the topological sorting) + assert!(target_subj < desc.attributes.len()); + other_attrs.push(target_subj); + } } + subjs = inference.infer_subject(&other_attrs); + } else { + // invoke the inference to find the subject attribute + subjs = inference.infer_subject(&attrs); + } + } - ClassMapPlan::select_subject(desc, class_id, &subjs, &attrs, &uri_attr) + if subjs.len() == 0 { + panic!( + "There is no subject attribute of class: {}. Users need to specify it explicitly", + desc.semantic_model.nodes[class_id] + .as_class_node() + .rel_label + ); } - /// Select the best subject from a list of possible subjects. In the current approach, we pick - /// the attribute that is associated with `drepr:uri` predicate. - pub fn select_subject( - _desc: &Description, - _class_id: usize, - subjs: &[usize], - _attrs: &[usize], - uri_attr: &Option, - ) -> usize { - if let Some(aid) = uri_attr { - for &subj in subjs { - if subj == *aid { - return subj; - } - } - } + ClassMapPlan::select_subject(desc, class_id, &subjs, &attrs, &uri_attr) + } - subjs[0] + /// Select the best subject from a list of possible subjects. In the current approach, we pick + /// the attribute that is associated with `drepr:uri` predicate. + pub fn select_subject( + _desc: &Description, + _class_id: usize, + subjs: &[usize], + _attrs: &[usize], + uri_attr: &Option, + ) -> usize { + if let Some(aid) = uri_attr { + for &subj in subjs { + if subj == *aid { + return subj; + } + } } - pub fn is_optional(&self) -> bool { - self.subject.is_optional() - && self.data_props.iter().all(|p| p.is_optional) - && self - .object_props - .iter() - .chain(self.buffered_object_props.iter()) - .all(|p| p.is_optional()) - } + subjs[0] + } + + pub fn is_optional(&self) -> bool { + self.subject.is_optional() + && self.data_props.iter().all(|p| p.is_optional) + && self + .object_props + .iter() + .chain(self.buffered_object_props.iter()) + .all(|p| p.is_optional()) + } } impl ClassMapExecStrategy { - is_enum_type_impl!(ClassMapExecStrategy::is_generic(Generic)); - is_enum_type_impl!(ClassMapExecStrategy::is_macro(Macro(_))); + is_enum_type_impl!(ClassMapExecStrategy::is_generic(Generic)); + is_enum_type_impl!(ClassMapExecStrategy::is_macro(Macro(_))); } diff --git a/src/executors/classes_map/generic_algo/class_map.rs b/src/executors/classes_map/generic_algo/class_map.rs index b4c0857..c8151dc 100644 --- a/src/executors/classes_map/generic_algo/class_map.rs +++ b/src/executors/classes_map/generic_algo/class_map.rs @@ -1,54 +1,95 @@ -use crate::writers::stream_writer::StreamClassWriter; -use readers::prelude::{RAReader}; use crate::alignments::func_builder::build_align_func; -use crate::executors::classes_map::buffer_writer::BufferWriter; use crate::execution_plans::classes_map_plan::subject::Subject; -use crate::executors::classes_map::generic_algo::{generic_optional_dprop_map, generic_optional_oprop_map, generic_optional_buffered_oprop_map, generic_mandatory_dprop_map, generic_mandatory_oprop_map}; -use crate::lang::Description; use crate::execution_plans::ClassMapPlan; +use crate::executors::classes_map::buffer_writer::BufferWriter; +use crate::executors::classes_map::generic_algo::{ + generic_mandatory_dprop_map, generic_mandatory_oprop_map, generic_optional_buffered_oprop_map, + generic_optional_dprop_map, generic_optional_oprop_map, +}; +use crate::lang::Description; +use crate::writers::stream_writer::StreamClassWriter; +use readers::prelude::RAReader; /// Execute mapping for just one class. Handle all cases. -pub fn generic_class_map(readers: &[Box], cls_writer: &mut dyn StreamClassWriter, desc: &Description, class_plan: &ClassMapPlan) { - let mut dprop_aligns = class_plan.data_props.iter() +pub fn generic_class_map( + readers: &[Box], + cls_writer: &mut dyn StreamClassWriter, + desc: &Description, + class_plan: &ClassMapPlan, +) { + let mut dprop_aligns = class_plan + .data_props + .iter() .map(|a| build_align_func(&readers, desc, &a.alignments)) .collect::>(); - let mut dprop_indices = class_plan.data_props.iter() - .map(|p| p.attribute.path.get_initial_step(readers[p.attribute.resource_id].as_ref())) + let mut dprop_indices = class_plan + .data_props + .iter() + .map(|p| { + p.attribute + .path + .get_initial_step(readers[p.attribute.resource_id].as_ref()) + }) .collect::>(); - let mut oprop_aligns = class_plan.object_props.iter() + let mut oprop_aligns = class_plan + .object_props + .iter() .map(|a| build_align_func(&readers, desc, a.get_alignments())) .collect::>(); - let mut oprop_indices = class_plan.object_props.iter() - .map(|a| a.get_attr().path.get_initial_step(readers[a.get_attr().resource_id].as_ref())) + let mut oprop_indices = class_plan + .object_props + .iter() + .map(|a| { + a.get_attr() + .path + .get_initial_step(readers[a.get_attr().resource_id].as_ref()) + }) .collect::>(); - - let mut buffered_oprop_aligns = class_plan.buffered_object_props.iter() + + let mut buffered_oprop_aligns = class_plan + .buffered_object_props + .iter() .map(|a| build_align_func(&readers, desc, a.get_alignments())) .collect::>(); - let mut buffered_oprop_indices = class_plan.buffered_object_props.iter() - .map(|a| a.get_attr().path.get_initial_step(readers[a.get_attr().resource_id].as_ref())) + let mut buffered_oprop_indices = class_plan + .buffered_object_props + .iter() + .map(|a| { + a.get_attr() + .path + .get_initial_step(readers[a.get_attr().resource_id].as_ref()) + }) .collect::>(); - + let mut external_subj = if let Subject::ExternalIDSubject(subj) = &class_plan.subject { Some(( - subj.real_id.0.path.get_initial_step(readers[subj.real_id.0.resource_id].as_ref()), - build_align_func(&readers, desc, &subj.real_id.1).into_single() + subj + .real_id + .0 + .path + .get_initial_step(readers[subj.real_id.0.resource_id].as_ref()), + build_align_func(&readers, desc, &subj.real_id.1).into_single(), )) } else { None }; let subj_attr = class_plan.subject.get_attr(); let mut subj_iter = readers[subj_attr.resource_id].iter_index(&subj_attr.path); - + // not handle the third case - assert!(class_plan.buffered_object_props.iter().all(|op| op.is_optional())); - - if class_plan.data_props.iter().any(|dp| !dp.is_optional) || - class_plan.object_props.iter().any(|op| !op.is_optional()) { + assert!(class_plan + .buffered_object_props + .iter() + .all(|op| op.is_optional())); + + if class_plan.data_props.iter().any(|dp| !dp.is_optional) + || class_plan.object_props.iter().any(|op| !op.is_optional()) + { // missing values will lead to drop of the record // if a subject has missing values then we clear it - let mut buf_writer = BufferWriter::with_capacity(class_plan.data_props.len(), class_plan.object_props.len()); + let mut buf_writer = + BufferWriter::with_capacity(class_plan.data_props.len(), class_plan.object_props.len()); loop { buf_writer.clear(); @@ -81,7 +122,7 @@ pub fn generic_class_map(readers: &[Box], cls_writer: &mut dyn Str let idx = &mut esubj.0; esubj.1.align(subj_iter.value(), subj_val, idx); let real_id = readers[subj.real_id.0.resource_id].get_value(idx, 0); - + if subj.missing_values.contains(real_id) { if subj.is_optional { is_subj_blank = true; @@ -104,7 +145,16 @@ pub fn generic_class_map(readers: &[Box], cls_writer: &mut dyn Str if is_new_subject { for (di, dplan) in class_plan.data_props.iter().enumerate() { - should_keep_record = generic_mandatory_dprop_map(&readers, &mut buf_writer, dplan, &mut dprop_aligns[di], &subj_id, subj_val, subj_iter.value(), &mut dprop_indices[di]); + should_keep_record = generic_mandatory_dprop_map( + &readers, + &mut buf_writer, + dplan, + &mut dprop_aligns[di], + &subj_id, + subj_val, + subj_iter.value(), + &mut dprop_indices[di], + ); if !should_keep_record { break; } @@ -121,8 +171,16 @@ pub fn generic_class_map(readers: &[Box], cls_writer: &mut dyn Str for (oi, oplan) in class_plan.object_props.iter().enumerate() { should_keep_record = generic_mandatory_oprop_map( - &readers, &mut buf_writer, cls_writer, - oplan, &mut oprop_aligns[oi], &subj_id, subj_val, subj_iter.value(), &mut oprop_indices[oi]); + &readers, + &mut buf_writer, + cls_writer, + oplan, + &mut oprop_aligns[oi], + &subj_id, + subj_val, + subj_iter.value(), + &mut oprop_indices[oi], + ); if !should_keep_record { break; @@ -137,17 +195,38 @@ pub fn generic_class_map(readers: &[Box], cls_writer: &mut dyn Str } for (target_cls_id, pred_id, object, is_object_blank) in buf_writer.object_props.iter() { - cls_writer.write_object_property(*target_cls_id, &subj_id, *pred_id, object, is_subj_blank, *is_object_blank, is_new_subject); + cls_writer.write_object_property( + *target_cls_id, + &subj_id, + *pred_id, + object, + is_subj_blank, + *is_object_blank, + is_new_subject, + ); } for &(target_cls_id, pred_id, object, is_object_blank) in &buf_writer.borrow_object_props { - cls_writer.write_object_property(target_cls_id, &subj_id, pred_id, object, is_subj_blank, is_object_blank, is_new_subject); + cls_writer.write_object_property( + target_cls_id, + &subj_id, + pred_id, + object, + is_subj_blank, + is_object_blank, + is_new_subject, + ); } for lplan in class_plan.literal_props.iter() { cls_writer.write_data_property(&subj_id, lplan.predicate_id, &lplan.value); } cls_writer.end_record(); + + /// Write literal inverse properties of a class + /// TODO: this hack only works for TurtleWriter as we leverage the specific implementation + /// of begin_record of existing subj_id and write_object_property + write_literal_inverse_properties(cls_writer, class_plan, &subj_id, is_subj_blank); } if !subj_iter.advance() { @@ -158,7 +237,7 @@ pub fn generic_class_map(readers: &[Box], cls_writer: &mut dyn Str loop { let subj_val = readers[subj_attr.resource_id].get_value(subj_iter.value(), 0); let mut is_subj_blank: bool = false; - + let subj_id: String = match &class_plan.subject { Subject::BlankSubject(subj) => { is_subj_blank = true; @@ -185,35 +264,61 @@ pub fn generic_class_map(readers: &[Box], cls_writer: &mut dyn Str } } }; - + let is_new_subject = if class_plan.buffered_object_props.len() > 0 { cls_writer.begin_partial_buffering_record(&subj_id, is_subj_blank) } else { cls_writer.begin_record(&subj_id, is_subj_blank) }; - + if is_new_subject { for (di, dplan) in class_plan.data_props.iter().enumerate() { - generic_optional_dprop_map(&readers, cls_writer, dplan, &mut dprop_aligns[di], &subj_id, subj_val, subj_iter.value(), &mut dprop_indices[di]); + generic_optional_dprop_map( + &readers, + cls_writer, + dplan, + &mut dprop_aligns[di], + &subj_id, + subj_val, + subj_iter.value(), + &mut dprop_indices[di], + ); } - + for lplan in class_plan.literal_props.iter() { cls_writer.write_data_property(&subj_id, lplan.predicate_id, &lplan.value); } } - + for (oi, oplan) in class_plan.object_props.iter().enumerate() { generic_optional_oprop_map( - readers, cls_writer, oplan, &mut oprop_aligns[oi], &subj_id, - subj_val, subj_iter.value(), &mut oprop_indices[oi], is_subj_blank, is_new_subject); + readers, + cls_writer, + oplan, + &mut oprop_aligns[oi], + &subj_id, + subj_val, + subj_iter.value(), + &mut oprop_indices[oi], + is_subj_blank, + is_new_subject, + ); } - + for (oi, oplan) in class_plan.buffered_object_props.iter().enumerate() { generic_optional_buffered_oprop_map( - readers, cls_writer, oplan, &mut buffered_oprop_aligns[oi], &subj_id, - subj_val, subj_iter.value(), &mut buffered_oprop_indices[oi], is_new_subject); + readers, + cls_writer, + oplan, + &mut buffered_oprop_aligns[oi], + &subj_id, + subj_val, + subj_iter.value(), + &mut buffered_oprop_indices[oi], + is_new_subject, + ); } - + if is_new_subject { if class_plan.buffered_object_props.len() > 0 { cls_writer.end_partial_buffering_record(); @@ -221,10 +326,44 @@ pub fn generic_class_map(readers: &[Box], cls_writer: &mut dyn Str cls_writer.end_record(); } } - + + /// Write literal inverse properties of a class + /// TODO: this hack only works for TurtleWriter as we leverage the specific implementation + /// of begin_record of existing subj_id and write_object_property + write_literal_inverse_properties(cls_writer, class_plan, &subj_id, is_subj_blank); + if !subj_iter.advance() { break; } } } } + +/// Write literal inverse properties of a class +/// TODO: this hack only works for TurtleWriter as we leverage the specific implementation +/// of begin_record of existing subj_id and write_object_property +pub fn write_literal_inverse_properties( + cls_writer: &mut dyn StreamClassWriter, + class_plan: &ClassMapPlan, + subj_id: &String, + is_subj_blank: bool, +) { + if class_plan.literal_inverse_props.len() == 0 { + return; + } + // println!("{:?}", class_plan); + let is_new_subj = cls_writer.begin_record(&subj_id, is_subj_blank); + // assert!(!is_new_subj); + for lplan in class_plan.literal_inverse_props.iter() { + cls_writer.write_object_property( + 100000, + lplan.value.as_str(), + lplan.predicate_id, + &subj_id, + false, + is_subj_blank, + false, + ); + } + cls_writer.end_record(); +} diff --git a/src/writers/stream_writer/turtle/class_writers/generic_writer.rs b/src/writers/stream_writer/turtle/class_writers/generic_writer.rs index 6a9b822..803fde1 100644 --- a/src/writers/stream_writer/turtle/class_writers/generic_writer.rs +++ b/src/writers/stream_writer/turtle/class_writers/generic_writer.rs @@ -1,13 +1,12 @@ -use std::io::{BufWriter, Write}; use std::fmt::Debug; +use std::io::{BufWriter, Write}; -use hashbrown::{HashSet}; +use hashbrown::HashSet; -use readers::prelude::Value; -use crate::writers::stream_writer::StreamClassWriter; use crate::writers::stream_writer::turtle::temp_object_props::TempObjectProps; use crate::writers::stream_writer::turtle::value_fmt::ValueFmt; - +use crate::writers::stream_writer::StreamClassWriter; +use readers::prelude::Value; #[allow(dead_code)] pub struct GenericWriter<'a, W: Write + Debug> { @@ -26,13 +25,13 @@ impl<'a, W: Write + Debug> StreamClassWriter for GenericWriter<'a, W> { fn has_written_record(&self, class_id: usize, subject: &str) -> bool { self.always_write_records[class_id] || self.written_records[class_id].contains(subject) } - + fn begin_record(&mut self, subject: &str, is_blank: bool) -> bool { // check if has been inserted before if self.written_records[self.class_id].contains(subject) { return false; } - + self.written_records[self.class_id].insert(subject.to_string()); if is_blank { write!(self.channel, "{} a {};\n", subject, self.ont_class).unwrap(); @@ -52,13 +51,13 @@ impl<'a, W: Write + Debug> StreamClassWriter for GenericWriter<'a, W> { if self.written_records[self.class_id].contains(subject) { return false; } - + self.buffer_oprops[self.class_id].push(TempObjectProps { id: subject.to_string(), is_blank, props: vec![], }); - + self.written_records[self.class_id].insert(subject.to_string()); if is_blank { write!(self.channel, "{} a {};\n", subject, self.ont_class).unwrap(); @@ -79,9 +78,10 @@ impl<'a, W: Write + Debug> StreamClassWriter for GenericWriter<'a, W> { // encounter a null value, TTL doesn't have a way to represent a null value, so we should panic // because null may mean different things panic!("Cannot write null value because RDF doesn't have a way to represent it") - }, + } Value::Str(v) => { - self.value_templates[predicate_id].write_string_value(&mut self.channel, &v.replace("\"", "\\\"")); + self.value_templates[predicate_id] + .write_string_value(&mut self.channel, &v.replace("\"", "\\\"")); } Value::Bool(v) => { self.value_templates[predicate_id].write_value(&mut self.channel, &v.to_string()); @@ -92,17 +92,42 @@ impl<'a, W: Write + Debug> StreamClassWriter for GenericWriter<'a, W> { Value::F64(v) => { self.value_templates[predicate_id].write_value(&mut self.channel, &v.to_string()); } - Value::Array(_) => unimplemented!("TTL writers does not support writing array yet. The input value is: {:?}", value), - Value::Object(_) => unimplemented!("TTL writers does not support writing array yet. The input value is: {:?}", value), + Value::Array(_) => unimplemented!( + "TTL writers does not support writing array yet. The input value is: {:?}", + value + ), + Value::Object(_) => unimplemented!( + "TTL writers does not support writing array yet. The input value is: {:?}", + value + ), } } - fn write_object_property(&mut self, _target_cls: usize, subject: &str, predicate_id: usize, object: &str, is_subject_blank: bool, is_object_blank: bool, is_new_subj: bool) { + fn write_object_property( + &mut self, + _target_cls: usize, + subject: &str, + predicate_id: usize, + object: &str, + is_subject_blank: bool, + is_object_blank: bool, + is_new_subj: bool, + ) { if is_new_subj { if is_object_blank { - write!(self.channel, "\t{} {};\n", self.predicates[predicate_id], object).unwrap(); + write!( + self.channel, + "\t{} {};\n", + self.predicates[predicate_id], object + ) + .unwrap(); } else { - write!(self.channel, "\t{} <{}>;\n", self.predicates[predicate_id], object).unwrap(); + write!( + self.channel, + "\t{} <{}>;\n", + self.predicates[predicate_id], object + ) + .unwrap(); } } else { if is_subject_blank { @@ -110,18 +135,36 @@ impl<'a, W: Write + Debug> StreamClassWriter for GenericWriter<'a, W> { } else { write!(self.channel, "<{}>", subject).unwrap(); } - + if is_object_blank { - write!(self.channel, " {} {}.\n", self.predicates[predicate_id], object).unwrap(); + write!( + self.channel, + " {} {}.\n", + self.predicates[predicate_id], object + ) + .unwrap(); } else { - write!(self.channel, " {} <{}>.\n", self.predicates[predicate_id], object).unwrap(); + write!( + self.channel, + " {} <{}>.\n", + self.predicates[predicate_id], object + ) + .unwrap(); } } } - fn buffer_object_property(&mut self, _target_cls: usize, predicate_id: usize, object: String, is_object_blank: bool) { - self.buffer_oprops[self.class_id].last_mut().unwrap() + fn buffer_object_property( + &mut self, + _target_cls: usize, + predicate_id: usize, + object: String, + is_object_blank: bool, + ) { + self.buffer_oprops[self.class_id] + .last_mut() + .unwrap() .props .push((predicate_id, object, is_object_blank)); } -} \ No newline at end of file +}